Documentation and pep8 compilance

2018-11-16 21:13:45 +05:30
parent 8a3cefde22
commit d27eef116f
19 changed files with 342 additions and 216 deletions
--- a/core/htmlParser.py
+++ b/core/htmlParser.py
@@ -2,22 +2,28 @@ import re

 from core.config import badTags, xsschecker

+
 def htmlParser(response, encoding):
-    rawResponse = response
-    response = response.text
-    if encoding:
+    rawResponse = response  # raw response returned by requests
+    response = response.text  # response content
+    if encoding:  # if the user has specified an encoding, encode the probe in that
        response = response.replace(encoding(xsschecker), xsschecker)
-    tags = [] #  tags in which the input is reflected
-    locations = [] #  contexts in which the input is reflected
-    attributes = [] #  attribute names
-    environments = [] #  strings needed to break out of the context
-    positions = []
+    tags = []  # tags in which the input is reflected
+    locations = []  # contexts in which the input is reflected
+    attributes = []  # attribute names
+    environments = []  # strings needed to break out of the context
+    positions = []  # postions of all the reflections of the xsschecker
    for match in re.finditer(xsschecker, response):
        positions.append(match.start())
+
+#  It finds the contexts of the reflections
+
    parts = response.split(xsschecker)
-    parts.remove(parts[0]) #  remove first element since it doesn't contain xsschecker
-    parts = [xsschecker + s for s in parts] #  add xsschecker in front of all elements
-    for part in parts: #  iterate over the parts
+    # remove first element since it doesn't contain xsschecker
+    parts.remove(parts[0])
+    # add xsschecker in front of all elements
+    parts = [xsschecker + s for s in parts]
+    for part in parts:  # iterate over the parts
        deep = part.split('>')
        if '</script' in deep[0]:
            location = 'script'
@@ -33,56 +39,69 @@ def htmlParser(response, encoding):
                        continue
                location = 'script'
                for char in part:
-                    if char == '<': #  the only way to find out if it's attribute context is to see if '<' is present. 
-                        location = 'attribute' #  no, it doesn't match '<script>'
+                    # the only way to find out if it's attribute context is to see if '<' is present.
+                    if char == '<':
+                        location = 'attribute'  # no, it doesn't match '<script>'
                        break
                num += 1
        if '<' not in response:
            if rawResponse.headers['Content-Type'] == 'text/html':
                location = 'html'
-        locations.append(location) #  add location to locations list
-    num = 0 #  dummy value to keep record of occurence being processed
-    for occ in re.finditer(xsschecker, response, re.IGNORECASE): #  find xsschecker in response and return matches
-        toLook = list(response[occ.end():]) #  convert "xsschecker to EOF" into a list
-        for loc in range(len(toLook)): #  interate over the chars
-            if toLook[loc] in ('\'', '"', '`'): #  if the char is a quote
-                environments.append(toLook[loc]) #  add it to enviornemts list
+        locations.append(location)  # add location to locations list
+
+#  Finds the "environment" of reflections. is it within double quotes? Which tag contains the reflection?
+
+    num = 0  # dummy value to keep record of occurence being processed
+    # find xsschecker in response and return matches
+    for occ in re.finditer(xsschecker, response, re.IGNORECASE):
+        # convert "xsschecker to EOF" into a list
+        toLook = list(response[occ.end():])
+        for loc in range(len(toLook)):  # interate over the chars
+            if toLook[loc] in ('\'', '"', '`'):  # if the char is a quote
+                environments.append(toLook[loc])  # add it to enviornemts list
                tokens = response.split('<')
-                goodTokens = [] #  tokens which contain xsschecker
-                for token in tokens: #  iterate over tokens
-                    if xsschecker in token: #  if xsschecker is in token
-                        goodTokens.append(token) #  add it to goodTokens list
+                goodTokens = []  # tokens which contain xsschecker
+                for token in tokens:  # iterate over tokens
+                    if xsschecker in token:  # if xsschecker is in token
+                        goodTokens.append(token)  # add it to goodTokens list
+                        # attributes and their values are generally seperated with space so...
                        attrs = token.split(' ')
-                        for attr in attrs:
-                            if xsschecker in attr:
+                        for attr in attrs:  # iterate over the attribute
+                            if xsschecker in attr:  # is xsschecker in this attribute?
+                                # alright, this is the one we need
                                attributes.append(attr.split('=')[0])
                                break
                try:
-                    tag = re.search(r'\w+', goodTokens[num]).group() #  finds the tag "inside" which input is refelcted
+                    # finds the tag "inside" which input is refelcted
+                    tag = re.search(r'\w+', goodTokens[num]).group()
                except IndexError:
                    try:
-                        tag = re.search(r'\w+', goodTokens[num - 1]).group() #  finds the tag "inside" which input is refelcted
+                        # finds the tag "inside" which input is refelcted
+                        tag = re.search(r'\w+', goodTokens[num - 1]).group()
                    except IndexError:
                        tag = 'null'
-                tags.append(tag) #  add the tag to the tags
+                tags.append(tag)  # add the tag to the tags list
                break
-            elif toLook[loc] == '<':
+            elif toLook[loc] == '<':  # if we encounter a closing angular brackt
+                # check if the next character to it is a / to make sure its a closing tag
                if toLook[loc + 1] == '/':
                    tag = ''.join(toLook).split('</')[1].split('>')[0]
-                    if tag in badTags:
+                    if tag in badTags:  # if the tag is a non-executable context e.g. noscript, textarea
+                        # add it to environments because we need to break out of it
                        environments.append('</' + tag + '/>')
                    else:
                        environments.append('')
-                    tags.append(tag)
+                    tags.append(tag)  # add the tag to tags list
+                    # since it's a closing tag, it can't have any attributes
                    attributes.append('')
                break
            loc += 1
        num += 1
-    occurences = {}
+    occurences = {}  # a dict to store all the collected information about the reflections
    for i, loc, env, tag, attr, position in zip(range(len(locations)), locations, environments, tags, attributes, positions):
        occurences[i] = {}
        occurences[i]['position'] = position
-        if loc == 'comment':
-            env = '-->'
+        if loc == 'comment':  # if context is html comment
+            env = '-->'  # add --> as environment as we need this to break out
        occurences[i]['context'] = [loc, env, tag, attr]
    return [occurences, positions]