re-written to perfection
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from core.config import badTags, xsschecker
|
from core.config import badTags, xsschecker
|
||||||
from core.utils import isBadContext
|
from core.utils import isBadContext, equalize
|
||||||
|
|
||||||
|
|
||||||
def htmlParser(response, encoding):
|
def htmlParser(response, encoding):
|
||||||
@@ -9,104 +9,70 @@ def htmlParser(response, encoding):
|
|||||||
response = response.text # response content
|
response = response.text # response content
|
||||||
if encoding: # if the user has specified an encoding, encode the probe in that
|
if encoding: # if the user has specified an encoding, encode the probe in that
|
||||||
response = response.replace(encoding(xsschecker), xsschecker)
|
response = response.replace(encoding(xsschecker), xsschecker)
|
||||||
tags = [] # tags in which the input is reflected
|
reflections = response.count(xsschecker)
|
||||||
locations = [] # contexts in which the input is reflected
|
position_and_context = {}
|
||||||
attributes = [] # attribute names
|
environment_details = {}
|
||||||
environments = [] # strings needed to break out of the context
|
clean_response = re.sub(r'<!--[.\s\S]*?-->', '', response)
|
||||||
positions = [] # postions of all the reflections of the xsschecker
|
script_context = re.finditer(r'(?i)<script[^>]*>.*?(%s).*?</script>' % xsschecker, clean_response)
|
||||||
for match in re.finditer(xsschecker, response):
|
for occurence in script_context:
|
||||||
positions.append(match.start())
|
thisPosition = occurence.start(1)
|
||||||
|
position_and_context[thisPosition] = 'script'
|
||||||
# It finds the contexts of the reflections
|
environment_details[thisPosition] = {}
|
||||||
|
environment_details[thisPosition]['details'] = {}
|
||||||
parts = response.split(xsschecker)
|
attribute_context = re.finditer(r'<[^>]*?(%s)[^>]*?>' % xsschecker, clean_response)
|
||||||
# remove first element since it doesn't contain xsschecker
|
for occurence in attribute_context:
|
||||||
parts.remove(parts[0])
|
match = occurence.group(0)
|
||||||
# add xsschecker in front of all elements
|
thisPosition = occurence.start(1)
|
||||||
parts = [xsschecker + s for s in parts]
|
parts = re.split(r'\s', match)
|
||||||
for part in parts: # iterate over the parts
|
tag = parts[0][1:]
|
||||||
deep = part.split('>')
|
for part in parts:
|
||||||
if '</script' in deep[0]:
|
if xsschecker in part:
|
||||||
location = 'script'
|
Type, quote, name, value = '', '', '', ''
|
||||||
elif '</' in deep[0] or len(parts) == 1:
|
if '=' in part:
|
||||||
location = 'html'
|
quote = re.search(r'=([\'`"])?', part).group(1)
|
||||||
|
name_and_value = part.split('=')[0], '='.join(part.split('=')[1:])
|
||||||
|
if xsschecker == name_and_value[0]:
|
||||||
|
Type = 'name'
|
||||||
else:
|
else:
|
||||||
num = 0
|
Type = 'value'
|
||||||
for i in deep:
|
name = name_and_value[0]
|
||||||
if i[-2:] == '--':
|
value = name_and_value[1].rstrip('>').rstrip(quote).lstrip(quote)
|
||||||
if '<!--' not in ''.join(deep[:num + 1]):
|
else:
|
||||||
location = 'comment'
|
Type = 'flag'
|
||||||
break
|
position_and_context[thisPosition] = 'attribute'
|
||||||
continue
|
environment_details[thisPosition] = {}
|
||||||
location = 'script'
|
environment_details[thisPosition]['details'] = {'tag' : tag, 'type' : Type, 'quote' : quote, 'value' : value, 'name' : name}
|
||||||
for char in part:
|
html_context = re.finditer(xsschecker, clean_response)
|
||||||
# the only way to find out if it's attribute context is to see if '<' is present.
|
for occurence in html_context:
|
||||||
if char == '<':
|
thisPosition = occurence.start()
|
||||||
location = 'attribute' # no, it doesn't match '<script>'
|
if thisPosition not in position_and_context:
|
||||||
break
|
position_and_context[occurence.start()] = 'html'
|
||||||
num += 1
|
environment_details[thisPosition] = {}
|
||||||
if '<' not in response:
|
environment_details[thisPosition]['details'] = {}
|
||||||
if rawResponse.headers['Content-Type'].startswith('text/html'):
|
comment_context = re.finditer(r'<!--(?![.\s\S]*-->)[.\s\S]*(%s)[.\s\S]*?-->' % xsschecker, response)
|
||||||
location = 'html'
|
for occurence in comment_context:
|
||||||
locations.append(location) # add location to locations list
|
thisPosition = occurence.start(1)
|
||||||
|
position_and_context[thisPosition] = 'comment'
|
||||||
|
environment_details[thisPosition] = {}
|
||||||
|
environment_details[thisPosition]['details'] = {}
|
||||||
|
database = {}
|
||||||
|
for i in sorted(position_and_context):
|
||||||
|
database[i] = {}
|
||||||
|
database[i]['position'] = i
|
||||||
|
database[i]['context'] = position_and_context[i]
|
||||||
|
database[i]['details'] = environment_details[i]['details']
|
||||||
|
|
||||||
bad_contexts = re.finditer(r'''(?s)(?i)<(style|template|textarea|title|noembed|noscript)>[.\s\S]*(%s)[.\s\S]*</\1>''' % xsschecker, response)
|
bad_contexts = re.finditer(r'(?s)(?i)<(style|template|textarea|title|noembed|noscript)>[.\s\S]*(%s)[.\s\S]*</\1>' % xsschecker, response)
|
||||||
non_executable_contexts = []
|
non_executable_contexts = []
|
||||||
for each in bad_contexts:
|
for each in bad_contexts:
|
||||||
non_executable_contexts.append([each.start(), each.end(), each.group(1)])
|
non_executable_contexts.append([each.start(), each.end(), each.group(1)])
|
||||||
# Finds the "environment" of reflections. Is it within double quotes? Which tag contains the reflection?
|
|
||||||
num = 0 # dummy value to keep record of occurence being processed
|
if non_executable_contexts:
|
||||||
# find xsschecker in response and return matches
|
for key in database.keys():
|
||||||
for occ in re.finditer(xsschecker, response, re.IGNORECASE):
|
position = database[key]['position']
|
||||||
# convert "xsschecker to EOF" into a list
|
badTag = isBadContext(position, non_executable_contexts)
|
||||||
toLook = list(response[occ.end():])
|
if badTag:
|
||||||
for loc in range(len(toLook)): # interate over the chars
|
database[key]['details']['badTag'] = badTag
|
||||||
if toLook[loc] in ('\'', '"', '`'): # if the char is a quote
|
|
||||||
environments.append(toLook[loc]) # add it to environments list
|
|
||||||
tokens = response.split('<')
|
|
||||||
goodTokens = [] # tokens which contain xsschecker
|
|
||||||
for token in tokens: # iterate over tokens
|
|
||||||
if xsschecker in token: # if xsschecker is in token
|
|
||||||
goodTokens.append(token) # add it to goodTokens list
|
|
||||||
# attributes and their values are generally separated by spaces, so...
|
|
||||||
attrs = token.split(' ')
|
|
||||||
for attr in attrs: # iterate over the attribute
|
|
||||||
if xsschecker in attr: # is xsschecker in this attribute?
|
|
||||||
# alright, this is the one we need
|
|
||||||
attributeName = attr.split('=')[0]
|
|
||||||
attributeValue = ''.join(attr.split('=')[1:])
|
|
||||||
if attributeValue.startswith('\'') or attributeValue.startswith('"'):
|
|
||||||
attributeValue = attributeValue[1:-1]
|
|
||||||
attributes.append({attributeName:attributeValue})
|
|
||||||
break
|
|
||||||
try:
|
|
||||||
# finds the tag "inside" which the input is reflected
|
|
||||||
tag = re.search(r'\w+', goodTokens[num]).group()
|
|
||||||
except IndexError:
|
|
||||||
try:
|
|
||||||
# finds the tag "inside" which the input is reflected
|
|
||||||
tag = re.search(r'\w+', goodTokens[num - 1]).group()
|
|
||||||
except IndexError:
|
|
||||||
tag = 'null'
|
|
||||||
tags.append(tag) # add the tag to the tags list
|
|
||||||
break
|
|
||||||
else: # if we encounter a closing angular brackt
|
|
||||||
# check if the character next to it is a / to make sure it's a closing tag
|
|
||||||
badContext = isBadContext(positions[num], non_executable_contexts)
|
|
||||||
if badContext:
|
|
||||||
environments.append('</' + badContext + '>')
|
|
||||||
else:
|
else:
|
||||||
environments.append('')
|
database[key]['details']['badTag'] = ''
|
||||||
tags.append('')
|
return database
|
||||||
attributes.append('')
|
|
||||||
break
|
|
||||||
loc += 1
|
|
||||||
num += 1
|
|
||||||
occurences = {} # a dict to store all the collected information about the reflections
|
|
||||||
for i, loc, env, tag, attr, position in zip(range(len(locations)), locations, environments, tags, attributes, positions):
|
|
||||||
occurences[i] = {}
|
|
||||||
occurences[i]['position'] = position
|
|
||||||
if loc == 'comment': # if context is html comment
|
|
||||||
env = '-->' # add --> as environment as we need this to break out
|
|
||||||
occurences[i]['context'] = [loc, env, tag, attr]
|
|
||||||
return [occurences, positions]
|
|
||||||
|
|||||||
Reference in New Issue
Block a user