Fixed HTML comment context handling + Refactor

This commit is contained in:
Somdev Sangwan
2018-11-15 15:41:01 +05:30
committed by GitHub
parent 60ec370775
commit 614e47276c
18 changed files with 81 additions and 74 deletions

View File

@@ -1,23 +1,23 @@
import re
from core.config import badTags
from core.config import xsschecker
from core.config import badTags, xsschecker
def htmlParser(response, encoding):
rawResponse = response
response = response.text
if encoding:
response = response.replace(encoding(xsschecker), xsschecker)
tags = [] # tags in which the input is reflected
locations = [] # contexts in which the input is reflected
attributes = [] # attribute names
environments = [] # strings needed to break out of the context
tags = [] # tags in which the input is reflected
locations = [] # contexts in which the input is reflected
attributes = [] # attribute names
environments = [] # strings needed to break out of the context
positions = []
for match in re.finditer(xsschecker, response):
positions.append(match.start())
parts = response.split(xsschecker)
parts.remove(parts[0]) # remove first element since it doesn't contain xsschecker
parts = [xsschecker + s for s in parts] # add xsschecker in front of all elements
for part in parts: # iterate over the parts
parts.remove(parts[0]) # remove first element since it doesn't contain xsschecker
parts = [xsschecker + s for s in parts] # add xsschecker in front of all elements
for part in parts: # iterate over the parts
deep = part.split('>')
if '</script' in deep[0]:
location = 'script'
@@ -38,31 +38,31 @@ def htmlParser(response, encoding):
if '<' not in response:
if rawResponse.headers['Content-Type'] == 'text/html':
location = 'html'
locations.append(location) # add location to locations list
num = 0 # dummy value to keep record of occurrence being processed
for occ in re.finditer(xsschecker, response, re.IGNORECASE): # find xsschecker in response and return matches
toLook = list(response[occ.end():]) # convert "xsschecker to EOF" into a list
for loc in range(len(toLook)): # iterate over the chars
if toLook[loc] in ('\'', '"', '`'): # if the char is a quote
environments.append(toLook[loc]) # add it to environments list
locations.append(location) # add location to locations list
num = 0 # dummy value to keep record of occurrence being processed
for occ in re.finditer(xsschecker, response, re.IGNORECASE): # find xsschecker in response and return matches
toLook = list(response[occ.end():]) # convert "xsschecker to EOF" into a list
for loc in range(len(toLook)): # iterate over the chars
if toLook[loc] in ('\'', '"', '`'): # if the char is a quote
environments.append(toLook[loc]) # add it to environments list
tokens = response.split('<')
goodTokens = [] # tokens which contain xsschecker
for token in tokens: # iterate over tokens
if xsschecker in token: # if xsschecker is in token
goodTokens.append(token) # add it to goodTokens list
goodTokens = [] # tokens which contain xsschecker
for token in tokens: # iterate over tokens
if xsschecker in token: # if xsschecker is in token
goodTokens.append(token) # add it to goodTokens list
attrs = token.split(' ')
for attr in attrs:
if xsschecker in attr:
attributes.append(attr.split('=')[0])
break
try:
tag = re.search(r'\w+', goodTokens[num]).group() # finds the tag "inside" which input is reflected
tag = re.search(r'\w+', goodTokens[num]).group() # finds the tag "inside" which input is reflected
except IndexError:
try:
tag = re.search(r'\w+', goodTokens[num - 1]).group() # finds the tag "inside" which input is reflected
tag = re.search(r'\w+', goodTokens[num - 1]).group() # finds the tag "inside" which input is reflected
except IndexError:
tag = 'null'
tags.append(tag) # add the tag to the tags
tags.append(tag) # add the tag to the tags
break
elif toLook[loc] == '<':
if toLook[loc + 1] == '/':
@@ -79,8 +79,8 @@ def htmlParser(response, encoding):
occurences = {}
for i, loc, env, tag, attr, position in zip(range(len(locations)), locations, environments, tags, attributes, positions):
occurences[i] = {}
if loc == 'comment':
value = '-->'
occurences[i]['position'] = position
if loc == 'comment':
env = '-->'
occurences[i]['context'] = [loc, env, tag, attr]
return [occurences, positions]