XSStrike/core/htmlParser.py

import re
from core.config import badTags
from core.config import xsschecker

def _enclosingTagAndAttribute(response, match):
    """Return (tag, attribute) for the '<'-delimited token that contains ``match``."""
    # The token runs from just after the previous '<' up to the next '<'
    # (or to the ends of the response when there is none).
    tokenStart = response.rfind('<', 0, match.start()) + 1
    tokenEnd = response.find('<', match.end())
    if tokenEnd == -1:
        tokenEnd = len(response)
    token = response[tokenStart:tokenEnd]

    # First run of word characters in the token is taken as the tag name.
    word = re.search(r'\w+', token)
    tag = word.group() if word else 'null'

    # The attribute is the space-separated piece surrounding this match,
    # cut at its first '='.
    pieceStart = token.rfind(' ', 0, match.start() - tokenStart) + 1
    pieceEnd = token.find(' ', match.end() - tokenStart)
    if pieceEnd == -1:
        pieceEnd = len(token)
    attribute = token[pieceStart:pieceEnd].split('=')[0]
    return tag, attribute


def htmlParser(response):
    """Describe the context of each reflection of ``xsschecker`` in ``response``.

    Args:
        response: the response body (a string) to inspect.

    Returns:
        A two-element list ``[occurences, positions]``. ``positions`` holds
        the start offset of every reflection. ``occurences`` maps a running
        index to ``{'position': offset, 'context': [location, environment,
        tag, attribute]}`` where ``location`` is one of 'script', 'html',
        'comment' or 'attribute', ``environment`` is the string recorded for
        breaking out of the context (a quote character, a closing-tag string
        for tags listed in ``badTags``, or ''), and ``tag`` / ``attribute``
        name the markup the reflection was found in.
    """
    # Both passes below work on the same literal, case-sensitive matches so
    # the per-occurrence lists describe the same reflections.
    matches = list(re.finditer(re.escape(xsschecker), response))
    positions = [match.start() for match in matches]

    # Pass 1: classify each reflection by what follows it up to the next '>'.
    hasScript = '<script' in response
    locations = []
    for remainder in response.split(xsschecker)[1:]:
        part = xsschecker + remainder
        head = part.split('>')[0]
        if '</script' in head:
            location = 'script'
        elif '</' in head:
            location = 'html'
        elif head[-2:] == '--':
            location = 'comment'
        elif hasScript:
            location = 'attribute' if '<' in part else 'script'
        else:
            location = 'html'
        locations.append(location)

    # Pass 2: the first quote or '<' after a reflection decides its
    # environment, tag and attribute.
    environments = []
    tags = []
    attributes = []
    for match in matches:
        rest = response[match.end():]
        delimiter = re.search(r'[\'"`<]', rest)
        if delimiter is None:
            # NOTE(review): nothing is recorded for this reflection, so the
            # zip below pairs later entries with earlier locations and drops
            # the tail. Kept as in the original; confirm against callers.
            continue
        if delimiter.group() != '<':
            # Reflection is followed by a quote: use the token that actually
            # contains this match, so tag and attribute stay correct when one
            # token holds several reflections or contexts are mixed.
            tag, attribute = _enclosingTagAndAttribute(response, match)
            environments.append(delimiter.group())
            tags.append(tag)
            attributes.append(attribute)
        elif rest[delimiter.end():delimiter.end() + 1] == '/':
            # Reflection is followed by a closing tag. Slicing (rather than
            # indexing) keeps a trailing '<' from raising IndexError.
            closing = rest[delimiter.end() + 1:]
            tag = closing.split('</', 1)[0].split('>', 1)[0]
            environments.append('</' + tag + '/>' if tag in badTags else '')
            tags.append(tag)
            attributes.append('')
        # A '<' that opens a new tag records nothing (see NOTE above).

    occurences = {}
    contexts = zip(locations, environments, tags, attributes, positions)
    for i, (loc, env, tag, attr, position) in enumerate(contexts):
        occurences[i] = {
            'position': position,
            'context': [loc, env, tag, attr],
        }
    return [occurences, positions]
Add files via upload 2018-10-27 18:58:52 +05:30			`import re`
			`from core.config import badTags`
			`from core.config import xsschecker`

			`def htmlParser(response):`
			`tags = [] # tags in which the input is reflected`
			`locations = [] # contexts in which the input is reflected`
			`attributes = [] # attribute names`
			`environments = [] # strings needed to break out of the context`
Handle dynamic number of reflections (Fixes #78) 2018-10-30 16:28:56 +05:30			`positions = []`
			`for match in re.finditer(xsschecker, response):`
			`positions.append(match.start())`
Add files via upload 2018-10-27 18:58:52 +05:30			`parts = response.split(xsschecker)`
			`parts.remove(parts[0]) # remove first element since it doesn't contain xsschecker`
			`parts = [xsschecker + s for s in parts] # add xsschecker in front of all elements`
			`for part in parts: # iterate over the parts`
			`deep = part.split('>')`
			`if '</script' in deep[0]:`
			`location = 'script'`
			`elif '</' in deep[0]:`
			`location = 'html'`
			`elif deep[0][-2:] == '--':`
			`location = 'comment'`
			`else:`
handle wrong content type 2018-10-28 12:42:59 +05:30			`if '<script' in response:`
			`location = 'script'`
			`for char in part:`
			`if char == '<':`
			`location = 'attribute'`
			`break`
			`else:`
			`location = 'html'`
Add files via upload 2018-10-27 18:58:52 +05:30			`locations.append(location) # add location to locations list`
			`num = 0 # dummy value to keep record of occurence being processed`
			`for occ in re.finditer(xsschecker, response, re.IGNORECASE): # find xsschecker in response and return matches`
			`toLook = list(response[occ.end():]) # convert "xsschecker to EOF" into a list`
			`for loc in range(len(toLook)): # interate over the chars`
			if toLook[loc] in ('\'', '"', '`'): # if the char is a quote
			`environments.append(toLook[loc]) # add it to enviornemts list`
			`tokens = response.split('<')`
			`goodTokens = [] # tokens which contain xsschecker`
			`for token in tokens: # iterate over tokens`
			`if xsschecker in token: # if xsschecker is in token`
			`goodTokens.append(token) # add it to goodTokens list`
			`attrs = token.split(' ')`
			`for attr in attrs:`
			`if xsschecker in attr:`
			`attributes.append(attr.split('=')[0])`
			`break`
			`try:`
			`tag = re.search(r'\w+', goodTokens[num]).group() # finds the tag "inside" which input is refelcted`
handle wrong content type 2018-10-28 12:42:59 +05:30			`except IndexError:`
			`try:`
			`tag = re.search(r'\w+', goodTokens[num - 1]).group() # finds the tag "inside" which input is refelcted`
			`except IndexError:`
			`tag = 'null'`
Add files via upload 2018-10-27 18:58:52 +05:30			`tags.append(tag) # add the tag to the tags`
			`break`
			`elif toLook[loc] == '<':`
			`if toLook[loc + 1] == '/':`
			`tag = ''.join(toLook).split('</')[1].split('>')[0]`
			`if tag in badTags:`
			`environments.append('</' + tag + '/>')`
			`else:`
			`environments.append('')`
			`tags.append(tag)`
			`attributes.append('')`
			`break`
			`loc += 1`
			`num += 1`
			`occurences = {}`
Handle dynamic number of reflections (Fixes #78) 2018-10-30 16:28:56 +05:30			`for i, loc, env, tag, attr, position in zip(range(len(locations)), locations, environments, tags, attributes, positions):`
Add files via upload 2018-10-27 18:58:52 +05:30			`occurences[i] = {}`
			`if loc == 'comment':`
			`value = '-->'`
Handle dynamic number of reflections (Fixes #78) 2018-10-30 16:28:56 +05:30			`occurences[i]['position'] = position`
Add files via upload 2018-10-27 18:58:52 +05:30			`occurences[i]['context'] = [loc, env, tag, attr]`
Handle dynamic number of reflections (Fixes #78) 2018-10-30 16:28:56 +05:30			`return [occurences, positions]`