# NOTE(review): this file appears damaged by extraction and is not valid Python as it stands.
#   * Every statement (the imports plus the body of htmlParser(response, encoding)) is collapsed onto
#     one physical line, so everything after the first '#' on that line is lexed as a comment.
#   * Text is missing mid-function: "deep = part.split('>') if '')[0] if tag in badTags:" cannot be
#     parsed; nothing visible ever appends to `locations`; `loc`, `num` and `tag` are used without a
#     visible loop or assignment. Presumably spans between a '<' and a following '>' were stripped as
#     if they were markup -- TODO confirm against the file in version control.
#   * Both branches of "if tag in badTags" append '' to `environments`; presumably one of the two
#     literals was lost the same way -- verify.
#   Restore the original file before changing any logic here; do not reconstruct the gap by hand.
#
# What the surviving fragments show about htmlParser(response, encoding):
#   * reads `response.text`, keeping the original object in `rawResponse`;
#   * if `encoding` is truthy, replaces encoding(xsschecker) with xsschecker in that text;
#   * records match.start() for every re.finditer(xsschecker, response) hit in `positions`;
#   * splits the text on xsschecker, drops the first piece, and prefixes xsschecker to the rest;
#   * returns [occurences, positions], where occurences[i] is a dict with
#     'position' and 'context' == [loc, env, tag, attr] built by zipping the collected lists.
import re from core.config import badTags from core.encoders import base64 from core.config import xsschecker def htmlParser(response, encoding): rawResponse = response response = response.text if encoding: response = response.replace(encoding(xsschecker), xsschecker) tags = [] # tags in which the input is reflected locations = [] # contexts in which the input is reflected attributes = [] # attribute names environments = [] # strings needed to break out of the context positions = [] for match in re.finditer(xsschecker, response): positions.append(match.start()) parts = response.split(xsschecker) parts.remove(parts[0]) # remove first element since it doesn't contain xsschecker parts = [xsschecker + s for s in parts] # add xsschecker in front of all elements for part in parts: # iterate over the parts deep = part.split('>') if '')[0] if tag in badTags: environments.append('') else: environments.append('') tags.append(tag) attributes.append('') break loc += 1 num += 1 occurences = {} for i, loc, env, tag, attr, position in zip(range(len(locations)), locations, environments, tags, attributes, positions): occurences[i] = {} if loc == 'comment': value = '-->' occurences[i]['position'] = position occurences[i]['context'] = [loc, env, tag, attr] return [occurences, positions]