import re from core.config import badTags from core.config import xsschecker def htmlParser(response): tags = [] # tags in which the input is reflected locations = [] # contexts in which the input is reflected attributes = [] # attribute names environments = [] # strings needed to break out of the context parts = response.split(xsschecker) parts.remove(parts[0]) # remove first element since it doesn't contain xsschecker parts = [xsschecker + s for s in parts] # add xsschecker in front of all elements for part in parts: # iterate over the parts deep = part.split('>') if '')[0] if tag in badTags: environments.append('') else: environments.append('') tags.append(tag) attributes.append('') break loc += 1 num += 1 occurences = {} for i, loc, env, tag, attr in zip(range(len(locations)), locations, environments, tags, attributes): occurences[i] = {} if loc == 'comment': value = '-->' occurences[i]['context'] = [loc, env, tag, attr] return occurences