From ea5950bdebcde5e1e4c348fea0d284f9ff5e82db Mon Sep 17 00:00:00 2001 From: Dan McInerney Date: Sat, 13 Dec 2014 18:40:23 -0700 Subject: [PATCH] added url path xss detection, refactored url param functions --- .gitignore | 1 + xsscrapy/middlewares.py | 7 +- xsscrapy/pipelines.py | 38 ++++---- xsscrapy/spiders/xss_spider.py | 154 +++++++++++++++++++++++++-------- 4 files changed, 138 insertions(+), 62 deletions(-) diff --git a/.gitignore b/.gitignore index 270fadb..082d288 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ *.pyc xsscrapy-vulns* +*.txt *.swp *.swo *.png diff --git a/xsscrapy/middlewares.py b/xsscrapy/middlewares.py index 07b9d12..29737b8 100644 --- a/xsscrapy/middlewares.py +++ b/xsscrapy/middlewares.py @@ -35,16 +35,17 @@ class InjectedDupeFilter(object): def process_request(self, request, spider): meta = request.meta - if 'xss_place' not in meta or 'delim' not in meta: + if 'xss_place' not in meta: return delim = meta['delim'] # Injected URL dupe handling if meta['xss_place'] == 'url': + url = request.url #replace the delim characters with nothing so we only test the URL #with the payload - url = request.url.replace(delim, '') - if url in URLS_SEEN: + no_delim_url = url.replace(delim, '') + if no_delim_url in URLS_SEEN: raise IgnoreRequest spider.log('Sending payloaded URL: %s' % url) URLS_SEEN.add(url) diff --git a/xsscrapy/pipelines.py b/xsscrapy/pipelines.py index 36802ae..39503e2 100644 --- a/xsscrapy/pipelines.py +++ b/xsscrapy/pipelines.py @@ -10,12 +10,10 @@ import lxml.etree import lxml.html from lxml.html import soupparser, fromstring import itertools -#from IPython import embed +from IPython import embed class XSSCharFinder(object): def __init__(self): - self.redir_pld = 'JaVAscRIPT:prompt(99)' - self.test_str = '\'"(){}:/' self.url_param_xss_items = [] def process_item(self, item, spider): @@ -24,6 +22,7 @@ class XSSCharFinder(object): payload = meta['payload'] delim = meta['delim'] + param = meta['xss_param'] resp_url = response.url body = response.body mismatch = False @@ -32,12 +31,9 @@ class XSSCharFinder(object): # Regex: ( ) mean group 1 is within the parens, . means any char, # {1,80} means match any char 0 to 80 times, 80 chosen because double URL encoding # ? makes the search nongreedy so it stops after hitting its limits - #full_match = '%s.*?%s' % (delim, delim) full_match = '%s.{0,80}?%s' % (delim, delim) # matches with semicolon which sometimes cuts results off sc_full_match = '%s.{0,80}?%s;9' % (delim, delim) - #chars_between_delims = '%s(.*?)%s' % (delim, delim) - #chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim) # Quick sqli check based on DSSS dbms, regex = self.sqli_check(body, meta['orig_body']) @@ -52,11 +48,13 @@ class XSSCharFinder(object): # XSS detection starts here re_matches = sorted([(m.start(), m.group()) for m in re.finditer(full_match, body)]) - if re_matches: + if '/verifypasswd.php/1zqj' in resp_url: + embed() + + if len(re_matches) > 0: scolon_matches = sorted([(m.start(), m.group()) for m in re.finditer(sc_full_match, body)]) lxml_injs = self.get_lxml_matches(full_match, body, resp_url, delim) if lxml_injs: - err = None if len(re_matches) != len(lxml_injs): spider.log('Error: mismatch in injections found by lxml and regex. 
Higher chance of false positive for %s' % resp_url) @@ -64,7 +62,6 @@ class XSSCharFinder(object): mismatch = True inj_data = self.combine_regex_lxml(lxml_injs, re_matches, scolon_matches, body, mismatch) - # If mismatch is True, then "for offset in sorted(inj_data)" will fail with TypeError try: for offset in sorted(inj_data): @@ -239,8 +236,7 @@ class XSSCharFinder(object): tag_index, tag, attr, attr_val, payload, reflected_chars, line = injection pl_delim = payload[:7] - #full_match = '%s.*?%s' % (pl_delim, pl_delim) - full_match = '%s.{0,80}?%s' % (pl_delim, pl_delim) + full_match = '%s.{0,85}?%s' % (pl_delim, pl_delim) line = re.sub(full_match, 'INJECTION', line) all_chars_payloads = {} @@ -375,6 +371,9 @@ class XSSCharFinder(object): # javascript:alert(1) vulns # We do this slicing operation because ;9 might be at the end # although it's unnecessary for the payload + + # CHECK HERE, PASS DOWN THE ORIG ATTR VAL + #if delim+'subbed' in attr_val: if attr_val[:len(delim+'subbed')] == delim+'subbed': if tag == 'a' and attr == 'href': # Only need : ( and ) to use javascript:prompt(4) redir payload @@ -559,8 +558,7 @@ class XSSCharFinder(object): subbed_body = re.sub(full_match, sub, body) doc = self.html_parser(subbed_body, resp_url) lxml_injs = self.xpath_inj_points(sub, doc) - if lxml_injs: - return lxml_injs + return lxml_injs def html_parser(self, body, resp_url): try: @@ -657,8 +655,8 @@ class XSSCharFinder(object): #unfiltered_chars = self.get_unfiltered_chars(payload, pl_delim, scolon_matches, match_offset) reflected_chars = self.get_reflected_chars(tag, attr, payload, pl_delim, scolon_matches, match_offset) # Common false+ shows only "> as unfiltered if script parses the chars between 2 unrelated delim strs - if reflected_chars == '">': - reflected_chars = '' + #if reflected_chars == '">': + # reflected_chars = '' all_inj_data[match_offset] = [tag_index, tag, attr, attr_val, payload, reflected_chars, line] return all_inj_data @@ -869,20 +867,16 @@ class XSSCharFinder(object): # Make sure js payloads remove escaped ' and ", also remove ; # since ; will show up in html encoded entities. 
If ; is unfiltered # it will be added after this function - #escaped_chars = re.findall(r'\\(.)', chars) chars_between_delim = payload.replace(delim, '')#.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '') + #If injection is inside script tag, remove the escaped chars if tag == 'script' or attr in self.event_attributes(): chars_between_delim = chars_between_delim.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '') else: + # If it's not a script then just remove the \'s otherwise they show up in Unfiltered in the item chars_between_delim = chars_between_delim.replace("\\", "") - # List for just the inj point - #for c in chars_found: - # if c in self.test_str: - # unfiltered_chars.append(c) - - # # Check if a colon needs to be added to the unfiltered chars + # Check if a colon needs to be added to the unfiltered chars for scolon_match in scolon_matches: # Confirm the string offset of the match is the same # Since scolon_match will only exist when ;9 was found diff --git a/xsscrapy/spiders/xss_spider.py b/xsscrapy/spiders/xss_spider.py index 14c7554..48a4b62 100644 --- a/xsscrapy/spiders/xss_spider.py +++ b/xsscrapy/spiders/xss_spider.py @@ -6,7 +6,7 @@ from scrapy.http import FormRequest, Request from scrapy.selector import Selector from xsscrapy.items import inj_resp from xsscrapy.loginform import fill_login_form -from urlparse import urlparse, parse_qsl, urljoin +from urlparse import urlparse, parse_qsl, urljoin, urlunparse, urlunsplit from scrapy.http.cookies import CookieJar from cookielib import Cookie @@ -32,7 +32,8 @@ class XSSspider(CrawlSpider): # If you're logging into a site with a logout link, you'll want to # uncomment the rule below and comment the shorter one right after to # prevent yourself from being logged out automatically - rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), ) + #rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), ) + rules = (Rule(LinkExtractor(deny='contactus'), callback='parse_resp', follow=True), ) def __init__(self, *args, **kwargs): # run using: scrapy crawl xss_spider -a url='http://example.com' @@ -41,7 +42,7 @@ class XSSspider(CrawlSpider): hostname = urlparse(self.start_urls[0]).hostname # With subdomains self.allowed_domains = [hostname] # adding [] around the value seems to allow it to crawl subdomain of value - self.delim = '1zqjx' + self.delim = '1zqj' # semi colon goes on end because sometimes it cuts stuff off like # gruyere or the second cookie delim self.test_str = '\'"(){}:/' @@ -135,6 +136,10 @@ class XSSspider(CrawlSpider): reqs = [] orig_url = response.url body = response.body + parsed_url = urlparse(orig_url) + # parse_qsl rather than parse_qs in order to preserve order + # will always return a list + url_params = parse_qsl(parsed_url.query, keep_blank_values=True) try: # soupparser will handle broken HTML better (like identical attributes) but god damn will you pay for it @@ -177,11 +182,9 @@ class XSSspider(CrawlSpider): if form_reqs: reqs += form_reqs - # Test URL variables with xss strings - payloaded_urls, url_delim_str = self.make_URLs(orig_url, payload) # list of tuples where item[0]=url, and item[1]=changed param - print 'URL:', payloaded_urls, url_delim_str + payloaded_urls = self.make_URLs(orig_url, parsed_url, url_params) if payloaded_urls: - url_reqs = self.make_url_reqs(orig_url, payloaded_urls, url_delim_str) + url_reqs = self.make_url_reqs(orig_url, payloaded_urls) 
if url_reqs: reqs += url_reqs @@ -243,10 +246,8 @@ class XSSspider(CrawlSpider): ''' Payload each form input in each input's own request ''' reqs = [] vals_urls_meths = [] - - two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase) - delim_str = self.delim + two_rand_letters - payload = delim_str + payload + delim_str + ';9' + + payload = self.make_payload() for form in forms: if form.inputs: @@ -283,7 +284,7 @@ class XSSspider(CrawlSpider): 'orig_url':orig_url, 'xss_place':'form', 'POST_to':url, - 'delim':delim_str}, + 'delim':payload[:len(self.delim)+2]}, dont_filter=True, callback=self.xss_chars_finder) reqs.append(req) @@ -300,9 +301,7 @@ class XSSspider(CrawlSpider): def make_cookie_reqs(self, url, payload, xss_param): ''' Generate payloaded cookie header requests ''' - two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase) - delim_str = self.delim + two_rand_letters - payload = delim_str + payload + delim_str + ';9' + payload = self.make_payload() reqs = [Request(url, meta={'xss_place':'header', @@ -310,7 +309,7 @@ class XSSspider(CrawlSpider): 'xss_param':xss_param, 'orig_url':url, 'payload':payload, - 'delim':delim_str}, + 'delim':payload[:len(self.delim)+2]}, cookies={'userinput':payload}, callback=self.xss_chars_finder, dont_filter=True)] @@ -318,33 +317,116 @@ class XSSspider(CrawlSpider): if len(reqs) > 0: return reqs - def make_URLs(self, url, payload): - ''' Add links with variables in them to the queue again but with XSS testing payloads - Will return a tuple: (url, injection point, payload) ''' + def make_URLs(self, orig_url, parsed_url, url_params): + """ + Create the URL parameter payloaded URLs + """ + payloaded_urls = [] + # Create 1 URL per payloaded param + new_query_strings = self.get_single_payload_queries(url_params) + if new_query_strings: + # Payload the parameters + for query in new_query_strings: + + query_str = query[0] + params = query[1] + payload = query[2] + # scheme #netlo #path #params #query (url params) #fragment + payloaded_url = urlunparse((parsed_url[0], parsed_url[1], parsed_url[2], parsed_url[3], query_str, parsed_url[5])) + payloaded_url = urllib.unquote(payloaded_url) + payloaded_urls.append((payloaded_url, params, payload)) + + # Payload the URL path + payloaded_url_path = self.payload_url_path(parsed_url) + payloaded_urls.append(payloaded_url_path) + else: + # Payload end of URL if there's no parameters + payloaded_end_of_url = self.payload_end_of_url(orig_url) + payloaded_urls.append(payloaded_end_of_url) + + if len(payloaded_urls) > 0: + return payloaded_urls + + def payload_url_path(self, parsed_url): + """ + Payload the URL path like: + http://example.com/page1.php?x=1&y=2 --> + http://example.com/page1.php/FUZZ/?x=1&y=2 + """ + # Remove / so that it doesn't think it's 2 folders in the fuzz chars + payload = self.make_payload().replace('/', '') + path = parsed_url[2] + if path.endswith('/'): + path = path + payload + '/' + else: + path = path + '/' + payload + '/' + #scheme, netloc, path, params, query (url params), fragment + payloaded_url = urlunparse((parsed_url[0], parsed_url[1], path, parsed_url[3], parsed_url[4], parsed_url[5])) + payloaded_url = urllib.unquote(payloaded_url) + payloaded_data = (payloaded_url, 'URL path', payload) + + return payloaded_data + + def get_single_payload_queries(self, url_params): + """ + Make a list of lists of tuples where each secondary list has 1 payloaded + param and the rest are original value + """ + new_payloaded_params = [] + 
changed_params = [] + modified = False + # Create a list of lists where num of lists = len(params) + for x in xrange(0, len(url_params)): + single_url_params = [] + + # Make the payload + payload = self.make_payload() + + for p in url_params: + param, value = p + + # if param has not been modified and we haven't changed a parameter for this loop + if param not in changed_params and modified == False: + # Do we need the original value there? Might be helpful sometimes but think about testing for + # versus and the xss payload javascript:alert(1) + new_param_val = (param, payload) + #new_param_val = (param, value+payload) + single_url_params.append(new_param_val) + changed_params.append(param) + modified = param + else: + single_url_params.append(p) + + # Add the modified, urlencoded params to the master list + new_payloaded_params.append((urllib.urlencode(single_url_params), modified, payload)) + # Reset the changed parameter tracker + modified = False + + if len(new_payloaded_params) > 0: + # [(payloaded params, payloaded param, payload), (payloaded params, payloaded param, payload)] + return new_payloaded_params + + def make_payload(self): + """ + Make the payload with a unique delim + """ two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase) delim_str = self.delim + two_rand_letters - payload = delim_str + payload + delim_str + ';9' + payload = delim_str + self.test_str + delim_str + ';9' + return payload - if '=' in url and '?' in url: - # If URL has variables, payload them - payloaded_urls = self.payload_url_vars(url, payload) - else: - # If URL has no variables, tack payload onto end of URL - payloaded_urls = self.payload_end_of_url(url, payload) - - return payloaded_urls, delim_str - - def payload_end_of_url(self, url, payload): + def payload_end_of_url(self, url): ''' Payload the end of the URL to catch some DOM(?) and other reflected XSSes ''' + payload = self.make_payload() # Make URL test and delim strings unique if url[-1] == '/': payloaded_url = url+payload else: payloaded_url = url+'/'+payload - return [(payloaded_url, 'end of url', payload)] + return (payloaded_url, 'end of url', payload) def payload_url_vars(self, url, payload): ''' Payload the URL variables ''' @@ -443,7 +525,7 @@ class XSSspider(CrawlSpider): return (netloc, protocol, doc_domain, path) - def make_url_reqs(self, orig_url, payloaded_urls, delim_str): + def make_url_reqs(self, orig_url, payloaded_urls): ''' Make the URL requests ''' reqs = [Request(url[0], @@ -451,7 +533,7 @@ class XSSspider(CrawlSpider): 'xss_param':url[1], 'orig_url':orig_url, 'payload':url[2], - 'delim':delim_str}, + 'delim':url[2][:len(self.delim)+2]}, callback = self.xss_chars_finder) for url in payloaded_urls] # Meta is the payload @@ -461,9 +543,7 @@ class XSSspider(CrawlSpider): def make_header_reqs(self, url, payload, inj_headers): ''' Generate header requests ''' - two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase) - delim_str = self.delim + two_rand_letters - payload = delim_str + payload + delim_str + ';9' + payload = self.make_payload() reqs = [Request(url, headers={inj_header:payload}, @@ -471,7 +551,7 @@ class XSSspider(CrawlSpider): 'xss_param':inj_header, 'orig_url':url, 'payload':payload, - 'delim':delim_str, + 'delim':payload[:len(self.delim)+2], 'UA':self.get_user_agent(inj_header, payload)}, dont_filter=True, callback = self.xss_chars_finder)
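
Note: the sketches below are not part of the patch. First, the per-parameter URL payloading that the refactored make_URLs()/get_single_payload_queries() flow performs: one payloaded URL per query parameter, with parse_qsl preserving parameter order. This is a simplified, self-contained version assuming Python 2 (as the project uses) and a made-up example URL:

    # Illustrative sketch only: one payloaded URL per query parameter,
    # mirroring the idea of get_single_payload_queries() + make_URLs().
    import random
    import string
    import urllib
    from urlparse import urlparse, parse_qsl, urlunparse

    DELIM = '1zqj'          # base delimiter, as shortened in the patch
    TEST_STR = '\'"(){}:/'  # characters whose reflection is tested

    def make_payload():
        # Unique delimiter per request: base delim + two random letters
        delim_str = DELIM + ''.join(random.choice(string.lowercase) for _ in xrange(2))
        return delim_str + TEST_STR + delim_str + ';9'

    def single_param_urls(url):
        parsed = urlparse(url)
        # parse_qsl keeps parameter order and returns a list of (param, value)
        params = parse_qsl(parsed.query, keep_blank_values=True)
        payloaded = []
        for i, (param, _) in enumerate(params):
            payload = make_payload()
            # Replace only the i-th parameter's value; leave the rest untouched
            new_params = [(p, payload if j == i else v)
                          for j, (p, v) in enumerate(params)]
            query = urllib.urlencode(new_params)
            new_url = urllib.unquote(urlunparse(parsed[:4] + (query,) + parsed[5:]))
            payloaded.append((new_url, param, payload))
        return payloaded

    if __name__ == '__main__':
        # Hypothetical target URL, used only for illustration
        for u in single_param_urls('http://example.com/page1.php?x=1&y=2'):
            print u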
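
Second, the new URL-path injection point added as payload_url_path(): the delimited test string goes in as an extra path segment while the query string is left intact. Again a simplified sketch; the URL and payload values are hypothetical:

    # Illustrative sketch only: mirrors the logic of payload_url_path().
    import urllib
    from urlparse import urlparse, urlunparse

    def payload_url_path(url, payload):
        parsed = urlparse(url)
        # Strip '/' from the payload so it stays a single extra path segment
        payload = payload.replace('/', '')
        path = parsed[2]
        if path.endswith('/'):
            path = path + payload + '/'
        else:
            path = path + '/' + payload + '/'
        # scheme, netloc, path, params, query (url params), fragment
        return urllib.unquote(urlunparse((parsed[0], parsed[1], path,
                                          parsed[3], parsed[4], parsed[5])))

    # e.g. http://example.com/page1.php?x=1&y=2  ->
    #      http://example.com/page1.php/1zqjab'"(){}:1zqjab;9/?x=1&y=2
    print payload_url_path('http://example.com/page1.php?x=1&y=2',
                           '1zqjab\'"(){}:/1zqjab;9')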
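
Finally, how the pipeline locates reflections once a payloaded response comes back: it scans the body for anything bracketed by two copies of the unique delimiter (the {0,80}? bound keeps the match short and non-greedy, and the ;9 variant catches reflections that were cut off at a semicolon). The delim and body values below are made up for illustration:

    # Illustrative sketch only: the pipeline's reflection-finding regexes.
    import re

    delim = '1zqjab'
    # Hypothetical response body: one raw reflection, one HTML-encoded one
    body = ('<input value="1zqjab\'&quot;(){}:/1zqjab;9">'
            '<p>1zqjab&#39;&quot;(){}:/1zqjab;9</p>')

    full_match = '%s.{0,80}?%s' % (delim, delim)        # delim ... delim, non-greedy
    sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)   # same, but keeping the ;9 tail

    for m in re.finditer(full_match, body):
        print m.start(), m.group()

    scolon_matches = [(m.start(), m.group()) for m in re.finditer(sc_full_match, body)]
    print scolon_matches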