From 22373a4514a7e6955852c9ae07a0d0ec72737e70 Mon Sep 17 00:00:00 2001 From: DanMcInerney Date: Tue, 29 Jul 2014 07:47:48 -0400 Subject: [PATCH] better logic --- xsscrapy/middlewares.py | 23 +++++ xsscrapy/settings.py | 8 +- xsscrapy/spiders/xss_spider.py | 184 ++++++++++++++++++++++++--------- 3 files changed, 168 insertions(+), 47 deletions(-) diff --git a/xsscrapy/middlewares.py b/xsscrapy/middlewares.py index dfc9cab..03486a5 100644 --- a/xsscrapy/middlewares.py +++ b/xsscrapy/middlewares.py @@ -1,6 +1,8 @@ from xsscrapy.settings import USER_AGENT_LIST import random from scrapy import log +from scrapy.contrib.downloadermiddleware.cookies import CookiesMiddleware + class RandomUserAgentMiddleware(object): ''' Use a random user-agent for each request ''' @@ -8,3 +10,24 @@ class RandomUserAgentMiddleware(object): ua = random.choice(USER_AGENT_LIST) if ua: request.headers.setdefault('User-Agent', ua) + + +def new_process_request(self, request, spider): + if 'dont_merge_cookies' in request.meta: + return + + cl = request.headers.getlist('Cookie') + cookiejarkey = request.meta.get("cookiejar") + jar = self.jars[cookiejarkey] + cookies = self._get_request_cookies(jar, request) + for cookie in cookies: + jar.set_cookie_if_ok(cookie, request) + # set Cookie header + request.headers.pop('Cookie', None) + jar.add_cookie_header(request) + + #INSERT COOKIE MANIP HERE + + self._debug_cookie(request, spider) + +#CookiesMiddleware.process_request = new_process_request diff --git a/xsscrapy/settings.py b/xsscrapy/settings.py index 23c96a8..701f6e2 100644 --- a/xsscrapy/settings.py +++ b/xsscrapy/settings.py @@ -19,7 +19,11 @@ USER_AGENT_LIST = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTM 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36', 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0'] DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.RandomUserAgentMiddleware': 400, - 'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None,} + 'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None, # disable stock user-agent middleware + 'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': None, # disable stock cookies middleware + 'xsscrapy.middlewares.CookiesMiddleware': 700} # 700 is in the downloader_middlewares_base + +COOKIES_ENABLED = True # prevent duplicate link crawling DUPEFILTER_CLASS = 'scrapy.dupefilter.RFPDupeFilter' @@ -30,5 +34,7 @@ FEED_FORMAT = 'csv' FEED_URI = 'vulnerable-urls.txt' #COOKIES_DEBUG = True +CONCURRENT_REQUESTS = 16 + # Test for injection via headers #DEFAULT_REQUEST_HEADERS = {'Referer': '9zqjx', 'User-Agent':'9zqjx'} diff --git a/xsscrapy/spiders/xss_spider.py b/xsscrapy/spiders/xss_spider.py index 24ee47b..4d24129 100644 --- a/xsscrapy/spiders/xss_spider.py +++ b/xsscrapy/spiders/xss_spider.py @@ -36,11 +36,11 @@ cookie data control? TO DO --add DOM detection or static js analysis (check retire.js project) --the variable payload starts as the encoded payload and is eventually unescaped but not everuwhere? +-DirBuster! +-Cookie headers +-LONGTERM add DOM detection or static js analysis (check retire.js project) -cleanup xss_chars_finder(self, response) -prevent Requests from being URL encoded (line 57 of __init__ in Requests class) - ''' class XSSspider(CrawlSpider): @@ -59,10 +59,13 @@ class XSSspider(CrawlSpider): self.tag_pld = '()=<>' self.js_pld = '\'"(){}[];' self.redir_pld = 'JaVAscRIPT:prompt(99)' + self.start_url_cookie_xssed = False #attr_pld = generated once injection points are found (requires checking if single or double quotes ends html attribute values) self.form_requests_made = set() self.header_requests_made = set() self.url_requests_made = set() + self.cookie_requests_made = set() + self.var_val = None self.login_user = kwargs.get('user') self.login_pass = kwargs.get('pw') @@ -125,6 +128,60 @@ class XSSspider(CrawlSpider): self.log('Added robots.txt disallowed URL to our queue: '+r.url) return reqs + def make_cookie_reqs(self, orig_url, payloaded_cookies, payloads, quote): + reqs = [] + + if payloads[0] == self.test_str: + cb = self.payloaded_reqs + else: + cb = self.xss_chars_finder + + if self.start_url_cookie_xssed == False: + urls = [orig_url]+self.start_urls + else: + urls = [orig_url] + + for url in urls: + reqs += [Request(url, + meta={'payload':payload, + 'type':'header', + 'inj_point':'cookie', + 'quote':quote, + 'orig_url':orig_url}, + cookies=payloaded_cookie, + callback=cb, + dont_filter=True) + for payloaded_cookie in payloaded_cookies + for payload in payloads] + + if len(reqs) > 0: + return reqs + + def remove_cookie_dupes(self, reqs): + ''' We have dont_filter == True in self.make_cookie_reqs so we gotta filter em ourselves ''' + new_reqs = [] + for r in reqs: + url_payload_set = set([(r.url, r.payload)]) + if url_payload_set.issubset(self.cookie_requests_made): + continue + self.cookie_requests_made.add(url_payload_set) + new_reqs.append(r) + + if len(new_reqs) > 0: + return new_reqs + + def payload_cookies(self, payloads): + payloaded_cookies = [] + + for payload in payloads: + payloaded_cookie_dict = {} + for k in self.cookie_dict: + payloaded_cookie_dict[k] = self.cookie_dict[k]+payload + payloaded_cookies.append(payloaded_cookie_dict) + + if len(payloaded_cookies) > 0: + return payloaded_cookies + def parse_resp(self, response): reqs = [] orig_url = response.url @@ -136,7 +193,14 @@ class XSSspider(CrawlSpider): payloads = [payload] # Get any cookies (logic of this still needs working out) - cookies = response.headers.getlist('Set-Cookie') + #cookies = response.headers.getlist('Set-Cookie') + #if cookies: + # self.cookie_dict = self.make_cookie_dict(cookies) + # if self.cookie_dict: + # payloaded_cookies = self.payload_cookies(payloads) + # if payloaded_cookies: + # cookie_reqs = self.make_cookie_reqs(orig_url, payloaded_cookies, payloads, quote_enclosure) + # reqs += cookie_reqs # Edit a few select headers with injection string and resend request headers = ['Referer', 'User-Agent'] @@ -185,12 +249,10 @@ class XSSspider(CrawlSpider): # Make sure there are values to even change if len(values) == 0: - self.log('No values changed, aborting this form test') return # Make sure at least one value has been injected if not self.injected_val_confirmed(values, payload): - self.log('Form contains no injected values: %s' % orig_url) return # Sometimes lxml doesn't read the form.action right @@ -245,6 +307,9 @@ class XSSspider(CrawlSpider): self.form_requests_made.add(vam) return + def make_cookie_payloads(self, response): + pass + def make_form_payloads(self, response): ''' Create the payloads based on the injection points from the first test request''' orig_url = response.meta['orig_url'] @@ -290,6 +355,20 @@ class XSSspider(CrawlSpider): if len(payloaded_urls) > 0: return payloaded_urls + def make_cookie_dict(self, cookies): + cookie_dict = {} + for c in cookies: + try: + var, val = c.split(';', 1)[0].split('=', 1) + cookie_dict[var] = val + except Exception as e: + print str(e) + continue + + if len(cookie_dict) > 0: + return cookie_dict + + def getURLparams(self, url): ''' Parse out the URL parameters ''' parsedUrl = urlparse(url) @@ -418,24 +497,17 @@ class XSSspider(CrawlSpider): if self.tag_pld not in payloads: payloads.append(self.tag_pld) + # attribute payload is equal to tag payload just with a quote attached so + # this eliminates some overlap if self.tag_pld in payloads and attr_pld in payloads: payloads.remove(self.tag_pld) - for p in payloads: - if 'h' in payloads: - print '***PAYLOADS:', p - if inj_type == 'url': payloads.append(urllib.quote_plus(payloads[0])) payloads = self.delim_payloads(payloads) if len(payloads) > 0: - for p in payloads: - if 'h' in payloads: - print '***PAYLOADS:', p return payloads - else: - return def delim_payloads(self, payloads): ''' Surround the payload with a delimiter ''' @@ -461,16 +533,16 @@ class XSSspider(CrawlSpider): resp_url = response.url body = response.body # Regex: ( ) mean group 1 is within the parens, . means any char, {1,25} means match any char 1 to 25 times - chars_between_delims = '%s(.{1,25})%s' % (self.test_str, self.test_str) # self.js_pld is 21 chars, so added a little extra space + chars_between_delims = '%s(.*?)%s' % (self.test_str, self.test_str) # 25 since some payload is just under that escaped inj_num = len(injections) mismatch = False orig_payload = response.meta['payload'].strip(self.test_str) # xss char payload escaped_payload = self.unescape_payload(orig_payload) - break_tag_chars = set(['>', '<',]) - break_attr_chars = set([quote_enclosure]) - break_js_chars = set(['"', "'"]) + break_tag_chars = set(['>', '<', '(', ')']) + break_attr_chars = set([quote_enclosure, '(', ')']) + break_js_chars = set(['"', "'", '(', ')']) matches = re.findall(chars_between_delims, body) if matches: @@ -507,8 +579,6 @@ class XSSspider(CrawlSpider): # Attribute breakout if attr: - # Must pass a string search for the test+unesc_payload+test in at least one line of html and cannot be a mismatch - #if line_html and mismatch == False: if quote_enclosure in escaped_payload: if break_attr_chars.issubset(chars): return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, item) @@ -519,15 +589,41 @@ class XSSspider(CrawlSpider): if break_tag_chars.issubset(chars): return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, item) + # Check the entire body for exact match - if escaped_payload in body: - item['line'] = self.get_inj_line(body, escaped_payload, item) - item['xss_payload'] = orig_payload - item['unfiltered'] = payload - item['inj_point'] = inj_point - item['xss_type'] = xss_type - item['url'] = orig_url - return item + re_payload = escaped_payload.replace('(', '\(').replace(')', '\)').replace('"', '\\"').replace("'", "\\'") + re_payload = re_payload.replace('{', '\{').replace('}', '\}').replace(']', '\]').replace('[', '\[') + re_payload = '.{1}?'+re_payload + full_matches = re.findall(re_payload, body) + for f in full_matches: + unescaped_match = ''.join(self.get_unfiltered_chars(f, escaped_payload)) + print ' f:', f + print 'unesf:', unescaped_match + if unescaped_match == escaped_payload: + # Does not start with \ so it's not escaping any chars + #item['line'] = self.get_inj_line(body, escaped_payload, item) + item['line'] = self.get_inj_line(body, f, item) + item['xss_payload'] = orig_payload + item['unfiltered'] = escaped_payload + item['inj_point'] = inj_point + item['xss_type'] = xss_type + item['url'] = orig_url + print 'WAS FOUND IN ENTIRE BODY SEARCH' + return item + #except UnicodeDecodeError: + #self.log('Could not decode html off %s' % orig_url) + + def make_item(self, joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line, item): + ''' Create the vulnerable item ''' + + item['line'] = line + item['xss_payload'] = orig_payload + item['unfiltered'] = joined_chars + item['inj_point'] = inj_point + item['xss_type'] = xss_type + item['inj_tag'] = tag + item['url'] = orig_url + return item def get_inj_line(self, body, payload, item): lines = [] @@ -535,26 +631,14 @@ class XSSspider(CrawlSpider): for idx, line in enumerate(html_lines): line = line.strip() if payload in line: - if len(line) > 500: - line = line[:200]+'...' + #if len(line) > 500: + # line = line[:200]+'...' num_txt = (idx, line) lines.append(num_txt) if len(lines) > 0: return lines - def make_item(self, joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line, item): - ''' Create the vulnerable item ''' - - item['line'] = line - item['xss_type'] = xss_type - item['xss_payload'] = orig_payload - item['unfiltered'] = joined_chars - item['inj_tag'] = tag - item['inj_point'] = inj_point - item['url'] = orig_url - return item - def parse_injections(self, injection): attr = None attr_val = None @@ -632,7 +716,6 @@ class XSSspider(CrawlSpider): quote = re.search('