added url path xss detection, refactored url param functions

This commit is contained in:
Dan McInerney
2014-12-13 18:40:23 -07:00
parent 60542c3429
commit ea5950bdeb
4 changed files with 138 additions and 62 deletions

.gitignore vendored
View File

@@ -1,5 +1,6 @@
 *.pyc
 xsscrapy-vulns*
+*.txt
 *.swp
 *.swo
 *.png

View File

@@ -35,16 +35,17 @@ class InjectedDupeFilter(object):
     def process_request(self, request, spider):
         meta = request.meta
-        if 'xss_place' not in meta or 'delim' not in meta:
+        if 'xss_place' not in meta:
             return
         delim = meta['delim']
         # Injected URL dupe handling
         if meta['xss_place'] == 'url':
+            url = request.url
             #replace the delim characters with nothing so we only test the URL
             #with the payload
-            url = request.url.replace(delim, '')
-            if url in URLS_SEEN:
+            no_delim_url = url.replace(delim, '')
+            if no_delim_url in URLS_SEEN:
                 raise IgnoreRequest
             spider.log('Sending payloaded URL: %s' % url)
             URLS_SEEN.add(url)
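
A minimal sketch of the dedupe idea (delimiter and URL are assumed values, Python 2 like the project): the delimiter is regenerated for every request, so stripping it out gives a stable key to store in URLS_SEEN.

delim = '1zqjab'   # hypothetical per-request delimiter: self.delim + two random letters
url = 'http://example.com/page?q=' + delim + "'\"(){}<x>:/" + delim + ';9'
no_delim_url = url.replace(delim, '')
# no_delim_url is identical no matter which random letters were chosen,
# so it is what gets stored in and checked against URLS_SEEN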

View File

@@ -10,12 +10,10 @@ import lxml.etree
 import lxml.html
 from lxml.html import soupparser, fromstring
 import itertools
-#from IPython import embed
+from IPython import embed

 class XSSCharFinder(object):
     def __init__(self):
-        self.redir_pld = 'JaVAscRIPT:prompt(99)'
-        self.test_str = '\'"(){}<x>:/'
         self.url_param_xss_items = []

     def process_item(self, item, spider):
@@ -24,6 +22,7 @@ class XSSCharFinder(object):
         payload = meta['payload']
         delim = meta['delim']
+        param = meta['xss_param']
         resp_url = response.url
         body = response.body
         mismatch = False
@@ -32,12 +31,9 @@ class XSSCharFinder(object):
         # Regex: ( ) mean group 1 is within the parens, . means any char,
         # {0,80} means match any char 0 to 80 times, 80 chosen because double URL encoding
         # ? makes the search nongreedy so it stops after hitting its limits
-        #full_match = '%s.*?%s' % (delim, delim)
         full_match = '%s.{0,80}?%s' % (delim, delim)
         # matches with semicolon which sometimes cuts results off
         sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)
-        #chars_between_delims = '%s(.*?)%s' % (delim, delim)
-        #chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim)

         # Quick sqli check based on DSSS
         dbms, regex = self.sqli_check(body, meta['orig_body'])
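
A minimal sketch of how these patterns catch a reflected payload (delimiter and response body are assumed values, Python 2 like the project): the payload is wrapped in the same random delimiter on both sides, so full_match finds whatever survived filtering between the two copies, and sc_full_match additionally requires the trailing ;9.

import re

delim = '1zqjab'   # hypothetical: self.delim + two random letters
full_match = '%s.{0,80}?%s' % (delim, delim)
sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)

# fake response body where the server reflected the payload but HTML-encoded < and >
body = 'value="1zqjab\'"(){}&lt;x&gt;:/1zqjab;9"'
matches = [m.group() for m in re.finditer(full_match, body)]
# matches == ['1zqjab\'"(){}&lt;x&gt;:/1zqjab']
sc_matches = [m.group() for m in re.finditer(sc_full_match, body)]
# sc_matches == ['1zqjab\'"(){}&lt;x&gt;:/1zqjab;9']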
@@ -52,11 +48,13 @@ class XSSCharFinder(object):
         # XSS detection starts here
         re_matches = sorted([(m.start(), m.group()) for m in re.finditer(full_match, body)])
-        if re_matches:
+        if '/verifypasswd.php/1zqj' in resp_url:
+            embed()
+        if len(re_matches) > 0:
             scolon_matches = sorted([(m.start(), m.group()) for m in re.finditer(sc_full_match, body)])
             lxml_injs = self.get_lxml_matches(full_match, body, resp_url, delim)
             if lxml_injs:
                 err = None
                 if len(re_matches) != len(lxml_injs):
                     spider.log('Error: mismatch in injections found by lxml and regex. Higher chance of false positive for %s' % resp_url)
@@ -64,7 +62,6 @@ class XSSCharFinder(object):
                     mismatch = True
                 inj_data = self.combine_regex_lxml(lxml_injs, re_matches, scolon_matches, body, mismatch)
-                # If mismatch is True, then "for offset in sorted(inj_data)" will fail with TypeError
                 try:
                     for offset in sorted(inj_data):
@@ -239,8 +236,7 @@ class XSSCharFinder(object):
             tag_index, tag, attr, attr_val, payload, reflected_chars, line = injection
             pl_delim = payload[:7]
-            #full_match = '%s.*?%s' % (pl_delim, pl_delim)
-            full_match = '%s.{0,80}?%s' % (pl_delim, pl_delim)
+            full_match = '%s.{0,85}?%s' % (pl_delim, pl_delim)
             line = re.sub(full_match, 'INJECTION', line)

             all_chars_payloads = {}
@@ -375,6 +371,9 @@ class XSSCharFinder(object):
             # javascript:alert(1) vulns
             # We do this slicing operation because ;9 might be at the end
             # although it's unnecessary for the payload
+            # CHECK HERE, PASS DOWN THE ORIG ATTR VAL
+            #if delim+'subbed' in attr_val:
             if attr_val[:len(delim+'subbed')] == delim+'subbed':
                 if tag == 'a' and attr == 'href':
                     # Only need : ( and ) to use javascript:prompt(4) redir payload
@@ -559,8 +558,7 @@ class XSSCharFinder(object):
         subbed_body = re.sub(full_match, sub, body)
         doc = self.html_parser(subbed_body, resp_url)
         lxml_injs = self.xpath_inj_points(sub, doc)
-        if lxml_injs:
-            return lxml_injs
+        return lxml_injs

     def html_parser(self, body, resp_url):
         try:
@@ -657,8 +655,8 @@ class XSSCharFinder(object):
             #unfiltered_chars = self.get_unfiltered_chars(payload, pl_delim, scolon_matches, match_offset)
             reflected_chars = self.get_reflected_chars(tag, attr, payload, pl_delim, scolon_matches, match_offset)
             # Common false+ shows only "> as unfiltered if script parses the chars between 2 unrelated delim strs
-            if reflected_chars == '">':
-                reflected_chars = ''
+            #if reflected_chars == '">':
+            #    reflected_chars = ''
             all_inj_data[match_offset] = [tag_index, tag, attr, attr_val, payload, reflected_chars, line]
         return all_inj_data
@@ -869,20 +867,16 @@ class XSSCharFinder(object):
         # Make sure js payloads remove escaped ' and ", also remove ;
         # since ; will show up in html encoded entities. If ; is unfiltered
         # it will be added after this function
-        #escaped_chars = re.findall(r'\\(.)', chars)
         chars_between_delim = payload.replace(delim, '')#.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')
         #If injection is inside script tag, remove the escaped chars
         if tag == 'script' or attr in self.event_attributes():
             chars_between_delim = chars_between_delim.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')
         else:
-            # If it's not a script then just remove the \'s otherwise they show up in Unfiltered in the item
             chars_between_delim = chars_between_delim.replace("\\", "")

-        # List for just the inj point
-        #for c in chars_found:
-        #    if c in self.test_str:
-        #        unfiltered_chars.append(c)
-        #    # Check if a colon needs to be added to the unfiltered chars
+        # Check if a colon needs to be added to the unfiltered chars
         for scolon_match in scolon_matches:
             # Confirm the string offset of the match is the same
             # Since scolon_match will only exist when ;9 was found
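
A minimal sketch of the escape-stripping above (delimiter and reflection are assumed values, Python 2 like the project): inside a script tag or an event-handler attribute, a backslash-escaped quote cannot break out of the string, so it is dropped from the reflected-character list along with ; (which also shows up in harmless HTML entities).

delim = '1zqjab'   # hypothetical per-request delimiter
reflected = "1zqjab\\'\\\"(){}<x>:/1zqjab"   # what came back between the delimiters
chars = reflected.replace(delim, '')
# script/event-handler context: escaped quotes and ; are not usable, so strip them
chars = chars.replace("\\'", "").replace('\\"', '').replace(';', '')
# chars is now '(){}<x>:/' -- the quotes were effectively filtered in this context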

View File

@@ -6,7 +6,7 @@ from scrapy.http import FormRequest, Request
 from scrapy.selector import Selector
 from xsscrapy.items import inj_resp
 from xsscrapy.loginform import fill_login_form
-from urlparse import urlparse, parse_qsl, urljoin
+from urlparse import urlparse, parse_qsl, urljoin, urlunparse, urlunsplit
 from scrapy.http.cookies import CookieJar
 from cookielib import Cookie
@@ -32,7 +32,8 @@ class XSSspider(CrawlSpider):
     # If you're logging into a site with a logout link, you'll want to
     # uncomment the rule below and comment the shorter one right after to
     # prevent yourself from being logged out automatically
-    rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    #rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    rules = (Rule(LinkExtractor(deny='contactus'), callback='parse_resp', follow=True), )

     def __init__(self, *args, **kwargs):
         # run using: scrapy crawl xss_spider -a url='http://example.com'
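
The deny='contactus' pattern is specific to whatever site was being tested; the same mechanism the comment describes can keep an authenticated crawl off a logout link. A sketch, assuming current Scrapy import paths (the project's own imports may differ):

# Sketch only: deny takes a regex or list of regexes matched against extracted URLs
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Rule

rules = (Rule(LinkExtractor(deny=('logout', 'signout')), callback='parse_resp', follow=True), )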
@@ -41,7 +42,7 @@ class XSSspider(CrawlSpider):
         hostname = urlparse(self.start_urls[0]).hostname
         # With subdomains
         self.allowed_domains = [hostname] # adding [] around the value seems to allow it to crawl subdomain of value
-        self.delim = '1zqjx'
+        self.delim = '1zqj'
         # semi colon goes on end because sometimes it cuts stuff off like
         # gruyere or the second cookie delim
         self.test_str = '\'"(){}<x>:/'
@@ -135,6 +136,10 @@ class XSSspider(CrawlSpider):
         reqs = []
         orig_url = response.url
         body = response.body
+        parsed_url = urlparse(orig_url)
+        # parse_qsl rather than parse_qs in order to preserve order
+        # will always return a list
+        url_params = parse_qsl(parsed_url.query, keep_blank_values=True)
         try:
             # soupparser will handle broken HTML better (like identical attributes) but god damn will you pay for it
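
A minimal sketch of why parse_qsl with keep_blank_values=True is used here (URL is an assumed value): it returns an ordered list of (name, value) pairs and keeps empty parameters, which lets the spider later rebuild the query string with exactly one value swapped for a payload.

from urlparse import urlparse, parse_qsl   # Python 2, matching the project

parsed = urlparse('http://example.com/page1.php?x=1&y=&z=2')
url_params = parse_qsl(parsed.query, keep_blank_values=True)
# url_params == [('x', '1'), ('y', ''), ('z', '2')]  -- order preserved, blank 'y' kept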
@@ -177,11 +182,9 @@ class XSSspider(CrawlSpider):
         if form_reqs:
             reqs += form_reqs

-        # Test URL variables with xss strings
-        payloaded_urls, url_delim_str = self.make_URLs(orig_url, payload)
-        print 'URL:', payloaded_urls, url_delim_str
+        payloaded_urls = self.make_URLs(orig_url, parsed_url, url_params)
+        # list of tuples where item[0]=url, and item[1]=changed param
         if payloaded_urls:
-            url_reqs = self.make_url_reqs(orig_url, payloaded_urls, url_delim_str)
+            url_reqs = self.make_url_reqs(orig_url, payloaded_urls)
             if url_reqs:
                 reqs += url_reqs
@@ -243,10 +246,8 @@ class XSSspider(CrawlSpider):
         ''' Payload each form input in each input's own request '''
         reqs = []
         vals_urls_meths = []
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         for form in forms:
             if form.inputs:
@@ -283,7 +284,7 @@ class XSSspider(CrawlSpider):
                                   'orig_url':orig_url,
                                   'xss_place':'form',
                                   'POST_to':url,
-                                  'delim':delim_str},
+                                  'delim':payload[:len(self.delim)+2]},
                             dont_filter=True,
                             callback=self.xss_chars_finder)
             reqs.append(req)
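
A minimal sketch of why 'delim' can now be sliced off the payload itself (values assumed, Python 2 like the project): make_payload() always puts the delimiter first, so the first len(self.delim)+2 characters recover it.

import random, string

delim = '1zqj'                 # self.delim after this commit
two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
delim_str = delim + two_rand_letters
payload = delim_str + '\'"(){}<x>:/' + delim_str + ';9'
assert payload[:len(delim) + 2] == delim_str   # the slice recovers the per-request delimiter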
@@ -300,9 +301,7 @@ class XSSspider(CrawlSpider):
     def make_cookie_reqs(self, url, payload, xss_param):
         ''' Generate payloaded cookie header requests '''
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         meta={'xss_place':'header',
@@ -310,7 +309,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':xss_param,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str},
+                              'delim':payload[:len(self.delim)+2]},
                         cookies={'userinput':payload},
                         callback=self.xss_chars_finder,
                         dont_filter=True)]
@@ -318,33 +317,116 @@ class XSSspider(CrawlSpider):
         if len(reqs) > 0:
             return reqs

-    def make_URLs(self, url, payload):
-        ''' Add links with variables in them to the queue again but with XSS testing payloads
-        Will return a tuple: (url, injection point, payload) '''
+    def make_URLs(self, orig_url, parsed_url, url_params):
+        """
+        Create the URL parameter payloaded URLs
+        """
+        payloaded_urls = []
+        # Create 1 URL per payloaded param
+        new_query_strings = self.get_single_payload_queries(url_params)
+        if new_query_strings:
+            # Payload the parameters
+            for query in new_query_strings:
+                query_str = query[0]
+                params = query[1]
+                payload = query[2]
+                # scheme, netloc, path, params, query (url params), fragment
+                payloaded_url = urlunparse((parsed_url[0], parsed_url[1], parsed_url[2], parsed_url[3], query_str, parsed_url[5]))
+                payloaded_url = urllib.unquote(payloaded_url)
+                payloaded_urls.append((payloaded_url, params, payload))
+            # Payload the URL path
+            payloaded_url_path = self.payload_url_path(parsed_url)
+            payloaded_urls.append(payloaded_url_path)
+        else:
+            # Payload end of URL if there's no parameters
+            payloaded_end_of_url = self.payload_end_of_url(orig_url)
+            payloaded_urls.append(payloaded_end_of_url)
+        if len(payloaded_urls) > 0:
+            return payloaded_urls
+    def payload_url_path(self, parsed_url):
+        """
+        Payload the URL path like:
+        http://example.com/page1.php?x=1&y=2 -->
+        http://example.com/page1.php/FUZZ/?x=1&y=2
+        """
+        # Remove / so that it doesn't think it's 2 folders in the fuzz chars
+        payload = self.make_payload().replace('/', '')
+        path = parsed_url[2]
+        if path.endswith('/'):
+            path = path + payload + '/'
+        else:
+            path = path + '/' + payload + '/'
+        #scheme, netloc, path, params, query (url params), fragment
+        payloaded_url = urlunparse((parsed_url[0], parsed_url[1], path, parsed_url[3], parsed_url[4], parsed_url[5]))
+        payloaded_url = urllib.unquote(payloaded_url)
+        payloaded_data = (payloaded_url, 'URL path', payload)
+        return payloaded_data
+    def get_single_payload_queries(self, url_params):
+        """
+        Make a list of lists of tuples where each secondary list has 1 payloaded
+        param and the rest are original value
+        """
+        new_payloaded_params = []
+        changed_params = []
+        modified = False
+        # Create a list of lists where num of lists = len(params)
+        for x in xrange(0, len(url_params)):
+            single_url_params = []
+            # Make the payload
+            payload = self.make_payload()
+            for p in url_params:
+                param, value = p
+                # if param has not been modified and we haven't changed a parameter for this loop
+                if param not in changed_params and modified == False:
+                    # Do we need the original value there? Might be helpful sometimes but think about testing for <frame src="FUZZCHARS">
+                    # versus <frame src="http://something.com/FUZZCHARS"> and the xss payload javascript:alert(1)
+                    new_param_val = (param, payload)
+                    #new_param_val = (param, value+payload)
+                    single_url_params.append(new_param_val)
+                    changed_params.append(param)
+                    modified = param
+                else:
+                    single_url_params.append(p)
+            # Add the modified, urlencoded params to the master list
+            new_payloaded_params.append((urllib.urlencode(single_url_params), modified, payload))
+            # Reset the changed parameter tracker
+            modified = False
+        if len(new_payloaded_params) > 0:
+            # [(payloaded params, payloaded param, payload), (payloaded params, payloaded param, payload)]
+            return new_payloaded_params
+    def make_payload(self):
+        """
+        Make the payload with a unique delim
+        """
         two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
         delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = delim_str + self.test_str + delim_str + ';9'
+        return payload

-        if '=' in url and '?' in url:
-            # If URL has variables, payload them
-            payloaded_urls = self.payload_url_vars(url, payload)
-        else:
-            # If URL has no variables, tack payload onto end of URL
-            payloaded_urls = self.payload_end_of_url(url, payload)
-        return payloaded_urls, delim_str
-
-    def payload_end_of_url(self, url, payload):
+    def payload_end_of_url(self, url):
         ''' Payload the end of the URL to catch some DOM(?) and other reflected XSSes '''
+        payload = self.make_payload()
         # Make URL test and delim strings unique
         if url[-1] == '/':
             payloaded_url = url+payload
         else:
             payloaded_url = url+'/'+payload
-        return [(payloaded_url, 'end of url', payload)]
+        return (payloaded_url, 'end of url', payload)

     def payload_url_vars(self, url, payload):
         ''' Payload the URL variables '''
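
A standalone, simplified approximation of what the new URL helpers produce, one payloaded parameter per generated query string (names and the FUZZ placeholder are made up; the real code substitutes a fresh delimited payload per parameter and also appends a path-payloaded URL via payload_url_path):

import urllib
from urlparse import urlparse, parse_qsl, urlunparse   # Python 2, matching the project

def single_payload_queries(url_params, payload):
    # simplified stand-in for get_single_payload_queries()
    queries = []
    for i, (param, _) in enumerate(url_params):
        single = [(p, payload) if j == i else (p, v) for j, (p, v) in enumerate(url_params)]
        queries.append((urllib.urlencode(single), param, payload))
    return queries

parsed = urlparse('http://example.com/page1.php?x=1&y=2')
params = parse_qsl(parsed.query, keep_blank_values=True)
for query_str, changed_param, payload in single_payload_queries(params, 'FUZZ'):
    print urlunparse((parsed[0], parsed[1], parsed[2], parsed[3], query_str, parsed[5])), changed_param
# http://example.com/page1.php?x=FUZZ&y=2 x
# http://example.com/page1.php?x=1&y=FUZZ y
# payload_url_path() would additionally yield http://example.com/page1.php/FUZZ/?x=1&y=2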
@@ -443,7 +525,7 @@ class XSSspider(CrawlSpider):
         return (netloc, protocol, doc_domain, path)

-    def make_url_reqs(self, orig_url, payloaded_urls, delim_str):
+    def make_url_reqs(self, orig_url, payloaded_urls):
         ''' Make the URL requests '''
         reqs = [Request(url[0],
@@ -451,7 +533,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':url[1],
                               'orig_url':orig_url,
                               'payload':url[2],
-                              'delim':delim_str},
+                              'delim':url[2][:len(self.delim)+2]},
                         callback = self.xss_chars_finder)
                 for url in payloaded_urls] # Meta is the payload
@@ -461,9 +543,7 @@ class XSSspider(CrawlSpider):
     def make_header_reqs(self, url, payload, inj_headers):
         ''' Generate header requests '''
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         headers={inj_header:payload},
@@ -471,7 +551,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':inj_header,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str,
+                              'delim':payload[:len(self.delim)+2],
                               'UA':self.get_user_agent(inj_header, payload)},
                         dont_filter=True,
                         callback = self.xss_chars_finder)