added error-based sqli detection

2014-10-03 03:27:00 -04:00
parent ef9831c210
commit 0790c5ca23
5 changed files with 117 additions and 147 deletions
--- a/README.md
+++ b/README.md
@@ -1,18 +1,19 @@
 xsscrapy
 ========

-Fast, thorough, XSS spider. Give it a URL and it'll test every link it finds for cross-site scripting vulnerabilities. 
+Fast, thorough, XSS/SQLi spider. Give it a URL and it'll test every link it finds for cross-site scripting and some SQL injection vulnerabilities. See FAQ for more details about SQLi detection.
+

 From within the main folder run:

-```
+```shell
 ./xsscrapy.py -u http://something.com
 ```


 If you wish to login then crawl:

-```
+```shell
 ./xsscrapy.py -u http://something.com/login_page -l loginname -p pa$$word
 ```

@@ -21,7 +22,9 @@ XSS vulnerabilities are reported in XSS-vulnerable.txt

 Dependencies
 -------
-```
+```shell
+wget -O get-pip.py https://bootstrap.pypa.io/get-pip.py
+python get-pip.py
 pip install -r requirements.txt
 ```

@@ -38,6 +41,7 @@ FAQ
 -------

 * If it gives an error : ```ImportError: cannot import name LinkExtractor```. This means that you don't have the latest version of scrapy. You can install it using: ```sudo pip install --upgrade scrapy```.
+* It's called XSScrapy, so why SQL injection detection too? There is overlap between dangerous XSS characters and dangerous SQL injection characters, namely single and double quotes. Detecting SQL injection errors in a response is also simple and not CPU-intensive. So although 99% of this script is strongly geared toward thorough and accurate detection of XSS, adding simple SQL injection detection through error message discovery is a simple and effective addition. This script will not test for blind SQL injection. The error messages it looks for come straight from w3af's sqli audit plugin.

 License
 -------
--- a/loginform.py
+++ b/loginform.py
@@ -1,102 +0,0 @@
-#!/usr/bin/env python
-import sys
-from argparse import ArgumentParser
-from collections import defaultdict
-from lxml import html
-
-
-__version__ = '1.0'  # also update setup.py
-
-
-def _form_score(form):
-    score = 0
-    # In case of user/pass or user/pass/remember-me
-    if len(form.inputs.keys()) in (2, 3):
-        score += 10
-
-    typecount = defaultdict(int)
-    for x in form.inputs:
-        type_ = x.type if isinstance(x, html.InputElement) else "other"
-        typecount[type_] += 1
-
-    if typecount['text'] > 1:
-        score += 10
-    if not typecount['text']:
-        score -= 10
-
-    if typecount['password'] == 1:
-        score += 10
-    if not typecount['password']:
-        score -= 10
-
-    if typecount['checkbox'] > 1:
-        score -= 10
-    if typecount['radio']:
-        score -= 10
-
-    return score
-
-
-def _pick_form(forms):
-    """Return the form most likely to be a login form"""
-    return sorted(forms, key=_form_score, reverse=True)[0]
-
-
-def _pick_fields(form):
-    """Return the most likely field names for username and password"""
-    userfield = passfield = emailfield = None
-    for x in form.inputs:
-        if not isinstance(x, html.InputElement):
-            continue
-
-        type_ = x.type
-        if type_ == 'password' and passfield is None:
-            passfield = x.name
-        elif type_ == 'text' and userfield is None:
-            userfield = x.name
-        elif type_ == 'email' and emailfield is None:
-            emailfield = x.name
-
-    return userfield or emailfield, passfield
-
-
-def submit_value(form):
-    """Returns the value for the submit input, if any"""
-    for x in form.inputs:
-        if x.type == "submit" and x.name:
-            return [(x.name, x.value)]
-    else:
-        return []
-
-
-def fill_login_form(url, body, username, password):
-    doc = html.document_fromstring(body, base_url=url)
-    form = _pick_form(doc.xpath('//form'))
-    userfield, passfield = _pick_fields(form)
-    form.fields[userfield] = username
-    form.fields[passfield] = password
-    form_values = form.form_values() + submit_value(form)
-    return form_values, form.action or form.base_url, form.method
-
-
-def main():
-    ap = ArgumentParser()
-    ap.add_argument('-u', '--username', default='username')
-    ap.add_argument('-p', '--password', default='secret')
-    ap.add_argument('url')
-    args = ap.parse_args()
-
-    try:
-        import requests
-    except ImportError:
-        print('requests library is required to use loginform as a tool')
-
-    r = requests.get(args.url)
-    values, action, method = fill_login_form(args.url, r.text, args.username, args.password)
-    print('url: {0}\nmethod: {1}\npayload:'.format(action, method))
-    for k, v in values:
-        print('- {0}: {1}'.format(k, v))
-
-
-if __name__ == '__main__':
-    sys.exit(main())
--- a/tests/form.html
+++ b/tests/form.html
@@ -1,22 +0,0 @@
- 
-<form method="post" action="reflect.php">
-  Email: <input name="email" type="text" /><br>
-  Message:<br>
-  <textarea name="message" rows="15" cols="40">
-  </textarea><br>
-  Link: <input name='link', type='text' /><br>
-  <input type="submit" />
-</form>
-
-<?php
-ob_start();
-$val = "http://$_SERVER[HTTP_HOST]$_SERVER[REQUEST_URI]";
-echo $val;
-setcookie("user",$val, time()+3600), "/tests/", ".danmcinerney.org");
-ob_end_flush();
-?>
-<a href='http://danmcinerney.org/tests/uaref.php'>ua and ref</a>
-
-
-</body>
-</html>
--- a/xsscrapy/pipelines.py
+++ b/xsscrapy/pipelines.py
@@ -21,17 +21,16 @@ class XSSCharFinder(object):
    def process_item(self, item, spider):
        response = item['resp']
        meta = response.meta
-        item = vuln()

        payload = meta['payload']
        delim = meta['delim']
        resp_url = response.url
-        body = response.body.lower()
+        body = response.body
        mismatch = False
        error = None
        orig_payload = payload.replace(delim, '').replace(';9', '') # xss char payload
        # Regex: ( ) mean group 1 is within the parens, . means any char,
-        # {1,50} means match any char 0 to 72 times, 72 chosen because double URL encoding
+        # {0,80} means match any char 0 to 80 times, 80 chosen because double URL encoding
        # ? makes the search nongreedy so it stops after hitting its limits
        #full_match = '%s.*?%s' % (delim, delim)
        full_match = '%s.{0,80}?%s' % (delim, delim)
@@ -40,6 +39,18 @@ class XSSCharFinder(object):
        #chars_between_delims = '%s(.*?)%s' % (delim, delim)
        chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim)

+        # Quick sqli check based on w3af's sqli audit plugin
+        sqli_error = self.sqli_check(body)
+        msg = 'Possible SQL injection error! This error message fragment was found: "%s"' % sqli_error
+        if sqli_error:
+            item = self.make_item(meta, resp_url, msg, orig_payload, None)
+            self.write_to_file(item, spider)
+            item = None
+
+        # Now that we've checked for SQLi, we can lowercase the body
+        body = body.lower()
+
+        # XSS detection starts here
        re_matches = sorted([(m.start(), m.group()) for m in re.finditer(full_match, body)])
        if re_matches:
            scolon_matches = sorted([(m.start(), m.group()) for m in re.finditer(sc_full_match, body)])
@@ -62,8 +73,7 @@ class XSSCharFinder(object):

                        item = self.xss_logic(inj_data[offset], meta, resp_url, error)
                        if item:
-                            if item['xss_place'] == 'url':
-                                item = self.url_item_filtering(item, spider)
+                            item = self.url_item_filtering(item, spider)
                            if mismatch:
                                item['error'] = 'Mismatch: html parsed vs regex parsed injections, %d vs %d. Higher chance of false positive' % (len(injections), len(full_matches))
                            self.write_to_file(item, spider)
@@ -84,13 +94,91 @@ class XSSCharFinder(object):
        if pl_lines_found:
            item = self.make_item(meta, resp_url, pl_lines_found, orig_payload, None)
            if item:
-                if item['xss_place'] == 'url':
-                    item = self.url_item_filtering(item, spider)
-                    item['error'] = 'Payload delims do not surround this injection point. Found via search for entire payload.'
+                item = self.url_item_filtering(item, spider)
+                item['error'] = 'Payload delims do not surround this injection point. Found via search for entire payload.'
                self.write_to_file(item, spider)
+                return item

        raise DropItem('No XSS vulns in %s. type = %s, %s' % (resp_url, meta['xss_place'], meta['xss_param']))

+    def sqli_check(self, body):
+        ''' Do a quick lookup in the response body for SQL errors '''
+        # Taken from w3af
+        SQL_errors = ("System.Data.OleDb.OleDbException",
+                      "[SQL Server]",
+                      "[Microsoft][ODBC SQL Server Driver]",
+                      "[SQLServer JDBC Driver]",
+                      "[SqlException",
+                      "System.Data.SqlClient.SqlException",
+                      "Unclosed quotation mark after the character string",
+                      "'80040e14'",
+                      "mssql_query()",
+                      "odbc_exec()",
+                      "Microsoft OLE DB Provider for ODBC Drivers",
+                      "Microsoft OLE DB Provider for SQL Server",
+                      "Incorrect syntax near",
+                      "Sintaxis incorrecta cerca de",
+                      "Syntax error in string in query expression",
+                      "ADODB.Field (0x800A0BCD)<br>",
+                      "ADODB.Recordset'",
+                      "Unclosed quotation mark before the character string",
+                      "'80040e07'",
+                      "Microsoft SQL Native Client error",
+                      "SQLCODE",
+                      "DB2 SQL error:",
+                      "SQLSTATE",
+                      "[CLI Driver]",
+                      "[DB2/6000]",
+                      "Sybase message:",
+                      "Sybase Driver",
+                      "[SYBASE]",
+                      "Syntax error in query expression",
+                      "Data type mismatch in criteria expression.",
+                      "Microsoft JET Database Engine",
+                      "[Microsoft][ODBC Microsoft Access Driver]",
+                      "Microsoft OLE DB Provider for Oracle",
+                      "wrong number or types",
+                      "PostgreSQL query failed:",
+                      "supplied argument is not a valid PostgreSQL result",
+                      "unterminated quoted string at or near",
+                      "pg_query() [:",
+                      "pg_exec() [:",
+                      "supplied argument is not a valid MySQL",
+                      "Column count doesn\'t match value count at row",
+                      "mysql_fetch_array()",
+                      "mysql_",
+                      "on MySQL result index",
+                      "You have an error in your SQL syntax;",
+                      "You have an error in your SQL syntax near",
+                      "MySQL server version for the right syntax to use",
+                      "Division by zero in",
+                      "not a valid MySQL result",
+                      "[MySQL][ODBC",
+                      "Column count doesn't match",
+                      "the used select statements have different number of columns",
+                      "DBD::mysql::st execute failed",
+                      "DBD::mysql::db do failed:",
+                      "com.informix.jdbc",
+                      "Dynamic Page Generation Error:",
+                      "An illegal character has been found in the statement",
+                      "[Informix]",
+                      "<b>Warning</b>:  ibase_",
+                      "Dynamic SQL Error",
+                      "[DM_QUERY_E_SYNTAX]",
+                      "has occurred in the vicinity of:",
+                      "A Parser Error (syntax error)",
+                      "java.sql.SQLException",
+                      "Unexpected end of command in statement",
+                      "[Macromedia][SQLServer JDBC Driver]",
+                      "could not prepare statement",
+                      "Unknown column",
+                      "where clause",
+                      "SqlServer",
+                      "syntax error")
+        for e in SQL_errors:
+            if e in body:
+                return e
+
    def xss_logic(self, injection, meta, resp_url, error):
        ''' XSS logic. Returns None if vulnerability not found 
        The breakout_chars var is a list(set()). This ensure we can
@@ -448,8 +536,10 @@ class XSSCharFinder(object):

    def html_parser(self, body, resp_url):
        try:
-            # You must use soupparser or else candyass webdevs who use identical 
+            # You must use lxml.html.soupparser or else candyass webdevs who use identical 
            # multiple html attributes with injections in them don't get caught
+            # That being said, soupparser is crazy slow and introduces a ton of
+            # new bugs so that is not an option at this point in time
            doc = lxml.html.fromstring(body, base_url=resp_url)
        except lxml.etree.ParserError:
            self.log('ParserError from lxml on %s' % resp_url)
@@ -476,7 +566,6 @@ class XSSCharFinder(object):
        #except lxml.etree.XMLSyntaxError:
        #    self.log('XMLSyntaxError from lxml on %s' % resp_url)
        #    return
-        
        return doc

    def combine_regex_lxml(self, lxml_injs, full_matches, scolon_matches, body, mismatch):
@@ -579,8 +668,8 @@ class XSSCharFinder(object):
        return pl_lines_found

    def make_item(self, meta, resp_url, line, unfiltered, sugg_payloads):
-        item = vuln()
        ''' Create the vuln item '''
+        item = vuln()

        if isinstance(line, str):
            item['line'] = line
@@ -597,7 +686,9 @@ class XSSCharFinder(object):
        if 'POST_to' in meta:
            item['POST_to'] = meta['POST_to']

-        return item
+        # Just make sure one of the options has been set
+        if item['unfiltered']:
+            return item

    def xpath_inj_points(self, search_str, doc):
        ''' Searches lxml doc for any text, attributes, or comments
@@ -825,7 +916,7 @@ class XSSCharFinder(object):
        return event_attributes

    def write_to_file(self, item, spider):
-        with open('XSS-vulnerable.txt', 'a+') as f:
+        with open('xsscrapy-vulns.txt', 'a+') as f:
            f.write('\n')

            f.write('URL: '+item['orig_url']+'\n')
--- a/xsscrapy/spiders/xss_spider.py
+++ b/xsscrapy/spiders/xss_spider.py
@@ -126,8 +126,8 @@ class XSSspider(CrawlSpider):
        body = response.body

        try:
-            # You must use soupparser or else candyass webdevs who use identical 
-            # multiple html attributes with injections in them don't get caught
+            # soupparser will handle broken HTML better (like identical attributes) but god damn will you pay for it
+            # in CPU cycles. Slows the script to a crawl and introduces more bugs.
            doc = lxml.html.fromstring(body, base_url=orig_url)
        except lxml.etree.ParserError:
            self.log('ParserError from lxml on %s' % orig_url)
@@ -145,6 +145,7 @@ class XSSspider(CrawlSpider):
            reqs += iframe_reqs

        # Edit a few select headers with injection string and resend request
+        # Left room to add more header injections too
        test_headers = []
        test_headers.append('Referer')
        if 'UA' in response.meta:
@@ -159,7 +160,7 @@ class XSSspider(CrawlSpider):
        if cookie_reqs:
            reqs += cookie_reqs

-      #  # Fill out forms with xss strings
+        # Fill out forms with xss strings
        if forms:
            form_reqs = self.make_form_reqs(orig_url, forms, payload)
            if form_reqs:
@@ -318,10 +319,9 @@ class XSSspider(CrawlSpider):
        return payloaded_urls, delim_str

    def payload_end_of_url(self, url, payload):
-        ''' Payload the end of the URL to catch some DOM and other reflected XSSes '''
+        ''' Payload the end of the URL to catch some DOM(?) and other reflected XSSes '''

        # Make URL test and delim strings unique
-
        if url[-1] == '/':
            payloaded_url = url+payload
        else:
@@ -331,7 +331,6 @@ class XSSspider(CrawlSpider):

    def payload_url_vars(self, url, payload):
        ''' Payload the URL variables '''
-
        payloaded_urls = []
        params = self.getURLparams(url)
        modded_params = self.change_params(params, payload)