added --basic option for http basic auth

Dan McInerney
2014-10-24 07:12:40 -04:00
parent bc19ded59d
commit 68386dfc61
5 changed files with 35 additions and 14 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,5 @@
*.pyc
*.txt
xsscrapy-vulns.txt
*.swp
*.swo
*.png

View File

@@ -1,4 +1,4 @@
Scrapy==0.24.4
pybloom==1.1
requests==2.3.0
requests
beautifulsoup

View File

@@ -16,6 +16,7 @@ def get_args():
    parser.add_argument('-l', '--login', help="Login name; -l danmcinerney")
    parser.add_argument('-p', '--password', help="Password; -p pa$$w0rd")
    parser.add_argument('-r', '--rate', help="Rate in requests per minute")
    parser.add_argument('--basic', help="Use HTTP Basic Auth to login", action="store_true")
    args = parser.parse_args()
    return args
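
Note: --basic uses argparse's store_true action, so args.basic is always a bool (False when the flag is omitted, never None). A standalone sketch, separate from the commit itself:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--basic', help="Use HTTP Basic Auth to login", action="store_true")

print(parser.parse_args(['--basic']).basic)   # True
print(parser.parse_args([]).basic)            # False, not None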
@@ -24,11 +25,15 @@ url = args.url
user = args.login
password = args.password
rate = args.rate
if args.basic:
    basic = 'true'
else:
    basic = 'false'
if rate is not None:
    delay = 60 / float(rate)
else:
    delay = 0
try:
    execute(['scrapy', 'crawl', 'xsscrapy', '-a', 'url=%s' % url, '-a', 'user=%s' % user, '-a', 'pw=%s' % password, '-s', "DOWNLOAD_DELAY=%s" % delay])
    execute(['scrapy', 'crawl', 'xsscrapy', '-a', 'url=%s' % url, '-a', 'user=%s' % user, '-a', 'pw=%s' % password, '-s', "DOWNLOAD_DELAY=%s" % delay, '-a', 'basic=%s' % basic])
except KeyboardInterrupt:
    sys.exit()
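
Note: the flag is flattened to the strings 'true'/'false' because Scrapy hands every -a argument to the spider as a string. A sketch of the argv list execute() receives, with hypothetical url/user/pw values:

cmd = ['scrapy', 'crawl', 'xsscrapy',
       '-a', 'url=http://example.com',    # each -a pair arrives in the
       '-a', 'user=dan',                  # spider's __init__ kwargs as a string
       '-a', 'pw=hunter2',
       '-s', 'DOWNLOAD_DELAY=0',          # -s overrides a setting for this run only
       '-a', 'basic=true']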

View File

@@ -20,7 +20,8 @@ NEWSPIDER_MODULE = 'xsscrapy.spiders'
# 100 (first): Make sure there's no duplicate requests that have some value changed
# 200 (second): Make sure there's a random working User-Agent header set if that value's not injected with the test string
DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.InjectedDupeFilter': 100,
                          'xsscrapy.middlewares.RandomUserAgentMiddleware': 200}
                          'xsscrapy.middlewares.RandomUserAgentMiddleware': 200,
                          'scrapy.contrib.downloadermiddleware.httpauth.HttpAuthMiddleware': 300}
COOKIES_ENABLED = True
#COOKIES_DEBUG = True
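
Note: enabling HttpAuthMiddleware is what makes the spider's http_user/http_pass attributes (set in the spider below when basic=true) take effect. Roughly, the stock middleware works like this sketch, paraphrased rather than the actual Scrapy source:

from w3lib.http import basic_auth_header

class HttpAuthSketch(object):
    def spider_opened(self, spider):
        # credentials are read off the spider once, when it opens
        usr = getattr(spider, 'http_user', '')
        pwd = getattr(spider, 'http_pass', '')
        self.auth = basic_auth_header(usr, pwd) if usr or pwd else None

    def process_request(self, request, spider):
        # every outgoing request gets the precomputed Basic auth header
        if self.auth:
            request.headers['Authorization'] = self.auth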
@@ -35,5 +36,5 @@ ITEM_PIPELINES = {'xsscrapy.pipelines.XSSCharFinder':100}
#FEED_FORMAT = 'csv'
#FEED_URI = 'example.txt'
CONCURRENT_REQUESTS = 25
CONCURRENT_REQUESTS = 40

View File

@@ -55,6 +55,7 @@ class XSSspider(CrawlSpider):
        # gruyere or the second cookie delim
        self.test_str = '\'"(){}<x>:'
        # Login details
        self.login_user = kwargs.get('user')
        if self.login_user == 'None':
            self.login_user = None
@@ -63,6 +64,12 @@ class XSSspider(CrawlSpider):
        else:
            self.login_pass = kwargs.get('pw')
        # HTTP Basic Auth
        self.basic_auth = kwargs.get('basic')
        if self.basic_auth == 'true':
            self.http_user = self.login_user
            self.http_pass = self.login_pass
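
Note: comparing against the literal 'true' is deliberate, since -a arguments always arrive as strings. A small illustration with hypothetical kwargs:

# hypothetical kwargs, as Scrapy would deliver them for:
#   scrapy crawl xsscrapy -a user=dan -a pw=hunter2 -a basic=true
kwargs = {'user': 'dan', 'pw': 'hunter2', 'basic': 'true'}

basic_auth = kwargs.get('basic')    # the string 'true', never the bool True
if basic_auth == 'true':
    http_user = kwargs.get('user')  # HttpAuthMiddleware reads these two
    http_pass = kwargs.get('pw')    # attributes off the spider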
    def parse_start_url(self, response):
        ''' Creates the XSS tester requests for the start URL as well as the request for robots.txt '''
        u = urlparse(response.url)
@@ -76,22 +83,30 @@ class XSSspider(CrawlSpider):
    #### Handle logging in if username and password are given as arguments ####
    def start_requests(self):
        ''' If -a user= and -a pw= args are given, pass the first response to the login handler
        ''' If user and pw args are given, pass the first response to the login handler
        otherwise pass it to the normal callback function '''
        if self.login_user and self.login_pass:
            yield Request(url=self.start_urls[0], callback=self.login)
            if self.basic_auth == 'true':
                yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
            else:
                yield Request(url=self.start_urls[0], callback=self.login)
        else:
            yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
    def login(self, response):
        ''' Fill out the login form and return the request'''
        args, url, method = fill_login_form(response.url, response.body, self.login_user, self.login_pass)
        self.log('Logging in...')
        return FormRequest(url,
                           method=method,
                           formdata=args,
                           callback=self.confirm_login,
                           dont_filter=True)
        try:
            args, url, method = fill_login_form(response.url, response.body, self.login_user, self.login_pass)
            return FormRequest(url,
                               method=method,
                               formdata=args,
                               callback=self.confirm_login,
                               dont_filter=True)
        except Exception:
            self.log('Login failed') # Make this more specific eventually
            return Request(url=self.start_urls[0], dont_filter=True) # Continue crawling
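
Note: fill_login_form comes from the loginform package; it locates a login form in the page, fills in the credentials, and returns what is needed to build the FormRequest. A hedged usage sketch with a made-up page:

from loginform import fill_login_form

body = '''<form action="/login" method="post">
            <input name="username"><input type="password" name="password">
          </form>'''
args, action_url, method = fill_login_form('http://example.com/', body, 'dan', 'hunter2')
# args       -> list of (field, value) pairs, e.g. [('username', 'dan'), ('password', 'hunter2')]
# action_url -> absolute form action, e.g. 'http://example.com/login'
# method     -> the form's submit method, e.g. 'POST'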
    def confirm_login(self, response):
        ''' Check that the username showed up in the response page '''
@@ -415,7 +430,7 @@ class XSSspider(CrawlSpider):
        protocol = parsed_url.scheme+'://'
        # Get the hostname (includes subdomains)
        hostname = parsed_url.hostname
        # Get netlock (domain.com:8080)
        # Get netloc (domain.com:8080)
        netloc = parsed_url.netloc
        # Get doc domain
        doc_domain = '.'.join(hostname.split('.')[-2:])
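
Note: a quick reference for the urlparse fields used above (Python 2 urlparse, as imported elsewhere in the spider; the URL is made up):

from urlparse import urlparse

parsed_url = urlparse('http://sub.example.com:8080/search?q=1')
parsed_url.scheme                               # 'http'  -> protocol 'http://'
parsed_url.hostname                             # 'sub.example.com' (subdomains kept, port dropped)
parsed_url.netloc                               # 'sub.example.com:8080'
'.'.join(parsed_url.hostname.split('.')[-2:])   # 'example.com' -> doc_domain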