added --basic option for http basic auth

Dan McInerney
2014-10-24 07:12:40 -04:00
parent bc19ded59d
commit 68386dfc61
5 changed files with 35 additions and 14 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,5 @@
*.pyc
*.txt
xsscrapy-vulns.txt
*.swp
*.swo
*.png

View File

@@ -1,4 +1,4 @@
Scrapy==0.24.4
pybloom==1.1
requests==2.3.0
requests
beautifulsoup

View File

@@ -16,6 +16,7 @@ def get_args():
    parser.add_argument('-l', '--login', help="Login name; -l danmcinerney")
    parser.add_argument('-p', '--password', help="Password; -p pa$$w0rd")
    parser.add_argument('-r', '--rate', help="Rate in requests per minute")
    parser.add_argument('--basic', help="Use HTTP Basic Auth to login", action="store_true")
    args = parser.parse_args()
    return args
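
Note: --basic uses argparse's store_true action, so args.basic is always a bool (False when the flag is omitted, never None). A standalone sketch, separate from the commit itself:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--basic', help="Use HTTP Basic Auth to login", action="store_true")

print(parser.parse_args(['--basic']).basic)   # True
print(parser.parse_args([]).basic)            # False, not None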
@@ -24,11 +25,15 @@ url = args.url
user = args.login
password = args.password
rate = args.rate
if args.basic:
    basic = 'true'
else:
    basic = 'false'
if rate is not None:
    delay = 60 / float(rate)
else:
    delay = 0
try:
    execute(['scrapy', 'crawl', 'xsscrapy', '-a', 'url=%s' % url, '-a', 'user=%s' % user, '-a', 'pw=%s' % password, '-s', "DOWNLOAD_DELAY=%s" % delay])
    execute(['scrapy', 'crawl', 'xsscrapy', '-a', 'url=%s' % url, '-a', 'user=%s' % user, '-a', 'pw=%s' % password, '-s', "DOWNLOAD_DELAY=%s" % delay, '-a', 'basic=%s' % basic])
except KeyboardInterrupt:
    sys.exit()
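
Note: the flag is flattened to the strings 'true'/'false' because Scrapy hands every -a argument to the spider as a string. A sketch of the argv list execute() receives, with hypothetical url/user/pw values:

cmd = ['scrapy', 'crawl', 'xsscrapy',
       '-a', 'url=http://example.com',    # each -a pair arrives in the
       '-a', 'user=dan',                  # spider's __init__ kwargs as a string
       '-a', 'pw=hunter2',
       '-s', 'DOWNLOAD_DELAY=0',          # -s overrides a setting for this run only
       '-a', 'basic=true']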

View File

@@ -20,7 +20,8 @@ NEWSPIDER_MODULE = 'xsscrapy.spiders'
# 100 (first): Make sure there's no duplicate requests that have some value changed
# 200 (second): Make sure there's a random working User-Agent header set if that value's not injected with the test string
DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.InjectedDupeFilter': 100,
                          'xsscrapy.middlewares.RandomUserAgentMiddleware': 200}
                          'xsscrapy.middlewares.RandomUserAgentMiddleware': 200,
                          'scrapy.contrib.downloadermiddleware.httpauth.HttpAuthMiddleware': 300}
COOKIES_ENABLED = True
#COOKIES_DEBUG = True
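
Note: enabling HttpAuthMiddleware is what makes the spider's http_user/http_pass attributes (set in the spider below when basic=true) take effect. Roughly, the stock middleware works like this sketch, paraphrased rather than the actual Scrapy source:

from w3lib.http import basic_auth_header

class HttpAuthSketch(object):
    def spider_opened(self, spider):
        # credentials are read off the spider once, when it opens
        usr = getattr(spider, 'http_user', '')
        pwd = getattr(spider, 'http_pass', '')
        self.auth = basic_auth_header(usr, pwd) if usr or pwd else None

    def process_request(self, request, spider):
        # every outgoing request gets the precomputed Basic auth header
        if self.auth:
            request.headers['Authorization'] = self.auth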
@@ -35,5 +36,5 @@ ITEM_PIPELINES = {'xsscrapy.pipelines.XSSCharFinder':100}
#FEED_FORMAT = 'csv'
#FEED_URI = 'example.txt'
CONCURRENT_REQUESTS = 25
CONCURRENT_REQUESTS = 40

View File

@@ -55,6 +55,7 @@ class XSSspider(CrawlSpider):
        # gruyere or the second cookie delim
        self.test_str = '\'"(){}<x>:'
        # Login details
        self.login_user = kwargs.get('user')
        if self.login_user == 'None':
            self.login_user = None
@@ -63,6 +64,12 @@ class XSSspider(CrawlSpider):
        else:
            self.login_pass = kwargs.get('pw')
        # HTTP Basic Auth
        self.basic_auth = kwargs.get('basic')
        if self.basic_auth == 'true':
            self.http_user = self.login_user
            self.http_pass = self.login_pass
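
Note: comparing against the literal 'true' is deliberate, since -a arguments always arrive as strings. A small illustration with hypothetical kwargs:

# hypothetical kwargs, as Scrapy would deliver them for:
#   scrapy crawl xsscrapy -a user=dan -a pw=hunter2 -a basic=true
kwargs = {'user': 'dan', 'pw': 'hunter2', 'basic': 'true'}

basic_auth = kwargs.get('basic')    # the string 'true', never the bool True
if basic_auth == 'true':
    http_user = kwargs.get('user')  # HttpAuthMiddleware reads these two
    http_pass = kwargs.get('pw')    # attributes off the spider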
    def parse_start_url(self, response):
        ''' Creates the XSS tester requests for the start URL as well as the request for robots.txt '''
        u = urlparse(response.url)
@@ -76,22 +83,30 @@ class XSSspider(CrawlSpider):
    #### Handle logging in if username and password are given as arguments ####
    def start_requests(self):
        ''' If -a user= and -a pw= args are given, pass the first response to the login handler
        ''' If user and pw args are given, pass the first response to the login handler
        otherwise pass it to the normal callback function '''
        if self.login_user and self.login_pass:
            yield Request(url=self.start_urls[0], callback=self.login)
            if self.basic_auth == 'true':
                yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
            else:
                yield Request(url=self.start_urls[0], callback=self.login)
        else:
            yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
    def login(self, response):
        ''' Fill out the login form and return the request'''
        args, url, method = fill_login_form(response.url, response.body, self.login_user, self.login_pass)
        self.log('Logging in...')
        return FormRequest(url,
                           method=method,
                           formdata=args,
                           callback=self.confirm_login,
                           dont_filter=True)
        try:
            args, url, method = fill_login_form(response.url, response.body, self.login_user, self.login_pass)
            return FormRequest(url,
                               method=method,
                               formdata=args,
                               callback=self.confirm_login,
                               dont_filter=True)
        except Exception:
            self.log('Login failed') # Make this more specific eventually
            return Request(url=self.start_urls[0], dont_filter=True) # Continue crawling
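
Note: fill_login_form comes from the loginform package; it locates a login form in the page, fills in the credentials, and returns what is needed to build the FormRequest. A hedged usage sketch with a made-up page:

from loginform import fill_login_form

body = '''<form action="/login" method="post">
            <input name="username"><input type="password" name="password">
          </form>'''
args, action_url, method = fill_login_form('http://example.com/', body, 'dan', 'hunter2')
# args       -> list of (field, value) pairs, e.g. [('username', 'dan'), ('password', 'hunter2')]
# action_url -> absolute form action, e.g. 'http://example.com/login'
# method     -> the form's submit method, e.g. 'POST'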
    def confirm_login(self, response):
        ''' Check that the username showed up in the response page '''
@@ -415,7 +430,7 @@ class XSSspider(CrawlSpider):
        protocol = parsed_url.scheme+'://'
        # Get the hostname (includes subdomains)
        hostname = parsed_url.hostname
        # Get netlock (domain.com:8080)
        # Get netloc (domain.com:8080)
        netloc = parsed_url.netloc
        # Get doc domain
        doc_domain = '.'.join(hostname.split('.')[-2:])
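
Note: a quick reference for the urlparse fields used above (Python 2 urlparse, as imported elsewhere in the spider; the URL is made up):

from urlparse import urlparse

parsed_url = urlparse('http://sub.example.com:8080/search?q=1')
parsed_url.scheme                               # 'http'  -> protocol 'http://'
parsed_url.hostname                             # 'sub.example.com' (subdomains kept, port dropped)
parsed_url.netloc                               # 'sub.example.com:8080'
'.'.join(parsed_url.hostname.split('.')[-2:])   # 'example.com' -> doc_domain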