This commit is contained in:
DanMcInerney
2014-08-03 05:50:12 -04:00
parent 499d9b590d
commit f4cf5cbc20
4 changed files with 96 additions and 47 deletions

View File

@@ -14,3 +14,4 @@ class vuln(Item):
inj_point = Field()
error = Field()
line = Field()
POST_to = Field()

View File

@@ -1,33 +1,61 @@
from xsscrapy.settings import USER_AGENT_LIST
import random
from scrapy import log
from scrapy.contrib.downloadermiddleware.cookies import CookiesMiddleware
from urlparse import unquote
import re
class RandomUserAgentMiddleware(object):
    ''' Use a random user-agent for each request '''

    def process_request(self, request, spider):
        ''' Give the request a random User-Agent unless it already has one.

        request -- outgoing request; only its ``headers`` mapping is touched
        spider  -- unused, present to match the middleware call signature

        Returns None in every case.
        '''
        # random.choice() raises IndexError on an empty sequence, so an empty
        # USER_AGENT_LIST (or an empty entry in it) leaves the request as is.
        ua = random.choice(USER_AGENT_LIST) if USER_AGENT_LIST else None
        if not ua:
            return

        # setdefault() never overwrites an existing header, so a User-Agent
        # that is itself being tested for injection (it carries the 9zqjx
        # delimiter) is sent exactly as it was built.
        request.headers.setdefault('User-Agent', ua)
#class Unencode_url(object):
# ''' Fails. Not spectacularly, just doesn't do what we want '''
# def process_request(self, request, spider):
# test_str = '9zqjx'
#
# try:
# p = request.meta['payload']
# if p == test_str:
# return
# except KeyError:
# return
#
# #regex = re.compile('%s(.*?)%s' % (test_str, test_str)
# chars_between_delims = '%s(.*?)%s' % (test_str, test_str)
# matches = re.findall(chars_between_delims, request.url)
# for m in matches:
# unq = unquote(m)
# unq_url = request.url.replace(m, unq)
# print ' ', unq_url
# req = request.replace(url=unq_url)
# print ' ', req.url
def new_process_request(self, request, spider):
    ''' Merge a request's cookies into its cookie jar and reset the header.

    Written with the signature of a CookiesMiddleware.process_request
    replacement: ``self`` must provide ``jars``, ``_get_request_cookies``
    and ``_debug_cookie``.

    request -- outgoing request; its ``meta`` and ``headers`` are used
    spider  -- passed through to ``self._debug_cookie``

    Returns None in every case.
    '''
    # Requests that opt out of cookie merging are left untouched.
    if 'dont_merge_cookies' in request.meta:
        return

    # The jar is selected by the "cookiejar" meta key (None when unset).
    jar = self.jars[request.meta.get("cookiejar")]
    for cookie in self._get_request_cookies(jar, request):
        jar.set_cookie_if_ok(cookie, request)

    # set Cookie header: drop whatever is there and let the jar add its own
    request.headers.pop('Cookie', None)
    jar.add_cookie_header(request)

    #INSERT COOKIE MANIP HERE

    self._debug_cookie(request, spider)
#def new_process_request(self, request, spider):
# if 'dont_merge_cookies' in request.meta:
# return
#
# cl = request.headers.getlist('Cookie')
# cookiejarkey = request.meta.get("cookiejar")
# jar = self.jars[cookiejarkey]
# cookies = self._get_request_cookies(jar, request)
# for cookie in cookies:
# jar.set_cookie_if_ok(cookie, request)
# # set Cookie header
# request.headers.pop('Cookie', None)
# jar.add_cookie_header(request)
#
# #INSERT COOKIE MANIP HERE
#
# self._debug_cookie(request, spider)
#CookiesMiddleware.process_request = new_process_request

View File

@@ -9,7 +9,6 @@ BOT_NAME = 'xsscrapy'
SPIDER_MODULES = ['xsscrapy.spiders']
NEWSPIDER_MODULE = 'xsscrapy.spiders'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
#USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
# Get a random user agent for each crawled page
USER_AGENT_LIST = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
@@ -18,12 +17,14 @@ USER_AGENT_LIST = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTM
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0']
DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.RandomUserAgentMiddleware': 400,
'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None, # disable stock user-agent middleware
'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': None, # disable stock cookies middleware
'xsscrapy.middlewares.CookiesMiddleware': 700} # 700 is in the downloader_middlewares_base
DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.RandomUserAgentMiddleware': 100}
#'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None, # disable stock user-agent middleware
#'xsscrapy.middlewares.Unencode_url': 100, # experimental URL-unquoting middleware (does not work yet)
#'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': None, # disable stock cookies middleware
#'xsscrapy.middlewares.CookiesMiddleware': 700} # 700 is the priority the stock CookiesMiddleware has in DOWNLOADER_MIDDLEWARES_BASE
COOKIES_ENABLED = True
#COOKIES_DEBUG = True
# prevent duplicate link crawling
DUPEFILTER_CLASS = 'scrapy.dupefilter.RFPDupeFilter'
@@ -32,9 +33,5 @@ ITEM_PIPELINES = {'xsscrapy.pipelines.XSS_pipeline':100} # Look into what the 10
FEED_FORMAT = 'csv'
FEED_URI = 'vulnerable-urls.txt'
#COOKIES_DEBUG = True
CONCURRENT_REQUESTS = 12
# Test for injection via headers
#DEFAULT_REQUEST_HEADERS = {'Referer': '9zqjx', 'User-Agent':'9zqjx'}

View File

@@ -18,15 +18,6 @@ import HTMLParser
import requests
import Cookie
#import w3lib.url
#def new_safe_url_string(url, encoding='utf8'):
# _safe_chars += '<>(){}="\''
# s = unicode_to_str(url, encoding)
# return moves.urllib.parse.quote(s, _safe_chars)
#
#w3lib.url.safe_url_string = new_safe_url_string
from IPython import embed
__author__ = 'Dan McInerney danhmcinerney@gmail.com'
@@ -38,7 +29,6 @@ data control?
TO DO
-DirBuster!
-Cookie headers
-LONGTERM add DOM detection or static js analysis (check retire.js project)
-cleanup xss_chars_finder(self, response)
-prevent Requests from being URL encoded (line 57 of __init__ in Requests class)
@@ -202,7 +192,14 @@ class XSSspider(CrawlSpider):
reqs = []
orig_url = response.url
body = response.body
doc = lxml.html.fromstring(body, base_url=orig_url)
try:
doc = lxml.html.fromstring(body, base_url=orig_url)
except lxml.etree.ParseError:
return
except lxml.etree.XMLSyntaxError:
return
forms = doc.xpath('//form')
quote_enclosure = self.single_or_double_quote(body)
payload = self.test_str
@@ -363,6 +360,10 @@ class XSSspider(CrawlSpider):
joinedParams = urllib.urlencode(params, doseq=1) # doseq maps the params back together
newURL = urllib.unquote(protocol+hostname+path+'?'+joinedParams)
# Skip URLs that do not contain the payload
if self.test_str not in newURL:
continue
# Prevent URL dupes since we have dont_filter set to True for payloaded urls
if set(newURL).issubset(self.url_requests_made):
continue
@@ -371,6 +372,7 @@ class XSSspider(CrawlSpider):
for p in params:
if p[1] == payload:
changed_value = p[0]
payloaded_urls.append((newURL, changed_value, payload))
if len(payloaded_urls) > 0:
@@ -465,9 +467,11 @@ class XSSspider(CrawlSpider):
attr_inj = self.parse_attr_xpath(attr_xss)
text_xss = doc.xpath("//*[contains(text(), '%s')]" % payload)
tag_inj = self.parse_tag_xpath(text_xss)
#anywhere_text = doc.xpath("//*/text()")
# If the response page is just plain text then tag_inj might miss some reflected payloads
anywhere_text = doc.xpath("//text()")
try:
anywhere_text = doc.xpath("//text()")
except UnicodeDecodeError:
self.log('Could not utf8 decode character')
any_text_inj = self.parse_anytext_xpath(anywhere_text, payload)
if len(attr_inj) > 0:
@@ -559,6 +563,10 @@ class XSSspider(CrawlSpider):
chars_between_delims = '%s(.*?)%s' % (self.test_str, self.test_str) # 25 since some payload is just under that escaped
inj_num = len(injections)
mismatch = False
if xss_type == 'form':
POST_to = response.meta['POST_to']
else:
POST_to = None
orig_payload = response.meta['payload'].strip(self.test_str) # xss char payload
escaped_payload = self.unescape_payload(orig_payload)
@@ -593,27 +601,28 @@ class XSSspider(CrawlSpider):
###### XSS RULES ########
# Redirect
if 'javascript:prompt(99)' == joined_chars.lower(): # redir
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, item)
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, POST_to, item)
# JS breakout
if self.js_pld == escaped_payload: #js chars
if break_js_chars.issubset(chars):
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, item)
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, POST_to, item)
# Attribute breakout
if attr:
if quote_enclosure in escaped_payload:
if break_attr_chars.issubset(chars):
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, item)
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, POST_to, item)
# Tag breakout
else:
if '<' and '>' in escaped_payload:
if break_tag_chars.issubset(chars):
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, item)
return self.make_item(joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line_html, POST_to, item)
# Check the entire body for exact match
# Escape out all the special regex characters to search for the payload in the html body
re_payload = escaped_payload.replace('(', '\(').replace(')', '\)').replace('"', '\\"').replace("'", "\\'")
re_payload = re_payload.replace('{', '\{').replace('}', '\}').replace(']', '\]').replace('[', '\[')
re_payload = '.{1}?'+re_payload
@@ -623,17 +632,20 @@ class XSSspider(CrawlSpider):
if unescaped_match == escaped_payload:
# Does not start with \ so it's not escaping any chars
#item['line'] = self.get_inj_line(body, escaped_payload, item)
item['err'] = 'Response passed injection point specific search without success, checking for exact payload match in body (higher chance of false positive here)'
item['line'] = self.get_inj_line(body, f, item)
item['xss_payload'] = orig_payload
item['unfiltered'] = escaped_payload
item['inj_point'] = inj_point
item['xss_type'] = xss_type
item['url'] = orig_url
if POST_to:
item['POST_to'] = POST_to
return item
#except UnicodeDecodeError:
#self.log('Could not decode html off %s' % orig_url)
def make_item(self, joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line, item):
def make_item(self, joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line, POST_to, item):
''' Create the vulnerable item '''
item['line'] = line
@@ -643,6 +655,8 @@ class XSSspider(CrawlSpider):
item['xss_type'] = xss_type
item['inj_tag'] = tag
item['url'] = orig_url
if POST_to:
item['POST_to'] = POST_to
return item
def get_inj_line(self, body, payload, item):
@@ -874,7 +888,13 @@ class XSSspider(CrawlSpider):
payload = response.meta['payload']
quote_enclosure = response.meta['quote']
body = response.body
doc = lxml.html.fromstring(body)
try:
doc = lxml.html.fromstring(body)
except lxml.html.XMLSyntaxError:
self.log('Python html-parsing library lxml failed to parse %s' % orig_url)
self.log('Was attempting to run payload %s against the above URL in a %s injection' % (payload, inj_type))
forms = doc.xpath('//form')
resp_url = response.url
reqs = []
@@ -952,7 +972,8 @@ class XSSspider(CrawlSpider):
'quote':quote_enclosure,
'orig_url':orig_url,
'forms':forms,
'type':'form'},
'type':'form',
'POST_to':url},
dont_filter = True)
if req.callback == self.xss_chars_finder:
@@ -963,3 +984,5 @@ class XSSspider(CrawlSpider):
return reqs
else:
return