Added Bloom filtering, much easier on memory

This commit is contained in:
DanMcInerney
2014-08-05 06:53:55 -04:00
parent f4cf5cbc20
commit 659ef89faa
6 changed files with 299 additions and 249 deletions

23
xsscrapy/bloom.py Normal file
View File

@@ -0,0 +1,23 @@
from pybloom import BloomFilter
from scrapy.utils.job import job_dir
from scrapy.dupefilter import BaseDupeFilter
class BloomURLDupeFilter(BaseDupeFilter):
"""Request Fingerprint duplicates filter"""
def __init__(self, path=None):
self.file = None
self.fingerprints = BloomFilter(2000000, 0.00001)
@classmethod
def from_settings(cls, settings):
return cls(job_dir(settings))
def request_seen(self, request):
fp = request.url
if fp in self.fingerprints:
return True
self.fingerprints.add(fp)
def close(self, reason):
self.fingerprints = None

View File

@@ -15,3 +15,10 @@ class vuln(Item):
error = Field() error = Field()
line = Field() line = Field()
POST_to = Field() POST_to = Field()
class inj_chars(Item):
resp = Field()
def __str__(self):
''' Prevent the item from being printed to output during debugging '''
return ''

View File

@@ -1,9 +1,19 @@
from xsscrapy.settings import USER_AGENT_LIST
import random import random
from scrapy import log from scrapy import log
from scrapy.exceptions import IgnoreRequest
from urlparse import unquote from urlparse import unquote
import re import re
from pybloom import BloomFilter
# Filter out duplicate requests with Bloom filters since they're much easier on memory
URLS_FORMS_HEADERS = BloomFilter(3000000, 0.00001)
USER_AGENT_LIST = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0']
class RandomUserAgentMiddleware(object): class RandomUserAgentMiddleware(object):
''' Use a random user-agent for each request ''' ''' Use a random user-agent for each request '''
@@ -16,6 +26,61 @@ class RandomUserAgentMiddleware(object):
else: else:
request.headers.setdefault('User-Agent', ua) request.headers.setdefault('User-Agent', ua)
return
class InjectedDupeFilter(object):
''' Filter payloaded URLs, headers, and forms since all of those have dont_filter = True '''
def process_request(self, request, spider):
meta = request.meta
if not 'type' in meta:
return
if meta['type'] == 'url':
url = request.url
if url in URLS_FORMS_HEADERS:
print ' FILTERED URL DUPE!!', url
raise IgnoreRequest
if request.callback == spider.xss_chars_finder:
spider.log('Sending payloaded URL: %s' % url)
URLS_FORMS_HEADERS.add(url)
return
elif meta['type'] == 'form':
u = request.url
v = meta['values']
m = request.method
# URL, input, payload, values
u_v_m = (u, v, m)
if u_v_m in URLS_FORMS_HEADERS:
raise IgnoreRequest
if request.callback == spider.xss_chars_finder:
spider.log('Sending request for possibly vulnerable form to %s' % u)
URLS_FORMS_HEADERS.add(u_v_m)
return
elif meta['type'] == 'header':
u = request.url
h = meta['inj_point']
p = meta['payload']
# URL, changed eader, payload
u_h_p = (u, h, p)
if u_h_p in URLS_FORMS_HEADERS:
raise IgnoreRequest
if request.callback == spider.xss_chars_finder:
spider.log('Sending payloaded %s header, payload: %s' % (h, p))
URLS_FORMS_HEADERS.add(u_h_p)
return
#class Unencode_url(object): #class Unencode_url(object):
# ''' Fails. Not spectacularly, just doesn't do what we want ''' # ''' Fails. Not spectacularly, just doesn't do what we want '''
# def process_request(self, request, spider): # def process_request(self, request, spider):

View File

@@ -51,6 +51,10 @@ class XSS_pipeline(object):
def write_to_file(self, item): def write_to_file(self, item):
with open('formatted-vulns.txt', 'a+') as f: with open('formatted-vulns.txt', 'a+') as f:
f.write('\n') f.write('\n')
if 'error' in item:
f.write('Error: '+item['error']+'\n')
if 'POST_to' in item:
f.write('POST url: '+item['POST_to']+'\n')
f.write('URL: '+item['url']+'\n') f.write('URL: '+item['url']+'\n')
f.write('Unfiltered: '+item['unfiltered']+'\n') f.write('Unfiltered: '+item['unfiltered']+'\n')
f.write('Payload: '+item['xss_payload']+'\n') f.write('Payload: '+item['xss_payload']+'\n')

View File

@@ -11,13 +11,8 @@ NEWSPIDER_MODULE = 'xsscrapy.spiders'
#USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36' #USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
# Get a random user agent for each crawled page # Get a random user agent for each crawled page
USER_AGENT_LIST = ['Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36', DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.InjectedDupeFilter': 100,
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36', 'xsscrapy.middlewares.RandomUserAgentMiddleware': 200}
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:28.0) Gecko/20100101 Firefox/28.0']
DOWNLOADER_MIDDLEWARES = {'xsscrapy.middlewares.RandomUserAgentMiddleware': 100}
#'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None, # disable stock user-agent middleware #'scrapy.contrib.downloadermiddleware.useragent.UserAgentMiddleware': None, # disable stock user-agent middleware
#'xsscrapy.middlewares.Unencode_url': 100', # disable stock user-agent middleware #'xsscrapy.middlewares.Unencode_url': 100', # disable stock user-agent middleware
#'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': None, # disable stock cookies middleware #'scrapy.contrib.downloadermiddleware.cookies.CookiesMiddleware': None, # disable stock cookies middleware
@@ -27,11 +22,12 @@ COOKIES_ENABLED = True
#COOKIES_DEBUG = True #COOKIES_DEBUG = True
# prevent duplicate link crawling # prevent duplicate link crawling
DUPEFILTER_CLASS = 'scrapy.dupefilter.RFPDupeFilter' #DUPEFILTER_CLASS = 'scrapy.dupefilter.RFPDupeFilter'
DUPEFILTER_CLASS = 'xsscrapy.bloom.BloomURLDupeFilter'
ITEM_PIPELINES = {'xsscrapy.pipelines.XSS_pipeline':100} # Look into what the 100 is doing (I know lower is higher priority, 0-1000) ITEM_PIPELINES = {'xsscrapy.pipelines.XSS_pipeline':100} # Look into what the 100 is doing (I know lower is higher priority, 0-1000)
FEED_FORMAT = 'csv' FEED_FORMAT = 'csv'
FEED_URI = 'vulnerable-urls.txt' FEED_URI = 'vulnerable-urls.txt'
CONCURRENT_REQUESTS = 12 #CONCURRENT_REQUESTS = 12

View File

@@ -5,18 +5,18 @@ from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.selector import Selector from scrapy.selector import Selector
from scrapy.http import Request, FormRequest from scrapy.http import Request, FormRequest
from xsscrapy.items import vuln from xsscrapy.items import vuln#, inj_resp
from loginform import fill_login_form from loginform import fill_login_form
from urlparse import urlparse, parse_qsl from urlparse import urlparse, parse_qsl
import lxml.html import lxml.html
import lxml.etree
import urllib import urllib
import re import re
import sys import sys
import cgi import cgi
import HTMLParser import HTMLParser
import requests import requests
import Cookie
from IPython import embed from IPython import embed
@@ -24,11 +24,9 @@ __author__ = 'Dan McInerney danhmcinerney@gmail.com'
''' '''
xss headers: xss headers:
cookie
data control? data control?
TO DO TO DO
-DirBuster!
-LONGTERM add DOM detection or static js analysis (check retire.js project) -LONGTERM add DOM detection or static js analysis (check retire.js project)
-cleanup xss_chars_finder(self, response) -cleanup xss_chars_finder(self, response)
-prevent Requests from being URL encoded (line 57 of __init__ in Requests class) -prevent Requests from being URL encoded (line 57 of __init__ in Requests class)
@@ -50,13 +48,7 @@ class XSSspider(CrawlSpider):
self.tag_pld = '()=<>' self.tag_pld = '()=<>'
self.js_pld = '\'"(){}[];' self.js_pld = '\'"(){}[];'
self.redir_pld = 'JaVAscRIPT:prompt(99)' self.redir_pld = 'JaVAscRIPT:prompt(99)'
self.start_url_cookie_xssed = False
#attr_pld = generated once injection points are found (requires checking if single or double quotes ends html attribute values) #attr_pld = generated once injection points are found (requires checking if single or double quotes ends html attribute values)
self.form_requests_made = set()
self.header_requests_made = set()
self.url_requests_made = set()
self.cookie_requests_made = set()
self.var_val = None
self.login_user = kwargs.get('user') self.login_user = kwargs.get('user')
self.login_pass = kwargs.get('pw') self.login_pass = kwargs.get('pw')
@@ -119,76 +111,9 @@ class XSSspider(CrawlSpider):
self.log('Added robots.txt disallowed URL to our queue: '+r.url) self.log('Added robots.txt disallowed URL to our queue: '+r.url)
return reqs return reqs
def make_cookie_reqs(self, orig_url, payloaded_cookies, payloads, quote, cookies):
reqs = []
if payloads[0] == self.test_str:
cb = self.payloaded_reqs
else:
cb = self.xss_chars_finder
if self.start_url_cookie_xssed == False:
u = urlparse(self.start_urls[0])
base_start_url = u.scheme+'://'+u.hostname
urls = [orig_url]+[base_start_url]
self.start_url_cookie_xssed = True
else:
urls = [orig_url]
for url in urls:
reqs += [Request(url,
meta={'payload':payload,
'type':'header',
'inj_point':'cookie',
'quote':quote,
'orig_url':orig_url,
'cookies':cookies},
cookies=payloaded_cookie,
callback=cb,
dont_filter=True)
for payloaded_cookie in payloaded_cookies
for payload in payloads]
if len(reqs) > 0:
for r in reqs:
if r.callback == self.xss_chars_finder:
self.log('Sending payloaded cookies to %s' % r.url)
return reqs
def remove_cookie_dupes(self, reqs):
''' We have dont_filter == True in self.make_cookie_reqs so we gotta filter em ourselves '''
new_reqs = []
for r in reqs:
url_payload_set = set([(r.url, r.payload)])
if url_payload_set.issubset(self.cookie_requests_made):
continue
self.cookie_requests_made.add(url_payload_set)
new_reqs.append(r)
if len(new_reqs) > 0:
return new_reqs
def payload_cookies(self, cookies, payloads):
''' Add payload to each cookie value '''
all_cookies = []
payloaded_cookie_dict = {}
for payload in payloads:
for cookie in cookies:
c = Cookie.SimpleCookie(cookie)
for k in c:
c[k].value = c[k].value+payload
payloaded_cookie_dict[k] = c[k].value
all_cookies.append(payloaded_cookie_dict)
payloaded_cookie_dict = {}
if len(all_cookies) > 0:
return all_cookies
def parse_resp(self, response): def parse_resp(self, response):
''' The main response parsing function, called on every response from a new URL ''' The main response parsing function, called on every response from a new URL
Checks for XSS in headers, cookies, and url''' Checks for XSS in headers and url'''
reqs = [] reqs = []
orig_url = response.url orig_url = response.url
body = response.body body = response.body
@@ -196,8 +121,10 @@ class XSSspider(CrawlSpider):
try: try:
doc = lxml.html.fromstring(body, base_url=orig_url) doc = lxml.html.fromstring(body, base_url=orig_url)
except lxml.etree.ParseError: except lxml.etree.ParseError:
self.log('Parse error on %s' % orig_url)
return return
except lxml.etree.XMLSyntaxError: except lxml.etree.XMLSyntaxError:
self.log('XML syntax error on %s' % orig_url)
return return
forms = doc.xpath('//form') forms = doc.xpath('//form')
@@ -205,24 +132,6 @@ class XSSspider(CrawlSpider):
payload = self.test_str payload = self.test_str
payloads = [payload] payloads = [payload]
# Get any cookies
# It seems to be up and working (no thorough testing) but the cons are outweighing the pros for inclusion
# Changing the cookies often leads to error pages or automatically sending the payloaded cookies
# for responses that shouldn't be payloaded. All this plus it's a barely-exploitable refl xss anyway
#cookies = response.headers.getlist('Set-Cookie')
## Make sure the cookie isn't already payloaded
#for c in cookies:
# if self.test_str in c:
# cookies = None
# break
#if cookies:
# payloaded_cookies = self.payload_cookies(cookies, payloads)
# if payloaded_cookies:
# cookie_reqs = self.make_cookie_reqs(orig_url, payloaded_cookies, payloads, quote_enclosure, cookies)
# reqs += cookie_reqs
# Edit a few select headers with injection string and resend request # Edit a few select headers with injection string and resend request
headers = ['Referer', 'User-Agent'] headers = ['Referer', 'User-Agent']
header_reqs = self.make_header_reqs(orig_url, payloads, headers, quote_enclosure, None) header_reqs = self.make_header_reqs(orig_url, payloads, headers, quote_enclosure, None)
@@ -246,20 +155,17 @@ class XSSspider(CrawlSpider):
# Each Request here will be given a specific callback relative to whether it was URL variables or form inputs that were XSS payloaded # Each Request here will be given a specific callback relative to whether it was URL variables or form inputs that were XSS payloaded
return reqs return reqs
def get_payloads(self, payloads, method): def encode_payloads(self, payloads, method):
''' HTML encode the payload and URL encode it if the form method is GET ''' ''' HTML encode the payload and URL encode it if the form method is GET '''
html_encoded = cgi.escape(payloads[0], quote=True) html_encoded = cgi.escape(payloads[0], quote=True)
if html_encoded != payloads[0]:
payloads.append(html_encoded) payloads.append(html_encoded)
if method == 'GET': if method == 'GET':
url_encoded = urllib.quote_plus(payloads[0]) url_encoded = urllib.quote_plus(payloads[0])
if url_encoded != payloads[0]:
payloads.append(url_encoded) payloads.append(url_encoded)
return payloads return payloads
if len(reqs) > 0:
return reqs
else:
return
def check_form_validity(self, values, url, payload, orig_url): def check_form_validity(self, values, url, payload, orig_url):
''' Make sure the form action url and values are valid/exist ''' ''' Make sure the form action url and values are valid/exist '''
@@ -336,8 +242,12 @@ class XSSspider(CrawlSpider):
inj_type = response.meta['type'] inj_type = response.meta['type']
forms = response.meta['forms'] forms = response.meta['forms']
body = response.body body = response.body
doc = lxml.html.fromstring(body)
resp_url = response.url resp_url = response.url
try:
doc = lxml.html.fromstring(body)
except lxml.etree.XMLSyntaxError:
self.log('XML Syntax Error on %s' % resp_url)
return
injections = self.inj_points(payload, doc) injections = self.inj_points(payload, doc)
if injections: if injections:
@@ -364,11 +274,6 @@ class XSSspider(CrawlSpider):
if self.test_str not in newURL: if self.test_str not in newURL:
continue continue
# Prevent URL dupes since we have dont_filter set to True for payloaded urls
if set(newURL).issubset(self.url_requests_made):
continue
self.url_requests_made.add(newURL)
for p in params: for p in params:
if p[1] == payload: if p[1] == payload:
changed_value = p[0] changed_value = p[0]
@@ -378,22 +283,6 @@ class XSSspider(CrawlSpider):
if len(payloaded_urls) > 0: if len(payloaded_urls) > 0:
return payloaded_urls return payloaded_urls
def make_cookie_dict(self, cookies):
cookie_dict = {}
for c in cookies:
cookie = Cookie.SimpleCookie(c)
for k in cookie:
cookie_dict[k] = cookie[k]+s
try:
var, val = c.split(';', 1)[0].split('=', 1)
cookie_dict[var] = val
except Exception as e:
print str(e)
continue
if len(cookie_dict) > 0:
return cookie_dict
def getURLparams(self, url): def getURLparams(self, url):
''' Parse out the URL parameters ''' ''' Parse out the URL parameters '''
parsedUrl = urlparse(url) parsedUrl = urlparse(url)
@@ -472,6 +361,7 @@ class XSSspider(CrawlSpider):
anywhere_text = doc.xpath("//text()") anywhere_text = doc.xpath("//text()")
except UnicodeDecodeError: except UnicodeDecodeError:
self.log('Could not utf8 decode character') self.log('Could not utf8 decode character')
return
any_text_inj = self.parse_anytext_xpath(anywhere_text, payload) any_text_inj = self.parse_anytext_xpath(anywhere_text, payload)
if len(attr_inj) > 0: if len(attr_inj) > 0:
@@ -551,6 +441,10 @@ class XSSspider(CrawlSpider):
# Test: http://www.securitysift.com/quotes-and-xss-planning-your-escape/ for attribute xss without <> # Test: http://www.securitysift.com/quotes-and-xss-planning-your-escape/ for attribute xss without <>
# namely: meta tag with content attr, a tag with href attribute (onmouseover payload), option tag any attr (onmouseover payload) # namely: meta tag with content attr, a tag with href attribute (onmouseover payload), option tag any attr (onmouseover payload)
#item = inj_resp()
#item['resp'] = response
#return item
item = vuln() item = vuln()
xss_type = response.meta['type'] xss_type = response.meta['type']
orig_url = response.meta['orig_url'] orig_url = response.meta['orig_url']
@@ -560,7 +454,7 @@ class XSSspider(CrawlSpider):
resp_url = response.url resp_url = response.url
body = response.body body = response.body
# Regex: ( ) mean group 1 is within the parens, . means any char, {1,25} means match any char 1 to 25 times # Regex: ( ) mean group 1 is within the parens, . means any char, {1,25} means match any char 1 to 25 times
chars_between_delims = '%s(.*?)%s' % (self.test_str, self.test_str) # 25 since some payload is just under that escaped chars_between_delims = '%s(.{1,75}?)%s' % (self.test_str, self.test_str)
inj_num = len(injections) inj_num = len(injections)
mismatch = False mismatch = False
if xss_type == 'form': if xss_type == 'form':
@@ -591,7 +485,6 @@ class XSSspider(CrawlSpider):
line, tag, attr, attr_val = self.parse_injections(injections[idx]) line, tag, attr, attr_val = self.parse_injections(injections[idx])
except IndexError: except IndexError:
# Mismatch in num of test injections and num of payloads found # Mismatch in num of test injections and num of payloads found
# I feel like I could put a break instead of a continue here but I am so fearful of false negatives
break break
joined_chars = ''.join(unfiltered_chars) joined_chars = ''.join(unfiltered_chars)
@@ -630,9 +523,9 @@ class XSSspider(CrawlSpider):
for f in full_matches: for f in full_matches:
unescaped_match = ''.join(self.get_unfiltered_chars(f, escaped_payload)) unescaped_match = ''.join(self.get_unfiltered_chars(f, escaped_payload))
if unescaped_match == escaped_payload: if unescaped_match == escaped_payload:
# Does not start with \ so it's not escaping any chars #if '\\' == unescaped_match[0]:
#item['line'] = self.get_inj_line(body, escaped_payload, item) # continue
item['err'] = 'Response passed injection point specific search without success, checking for exact payload match in body (higher chance of false positive here)' item['error'] = 'Response passed injection point specific search without success, checking for exact payload match in body (higher chance of false positive here)'
item['line'] = self.get_inj_line(body, f, item) item['line'] = self.get_inj_line(body, f, item)
item['xss_payload'] = orig_payload item['xss_payload'] = orig_payload
item['unfiltered'] = escaped_payload item['unfiltered'] = escaped_payload
@@ -642,8 +535,6 @@ class XSSspider(CrawlSpider):
if POST_to: if POST_to:
item['POST_to'] = POST_to item['POST_to'] = POST_to
return item return item
#except UnicodeDecodeError:
#self.log('Could not decode html off %s' % orig_url)
def make_item(self, joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line, POST_to, item): def make_item(self, joined_chars, xss_type, orig_payload, tag, orig_url, inj_point, line, POST_to, item):
''' Create the vulnerable item ''' ''' Create the vulnerable item '''
@@ -747,12 +638,12 @@ class XSSspider(CrawlSpider):
def single_or_double_quote(self, body): def single_or_double_quote(self, body):
''' I feel like this function is poorly written. At least it seems reliable. ''' ''' I feel like this function is poorly written. At least it seems reliable. '''
quote = re.search('<a href=(.)', body) #quote = re.search('<a href=(.)', body)
if quote == None: #if quote == None:
quote = re.search('<link href=(.)', body) # quote = re.search('<meta.+=(.)', body)
try: #try:
quote = quote.group(1) # quote = quote.group(1)
except AttributeError: #except AttributeError:
squote = re.findall('.=(\')', body) squote = re.findall('.=(\')', body)
dquote = re.findall('.=(")', body) dquote = re.findall('.=(")', body)
if len(squote) > len(dquote): if len(squote) > len(dquote):
@@ -760,11 +651,31 @@ class XSSspider(CrawlSpider):
else: else:
quote = '"' quote = '"'
if quote not in ['"', "'"]: #if quote not in ['"', "'"]:
quote = '"' # quote = '"'
return quote return quote
# def single_or_double_quote(self, body):
# ''' I feel like this function is poorly written. At least it seems reliable. '''
# quote = re.search('<a href=(.)', body)
# if quote == None:
# quote = re.search('<link href=(.)', body)
# try:
# quote = quote.group(1)
# except AttributeError:
# squote = re.findall('.=(\')', body)
# dquote = re.findall('.=(")', body)
# if len(squote) > len(dquote):
# quote = "'"
# else:
# quote = '"'
#
# if quote not in ['"', "'"]:
# quote = '"'
#
# return quote
def event_attributes(self): def event_attributes(self):
''' HTML tag attributes that allow javascript ''' ''' HTML tag attributes that allow javascript '''
@@ -787,7 +698,7 @@ class XSSspider(CrawlSpider):
return event_attributes return event_attributes
def make_url_reqs(self, orig_url, payloaded_urls, quote_enclosure, injections): def make_url_reqs(self, orig_url, payloaded_urls, quote_enclosure, injections):
''' Make the URL requests and filter out dupes ''' ''' Make the URL requests '''
reqs = [Request(url[0], reqs = [Request(url[0],
meta={'type':'url', meta={'type':'url',
@@ -797,25 +708,11 @@ class XSSspider(CrawlSpider):
'quote':quote_enclosure}) 'quote':quote_enclosure})
for url in payloaded_urls] # Meta is the payload for url in payloaded_urls] # Meta is the payload
for url in payloaded_urls:
if url[2] == self.test_str:
break
reqs = self.add_callback(injections, reqs) reqs = self.add_callback(injections, reqs)
reqs = self.url_dupe_filter(reqs)
if reqs: if reqs:
return reqs return reqs
def url_dupe_filter(self, reqs):
for r in reqs:
# Make sure we're only showing payloaded URLs, not tester URLs
if r.callback == self.xss_chars_finder:
# Don't filter payloaded ones, but do filter reqs with the payload == self.test_str
r.dont_filter = True
self.log('Sending payloaded URL: '+r.url)
return reqs
def make_header_reqs(self, url, payloads, headers, quote_enclosure, injections): def make_header_reqs(self, url, payloads, headers, quote_enclosure, injections):
''' Generate header requests ''' ''' Generate header requests '''
@@ -830,36 +727,11 @@ class XSSspider(CrawlSpider):
for header in headers for header in headers
for payload in payloads] for payload in payloads]
reqs = self.remove_header_dupes(reqs)
reqs = self.add_callback(injections, reqs) reqs = self.add_callback(injections, reqs)
if len(reqs) > 0: if len(reqs) > 0:
for r in reqs:
# Make sure we're only showing payloaded URLs, not tester URLs
if r.callback == self.xss_chars_finder:
self.log('Sending payloaded header %s with payload %s' % (r.meta['inj_point'], r.meta['payload']))
return reqs return reqs
def remove_header_dupes(self, reqs):
''' Put all header requests made into a tuple of (url, header, payload) and
compare new header requests to this master set to prevent dupes '''
new_reqs =[]
for r in reqs:
for h in r.headers:
header = h # Referer or User-Agent
break
payload = r.headers[header]
u_h_p = (r.url, header, payload)
if set(u_h_p).issubset(self.header_requests_made):
continue
else:
self.header_requests_made.add(u_h_p)
new_reqs.append(r)
if len(reqs) > 0:
return new_reqs
def add_callback(self, injections, reqs): def add_callback(self, injections, reqs):
''' Add the callback to the requests depending on if it's a test req or payloaded req ''' ''' Add the callback to the requests depending on if it's a test req or payloaded req '''
@@ -873,14 +745,6 @@ class XSSspider(CrawlSpider):
return reqs return reqs
def make_cookie_payloads(self, payloads):
new_ploads = []
for p in payloads:
p.replace('"', '\"').replace("'", "\'")
#p = '"'+p+'"'
new_ploads.append(p)
return new_ploads
def payloaded_reqs(self, response): def payloaded_reqs(self, response):
inj_type = response.meta['type'] inj_type = response.meta['type']
inj_point = response.meta['inj_point'] #header for header req, just 'form' for form req, url param for url req inj_point = response.meta['inj_point'] #header for header req, just 'form' for form req, url param for url req
@@ -891,7 +755,7 @@ class XSSspider(CrawlSpider):
try: try:
doc = lxml.html.fromstring(body) doc = lxml.html.fromstring(body)
except lxml.html.XMLSyntaxError: except lxml.etree.XMLSyntaxError:
self.log('Python html-parsing library lxml failed to parse %s' % orig_url) self.log('Python html-parsing library lxml failed to parse %s' % orig_url)
self.log('Was attempting to run payload %s against the above URL in a %s injection' % (payload, inj_type)) self.log('Was attempting to run payload %s against the above URL in a %s injection' % (payload, inj_type))
@@ -906,16 +770,7 @@ class XSSspider(CrawlSpider):
if inj_type == 'header': if inj_type == 'header':
headers = [inj_point] headers = [inj_point]
# Cookies
if headers[0] == 'cookie':
cookies = response.meta['cookies']
c_payloads = self.make_cookie_payloads(payloads)
payloaded_cookies = self.payload_cookies(cookies, c_payloads)
cookie_reqs = self.make_cookie_reqs(orig_url, payloaded_cookies, c_payloads, quote_enclosure, cookies) # Headers = None
if cookie_reqs:
reqs += cookie_reqs
# Referer and User-Agent # Referer and User-Agent
else:
header_reqs = self.make_header_reqs(orig_url, payloads, headers, quote_enclosure, injections) header_reqs = self.make_header_reqs(orig_url, payloads, headers, quote_enclosure, injections)
if header_reqs: if header_reqs:
reqs += header_reqs reqs += header_reqs
@@ -942,18 +797,15 @@ class XSSspider(CrawlSpider):
confirm that value + url + POST/GET has not been made into a request before, finally send the request ''' confirm that value + url + POST/GET has not been made into a request before, finally send the request '''
reqs = [] reqs = []
vals_urls_meths = [] vals_urls_meths = []
url = None
for form in forms: for form in forms:
payloads = self.get_payloads(payloads, form.method) payloads = self.encode_payloads(payloads, form.method)
for payload in payloads: for payload in payloads:
url = None
values, url, method = self.fill_form(orig_url, form, payload) values, url, method = self.fill_form(orig_url, form, payload)
url = self.check_form_validity(values, url, payload, orig_url) url = self.check_form_validity(values, url, payload, orig_url)
if not url: if not url:
continue continue
# Check for duplicate form submissions by comparing (values, url, method) to previously sent (values, url, method)
if not self.dupe_form(values, url, method, payload):
# Get form field names # Get form field names
form_fields = ', '.join([f for f in form.fields]) form_fields = ', '.join([f for f in form.fields])
if payload == self.test_str: if payload == self.test_str:
@@ -961,7 +813,7 @@ class XSSspider(CrawlSpider):
else: else:
cb = self.xss_chars_finder cb = self.xss_chars_finder
# Make the payloaded requests, dont_filter = True because scrapy treats url encoded data as == to nonurl encoded # Make the payloaded requests
req = FormRequest(url, req = FormRequest(url,
callback=cb, callback=cb,
formdata=values, formdata=values,
@@ -973,16 +825,119 @@ class XSSspider(CrawlSpider):
'orig_url':orig_url, 'orig_url':orig_url,
'forms':forms, 'forms':forms,
'type':'form', 'type':'form',
'POST_to':url}, 'POST_to':url,
'values':values},
dont_filter = True) dont_filter = True)
if req.callback == self.xss_chars_finder:
self.log('Sending request for possibly vulnerable form using payload: %s' % payload)
reqs.append(req) reqs.append(req)
if len(reqs) > 0: if len(reqs) > 0:
return reqs return reqs
else:
return
# def make_cookie_reqs(self, orig_url, payloaded_cookies, payloads, quote, cookies):
# reqs = []
# if payloads[0] == self.test_str:
# cb = self.payloaded_reqs
# else:
# cb = self.xss_chars_finder
# if self.start_url_cookie_xssed == False:
# u = urlparse(self.start_urls[0])
# base_start_url = u.scheme+'://'+u.hostname
# urls = [orig_url]+[base_start_url]
# self.start_url_cookie_xssed = True
# else:
# urls = [orig_url]
# for url in urls:
# reqs += [Request(url,
# meta={'payload':payload,
# 'type':'header',
# 'inj_point':'cookie',
# 'quote':quote,
# 'orig_url':orig_url,
# 'cookies':cookies},
# cookies=payloaded_cookie,
# callback=cb,
# dont_filter=True)
# for payloaded_cookie in payloaded_cookies
# for payload in payloads]
# if len(reqs) > 0:
# for r in reqs:
# if r.callback == self.xss_chars_finder:
# self.log('Sending payloaded cookies to %s' % r.url)
# return reqs
# def payload_cookies(self, cookies, payloads):
# ''' Add payload to each cookie value '''
# all_cookies = []
# payloaded_cookie_dict = {}
# for payload in payloads:
# for cookie in cookies:
# c = Cookie.SimpleCookie(cookie)
# for k in c:
# c[k].value = c[k].value+payload
# payloaded_cookie_dict[k] = c[k].value
# all_cookies.append(payloaded_cookie_dict)
# payloaded_cookie_dict = {}
# if len(all_cookies) > 0:
# return all_cookies
# if headers[0] == 'cookie':
# cookies = response.meta['cookies']
# c_payloads = self.make_cookie_payloads(payloads)
# payloaded_cookies = self.payload_cookies(cookies, c_payloads)
# cookie_reqs = self.make_cookie_reqs(orig_url, payloaded_cookies, c_payloads, quote_enclosure, cookies) # Headers = None
# if cookie_reqs:
# reqs += cookie_reqs
# Get any cookies
# It seems to be up and working (no thorough testing) but the cons are outweighing the pros for inclusion
# Changing the cookies often leads to error pages or automatically sending the payloaded cookies
# for responses that shouldn't be payloaded. All this plus it's a barely-exploitable refl xss anyway
#cookies = response.headers.getlist('Set-Cookie')
## Make sure the cookie isn't already payloaded
#for c in cookies:
# if self.test_str in c:
# cookies = None
# break
#if cookies:
# payloaded_cookies = self.payload_cookies(cookies, payloads)
# if payloaded_cookies:
# cookie_reqs = self.make_cookie_reqs(orig_url, payloaded_cookies, payloads, quote_enclosure, cookies)
# reqs += cookie_reqs
# def make_cookie_dict(self, cookies):
# cookie_dict = {}
# for c in cookies:
# cookie = Cookie.SimpleCookie(c)
# for k in cookie:
# cookie_dict[k] = cookie[k]+s
# try:
# var, val = c.split(';', 1)[0].split('=', 1)
# cookie_dict[var] = val
# except Exception as e:
# print str(e)
# continue
#
# if len(cookie_dict) > 0:
# return cookie_dict
#
# def make_cookie_payloads(self, payloads):
# new_ploads = []
# for p in payloads:
# p.replace('"', '\"').replace("'", "\'")
# #p = '"'+p+'"'
# new_ploads.append(p)
# return new_ploads