added url path xss detection, refactored url param functions

commit ea5950bdeb
parent 60542c3429
Author: Dan McInerney
Date:   2014-12-13 18:40:23 -07:00

4 changed files with 138 additions and 62 deletions

.gitignore

@@ -1,5 +1,6 @@
 *.pyc
 xsscrapy-vulns*
 *.txt
 *.swp
 *.swo
+*.png

xsscrapy/middlewares.py

@@ -35,16 +35,17 @@ class InjectedDupeFilter(object):
     def process_request(self, request, spider):
         meta = request.meta
-        if 'xss_place' not in meta or 'delim' not in meta:
+        if 'xss_place' not in meta:
             return
         delim = meta['delim']
         # Injected URL dupe handling
         if meta['xss_place'] == 'url':
-            url = request.url.replace(delim, '')
-            if url in URLS_SEEN:
+            url = request.url
+            #replace the delim characters with nothing so we only test the URL
+            #with the payload
+            no_delim_url = url.replace(delim, '')
+            if no_delim_url in URLS_SEEN:
                 raise IgnoreRequest
             spider.log('Sending payloaded URL: %s' % url)
             URLS_SEEN.add(url)
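
The idea behind the no_delim_url comparison, as a minimal standalone sketch (hypothetical URLs, not the project's code): two payloaded requests that differ only in their random delimiter letters should count as duplicates, so the delimiter is stripped before the seen-set check.

    URLS_SEEN = set()

    def is_duplicate(url, delim):
        # Compare URLs with the per-request delimiter removed so the
        # random letters don't make every payloaded URL look unique
        no_delim_url = url.replace(delim, '')
        if no_delim_url in URLS_SEEN:
            return True
        URLS_SEEN.add(no_delim_url)
        return False

    print is_duplicate('http://example.com/?q=1zqjab"(){}1zqjab', '1zqjab')  # False
    print is_duplicate('http://example.com/?q=1zqjcd"(){}1zqjcd', '1zqjcd')  # True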

xsscrapy/pipelines.py

@@ -10,12 +10,10 @@ import lxml.etree
 import lxml.html
 from lxml.html import soupparser, fromstring
 import itertools
-#from IPython import embed
+from IPython import embed

 class XSSCharFinder(object):
     def __init__(self):
         self.redir_pld = 'JaVAscRIPT:prompt(99)'
-        self.test_str = '\'"(){}<x>:/'
         self.url_param_xss_items = []

     def process_item(self, item, spider):
@@ -24,6 +22,7 @@ class XSSCharFinder(object):
         payload = meta['payload']
         delim = meta['delim']
+        param = meta['xss_param']
         resp_url = response.url
         body = response.body
         mismatch = False
@@ -32,12 +31,9 @@ class XSSCharFinder(object):
         # Regex: ( ) mean group 1 is within the parens, . means any char,
         # {1,80} means match any char 0 to 80 times, 80 chosen because double URL encoding
         # ? makes the search nongreedy so it stops after hitting its limits
-        #full_match = '%s.*?%s' % (delim, delim)
         full_match = '%s.{0,80}?%s' % (delim, delim)
         # matches with semicolon which sometimes cuts results off
         sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)
-        #chars_between_delims = '%s(.*?)%s' % (delim, delim)
-        #chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim)

         # Quick sqli check based on DSSS
         dbms, regex = self.sqli_check(body, meta['orig_body'])
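
How the delimiter regex works in isolation, as a small hypothetical example (made-up delim and body, standalone rather than the pipeline's code): the nongreedy bounded match returns exactly the characters that survived between the two copies of the delimiter.

    import re

    delim = '1zqjab'
    body = '<input value="1zqjab\'"(){}<x>:/1zqjab">'
    # Nongreedy and capped at 80 chars so two unrelated delims far apart
    # in the page don't get glued together into one giant match
    full_match = '%s.{0,80}?%s' % (delim, delim)
    for m in re.finditer(full_match, body):
        print m.start(), m.group()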
@@ -52,11 +48,13 @@ class XSSCharFinder(object):
         # XSS detection starts here
         re_matches = sorted([(m.start(), m.group()) for m in re.finditer(full_match, body)])
-        if re_matches:
+        if '/verifypasswd.php/1zqj' in resp_url:
+            embed()
+        if len(re_matches) > 0:
             scolon_matches = sorted([(m.start(), m.group()) for m in re.finditer(sc_full_match, body)])
             lxml_injs = self.get_lxml_matches(full_match, body, resp_url, delim)
             if lxml_injs:
                 err = None
                 if len(re_matches) != len(lxml_injs):
                     spider.log('Error: mismatch in injections found by lxml and regex. Higher chance of false positive for %s' % resp_url)
@@ -64,7 +62,6 @@ class XSSCharFinder(object):
                     mismatch = True
             inj_data = self.combine_regex_lxml(lxml_injs, re_matches, scolon_matches, body, mismatch)
             # If mismatch is True, then "for offset in sorted(inj_data)" will fail with TypeError
             try:
                 for offset in sorted(inj_data):
@@ -239,8 +236,7 @@ class XSSCharFinder(object):
             tag_index, tag, attr, attr_val, payload, reflected_chars, line = injection
             pl_delim = payload[:7]
-            #full_match = '%s.*?%s' % (pl_delim, pl_delim)
-            full_match = '%s.{0,80}?%s' % (pl_delim, pl_delim)
+            full_match = '%s.{0,85}?%s' % (pl_delim, pl_delim)
             line = re.sub(full_match, 'INJECTION', line)
             all_chars_payloads = {}
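
The re.sub line normalizes every reflection before reporting. A quick hypothetical illustration (made-up delim and line, not the project's code):

    import re

    pl_delim = '1zqjab'
    line = '<b>1zqjab\'"(){}<x>:/1zqjab;9</b>'
    full_match = '%s.{0,85}?%s' % (pl_delim, pl_delim)
    # Collapse the delim-bounded reflection to a fixed marker so the
    # surrounding HTML line reads cleanly in the report
    print re.sub(full_match, 'INJECTION', line)  # <b>INJECTION;9</b>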
@@ -375,6 +371,9 @@ class XSSCharFinder(object):
             # javascript:alert(1) vulns
             # We do this slicing operation because ;9 might be at the end
             # although it's unnecessary for the payload
+            # CHECK HERE, PASS DOWN THE ORIG ATTR VAL
+            #if delim+'subbed' in attr_val:
+            if attr_val[:len(delim+'subbed')] == delim+'subbed':
             if tag == 'a' and attr == 'href':
                 # Only need : ( and ) to use javascript:prompt(4) redir payload
@@ -559,7 +558,6 @@ class XSSCharFinder(object):
         subbed_body = re.sub(full_match, sub, body)
         doc = self.html_parser(subbed_body, resp_url)
         lxml_injs = self.xpath_inj_points(sub, doc)
         if lxml_injs:
             return lxml_injs

     def html_parser(self, body, resp_url):
@@ -657,8 +655,8 @@ class XSSCharFinder(object):
             #unfiltered_chars = self.get_unfiltered_chars(payload, pl_delim, scolon_matches, match_offset)
             reflected_chars = self.get_reflected_chars(tag, attr, payload, pl_delim, scolon_matches, match_offset)
             # Common false+ shows only "> as unfiltered if script parses the chars between 2 unrelated delim strs
-            if reflected_chars == '">':
-                reflected_chars = ''
+            #if reflected_chars == '">':
+            #    reflected_chars = ''
             all_inj_data[match_offset] = [tag_index, tag, attr, attr_val, payload, reflected_chars, line]
         return all_inj_data
@@ -869,20 +867,16 @@ class XSSCharFinder(object):
         # Make sure js payloads remove escaped ' and ", also remove ;
         # since ; will show up in html encoded entities. If ; is unfiltered
         # it will be added after this function
-        #escaped_chars = re.findall(r'\\(.)', chars)
         chars_between_delim = payload.replace(delim, '')#.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')
         #If injection is inside script tag, remove the escaped chars
         if tag == 'script' or attr in self.event_attributes():
             chars_between_delim = chars_between_delim.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')
         else:
             # If it's not a script then just remove the \'s otherwise they show up in Unfiltered in the item
             chars_between_delim = chars_between_delim.replace("\\", "")
         # List for just the inj point
-        #for c in chars_found:
-        #    if c in self.test_str:
-        #        unfiltered_chars.append(c)
-        #        # Check if a colon needs to be added to the unfiltered chars
+        # Check if a colon needs to be added to the unfiltered chars
         for scolon_match in scolon_matches:
             # Confirm the string offset of the match is the same
             # Since scolon_match will only exist when ;9 was found

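Roughly what the escaped-character handling above does, as a standalone sketch (hypothetical reflected payload, simplified from get_reflected_chars): inside a script tag or event handler, backslash-escaped quotes are useless to an attacker, so they're dropped from the set of surviving characters.

    def surviving_chars(reflected_payload, delim, in_script_context):
        # Strip the delimiters to get just the reflected test characters
        chars = reflected_payload.replace(delim, '')
        if in_script_context:
            # Escaped quotes can't break out of a JS string; drop them
            for esc in ("\\'", '\\"', ';'):
                chars = chars.replace(esc, '')
        else:
            # Outside scripts only the backslashes themselves are noise
            chars = chars.replace('\\', '')
        return chars

    print surviving_chars("1zqjab\\'(){}<x>1zqjab", '1zqjab', True)   # (){}<x>
    print surviving_chars("1zqjab\\'(){}<x>1zqjab", '1zqjab', False)  # '(){}<x>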
xsscrapy/spiders/xss_spider.py

@@ -6,7 +6,7 @@ from scrapy.http import FormRequest, Request
 from scrapy.selector import Selector
 from xsscrapy.items import inj_resp
 from xsscrapy.loginform import fill_login_form
-from urlparse import urlparse, parse_qsl, urljoin
+from urlparse import urlparse, parse_qsl, urljoin, urlunparse, urlunsplit
 from scrapy.http.cookies import CookieJar
 from cookielib import Cookie
@@ -32,7 +32,8 @@ class XSSspider(CrawlSpider):
     # If you're logging into a site with a logout link, you'll want to
     # uncomment the rule below and comment the shorter one right after to
     # prevent yourself from being logged out automatically
-    rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    #rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    rules = (Rule(LinkExtractor(deny='contactus'), callback='parse_resp', follow=True), )

     def __init__(self, *args, **kwargs):
         # run using: scrapy crawl xss_spider -a url='http://example.com'
@@ -41,7 +42,7 @@ class XSSspider(CrawlSpider):
         hostname = urlparse(self.start_urls[0]).hostname
         # With subdomains
         self.allowed_domains = [hostname] # adding [] around the value seems to allow it to crawl subdomain of value
-        self.delim = '1zqjx'
+        self.delim = '1zqj'
         # semi colon goes on end because sometimes it cuts stuff off like
         # gruyere or the second cookie delim
         self.test_str = '\'"(){}<x>:/'
@@ -135,6 +136,10 @@ class XSSspider(CrawlSpider):
         reqs = []
         orig_url = response.url
         body = response.body
+        parsed_url = urlparse(orig_url)
+        # parse_qsl rather than parse_qs in order to preserve order
+        # will always return a list
+        url_params = parse_qsl(parsed_url.query, keep_blank_values=True)
         try:
             # soupparser will handle broken HTML better (like identical attributes) but god damn will you pay for it
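
Why parse_qsl rather than parse_qs, in one quick example (hypothetical URL): parse_qsl returns an ordered list of (name, value) tuples, and keep_blank_values=True keeps empty parameters that would otherwise be silently dropped.

    from urlparse import urlparse, parse_qsl

    parsed = urlparse('http://example.com/page1.php?x=1&y=&z=3')
    print parse_qsl(parsed.query, keep_blank_values=True)
    # [('x', '1'), ('y', ''), ('z', '3')]
    print parse_qsl(parsed.query)
    # [('x', '1'), ('z', '3')] -- the blank y is gone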
@@ -177,11 +182,9 @@ class XSSspider(CrawlSpider):
         if form_reqs:
             reqs += form_reqs

         # Test URL variables with xss strings
-        payloaded_urls, url_delim_str = self.make_URLs(orig_url, payload) # list of tuples where item[0]=url, and item[1]=changed param
-        print 'URL:', payloaded_urls, url_delim_str
+        payloaded_urls = self.make_URLs(orig_url, parsed_url, url_params)
         if payloaded_urls:
-            url_reqs = self.make_url_reqs(orig_url, payloaded_urls, url_delim_str)
+            url_reqs = self.make_url_reqs(orig_url, payloaded_urls)
             if url_reqs:
                 reqs += url_reqs
@@ -244,9 +247,7 @@ class XSSspider(CrawlSpider):
         reqs = []
         vals_urls_meths = []
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         for form in forms:
             if form.inputs:
@@ -283,7 +284,7 @@ class XSSspider(CrawlSpider):
                                 'orig_url':orig_url,
                                 'xss_place':'form',
                                 'POST_to':url,
-                                'delim':delim_str},
+                                'delim':payload[:len(self.delim)+2]},
                           dont_filter=True,
                           callback=self.xss_chars_finder)
             reqs.append(req)
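
Why slicing the payload recovers the delim, shown as a tiny standalone example (made-up random letters): make_payload builds delim_str as self.delim plus two random letters, so delim_str is always the first len(self.delim) + 2 characters of the payload.

    delim = '1zqj'                                 # self.delim after this commit
    delim_str = delim + 'ab'                       # 'ab' stands in for the two random letters
    payload = delim_str + '\'"(){}<x>:/' + delim_str + ';9'
    print payload[:len(delim) + 2]                 # prints: 1zqjab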
@@ -300,9 +301,7 @@ class XSSspider(CrawlSpider):
     def make_cookie_reqs(self, url, payload, xss_param):
         ''' Generate payloaded cookie header requests '''
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         meta={'xss_place':'header',
@@ -310,7 +309,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':xss_param,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str},
+                              'delim':payload[:len(self.delim)+2]},
                         cookies={'userinput':payload},
                         callback=self.xss_chars_finder,
                         dont_filter=True)]
@@ -318,33 +317,116 @@ class XSSspider(CrawlSpider):
         if len(reqs) > 0:
             return reqs
-    def make_URLs(self, url, payload):
-        ''' Add links with variables in them to the queue again but with XSS testing payloads
-        Will return a tuple: (url, injection point, payload) '''
+    def make_URLs(self, orig_url, parsed_url, url_params):
+        """
+        Create the URL parameter payloaded URLs
+        """
+        payloaded_urls = []
+        # Create 1 URL per payloaded param
+        new_query_strings = self.get_single_payload_queries(url_params)
+        if new_query_strings:
+            # Payload the parameters
+            for query in new_query_strings:
+                query_str = query[0]
+                params = query[1]
+                payload = query[2]
+                #scheme    #netloc    #path    #params    #query (url params)    #fragment
+                payloaded_url = urlunparse((parsed_url[0], parsed_url[1], parsed_url[2], parsed_url[3], query_str, parsed_url[5]))
+                payloaded_url = urllib.unquote(payloaded_url)
+                payloaded_urls.append((payloaded_url, params, payload))
+            # Payload the URL path
+            payloaded_url_path = self.payload_url_path(parsed_url)
+            payloaded_urls.append(payloaded_url_path)
+        else:
+            # Payload end of URL if there's no parameters
+            payloaded_end_of_url = self.payload_end_of_url(orig_url)
+            payloaded_urls.append(payloaded_end_of_url)
+        if len(payloaded_urls) > 0:
+            return payloaded_urls
+    def payload_url_path(self, parsed_url):
+        """
+        Payload the URL path like:
+        http://example.com/page1.php?x=1&y=2 -->
+        http://example.com/page1.php/FUZZ/?x=1&y=2
+        """
+        # Remove / so that it doesn't think it's 2 folders in the fuzz chars
+        payload = self.make_payload().replace('/', '')
+        path = parsed_url[2]
+        if path.endswith('/'):
+            path = path + payload + '/'
+        else:
+            path = path + '/' + payload + '/'
+        #scheme, netloc, path, params, query (url params), fragment
+        payloaded_url = urlunparse((parsed_url[0], parsed_url[1], path, parsed_url[3], parsed_url[4], parsed_url[5]))
+        payloaded_url = urllib.unquote(payloaded_url)
+        payloaded_data = (payloaded_url, 'URL path', payload)
+        return payloaded_data
+    def get_single_payload_queries(self, url_params):
+        """
+        Make a list of lists of tuples where each secondary list has 1 payloaded
+        param and the rest are original value
+        """
+        new_payloaded_params = []
+        changed_params = []
+        modified = False
+        # Create a list of lists where num of lists = len(params)
+        for x in xrange(0, len(url_params)):
+            single_url_params = []
+            # Make the payload
+            payload = self.make_payload()
+            for p in url_params:
+                param, value = p
+                # if param has not been modified and we haven't changed a parameter for this loop
+                if param not in changed_params and modified == False:
+                    # Do we need the original value there? Might be helpful sometimes but think about testing for <frame src="FUZZCHARS">
+                    # versus <frame src="http://something.com/FUZZCHARS"> and the xss payload javascript:alert(1)
+                    new_param_val = (param, payload)
+                    #new_param_val = (param, value+payload)
+                    single_url_params.append(new_param_val)
+                    changed_params.append(param)
+                    modified = param
+                else:
+                    single_url_params.append(p)
+            # Add the modified, urlencoded params to the master list
+            new_payloaded_params.append((urllib.urlencode(single_url_params), modified, payload))
+            # Reset the changed parameter tracker
+            modified = False
+        if len(new_payloaded_params) > 0:
+            # [(payloaded params, payloaded param, payload), (payloaded params, payloaded param, payload)]
+            return new_payloaded_params
+    def make_payload(self):
+        """
+        Make the payload with a unique delim
+        """
         two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
         delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = delim_str + self.test_str + delim_str + ';9'
+        return payload
-        if '=' in url and '?' in url:
-            # If URL has variables, payload them
-            payloaded_urls = self.payload_url_vars(url, payload)
-        else:
-            # If URL has no variables, tack payload onto end of URL
-            payloaded_urls = self.payload_end_of_url(url, payload)
-        return payloaded_urls, delim_str

-    def payload_end_of_url(self, url, payload):
+    def payload_end_of_url(self, url):
         ''' Payload the end of the URL to catch some DOM(?) and other reflected XSSes '''
+        payload = self.make_payload()
         # Make URL test and delim strings unique
         if url[-1] == '/':
             payloaded_url = url+payload
         else:
             payloaded_url = url+'/'+payload
-        return [(payloaded_url, 'end of url', payload)]
+        return (payloaded_url, 'end of url', payload)

     def payload_url_vars(self, url, payload):
         ''' Payload the URL variables '''
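
Taken together, the refactored flow turns one crawled URL into one payloaded URL per parameter plus one with a payloaded path. A rough standalone sketch of that flow (hypothetical URL; simplified stand-ins for get_single_payload_queries and payload_url_path, with a fixed payload instead of make_payload's random delim):

    from urlparse import urlparse, parse_qsl, urlunparse
    import urllib

    url = 'http://example.com/page1.php?x=1&y=2'
    parsed = urlparse(url)
    params = parse_qsl(parsed.query, keep_blank_values=True)
    payload = '1zqjab\'"(){}<x>:/1zqjab;9'    # shape of what make_payload() returns

    # One URL per parameter: payload that parameter, keep the rest as-is
    for i in xrange(len(params)):
        qs = [(n, payload) if j == i else (n, v)
              for j, (n, v) in enumerate(params)]
        print urllib.unquote(urlunparse(parsed[:4] + (urllib.urlencode(qs), parsed[5])))

    # Plus one URL with the payload injected as an extra path segment
    path = parsed.path + '/' + payload.replace('/', '') + '/'
    print urllib.unquote(urlunparse((parsed[0], parsed[1], path,
                                     parsed[3], parsed[4], parsed[5])))
    # http://example.com/page1.php?x=1zqjab'"(){}<x>:/1zqjab;9&y=2
    # http://example.com/page1.php?x=1&y=1zqjab'"(){}<x>:/1zqjab;9
    # http://example.com/page1.php/1zqjab'"(){}<x>:1zqjab;9/?x=1&y=2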
@@ -443,7 +525,7 @@ class XSSspider(CrawlSpider):
         return (netloc, protocol, doc_domain, path)

-    def make_url_reqs(self, orig_url, payloaded_urls, delim_str):
+    def make_url_reqs(self, orig_url, payloaded_urls):
         ''' Make the URL requests '''

         reqs = [Request(url[0],
@@ -451,7 +533,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':url[1],
                               'orig_url':orig_url,
                               'payload':url[2],
-                              'delim':delim_str},
+                              'delim':url[2][:len(self.delim)+2]},
                         callback = self.xss_chars_finder)
                 for url in payloaded_urls] # Meta is the payload
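
A minimal sketch of one such generated request (assumes Scrapy is installed; the URL, parameter, and payload tuple are made up). The delim rides along in meta so the dupe-filter middleware and the pipeline can both locate the reflection later:

    from scrapy.http import Request

    delim = '1zqj'
    payloaded_url = ('http://example.com/page1.php?x=1zqjab\'"(){}<x>:/1zqjab;9',
                     'x',                                # payloaded param
                     '1zqjab\'"(){}<x>:/1zqjab;9')       # payload
    req = Request(payloaded_url[0],
                  meta={'xss_place': 'url',
                        'xss_param': payloaded_url[1],
                        'payload': payloaded_url[2],
                        'delim': payloaded_url[2][:len(delim) + 2]})
    print req.meta['delim']    # prints: 1zqjab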
@@ -461,9 +543,7 @@ class XSSspider(CrawlSpider):
     def make_header_reqs(self, url, payload, inj_headers):
         ''' Generate header requests '''
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         headers={inj_header:payload},
@@ -471,7 +551,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':inj_header,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str,
+                              'delim':payload[:len(self.delim)+2],
                               'UA':self.get_user_agent(inj_header, payload)},
                         dont_filter=True,
                         callback = self.xss_chars_finder)