added url path xss detection, refactored url param functions

This commit is contained in:
Dan McInerney
2014-12-13 18:40:23 -07:00
parent 60542c3429
commit ea5950bdeb
4 changed files with 138 additions and 62 deletions

.gitignore vendored
View File

@@ -1,5 +1,6 @@
 *.pyc
 xsscrapy-vulns*
+*.txt
 *.swp
 *.swo
 *.png

View File

@@ -35,16 +35,17 @@ class InjectedDupeFilter(object):
     def process_request(self, request, spider):
         meta = request.meta
-        if 'xss_place' not in meta or 'delim' not in meta:
+        if 'xss_place' not in meta:
             return
         delim = meta['delim']
         # Injected URL dupe handling
         if meta['xss_place'] == 'url':
+            url = request.url
             #replace the delim characters with nothing so we only test the URL
             #with the payload
-            url = request.url.replace(delim, '')
-            if url in URLS_SEEN:
+            no_delim_url = url.replace(delim, '')
+            if no_delim_url in URLS_SEEN:
                 raise IgnoreRequest
             spider.log('Sending payloaded URL: %s' % url)
             URLS_SEEN.add(url)
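
A minimal sketch of the dedupe idea (delimiter and URL are assumed values, Python 2 like the project): the delimiter is regenerated for every request, so stripping it out gives a stable key to store in URLS_SEEN.

delim = '1zqjab'   # hypothetical per-request delimiter: self.delim + two random letters
url = 'http://example.com/page?q=' + delim + "'\"(){}<x>:/" + delim + ';9'
no_delim_url = url.replace(delim, '')
# no_delim_url is identical no matter which random letters were chosen,
# so it is what gets stored in and checked against URLS_SEEN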

View File

@@ -10,12 +10,10 @@ import lxml.etree
 import lxml.html
 from lxml.html import soupparser, fromstring
 import itertools
-#from IPython import embed
+from IPython import embed

 class XSSCharFinder(object):
     def __init__(self):
-        self.redir_pld = 'JaVAscRIPT:prompt(99)'
-        self.test_str = '\'"(){}<x>:/'
         self.url_param_xss_items = []

     def process_item(self, item, spider):
@@ -24,6 +22,7 @@ class XSSCharFinder(object):
         payload = meta['payload']
         delim = meta['delim']
+        param = meta['xss_param']
         resp_url = response.url
         body = response.body
         mismatch = False
@@ -32,12 +31,9 @@ class XSSCharFinder(object):
         # Regex: ( ) mean group 1 is within the parens, . means any char,
         # {0,80} means match any char 0 to 80 times, 80 chosen because double URL encoding
         # ? makes the search nongreedy so it stops after hitting its limits
-        #full_match = '%s.*?%s' % (delim, delim)
         full_match = '%s.{0,80}?%s' % (delim, delim)
         # matches with semicolon which sometimes cuts results off
         sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)
-        #chars_between_delims = '%s(.*?)%s' % (delim, delim)
-        #chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim)

         # Quick sqli check based on DSSS
         dbms, regex = self.sqli_check(body, meta['orig_body'])
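
A minimal sketch of how these patterns catch a reflected payload (delimiter and response body are assumed values, Python 2 like the project): the payload is wrapped in the same random delimiter on both sides, so full_match finds whatever survived filtering between the two copies, and sc_full_match additionally requires the trailing ;9.

import re

delim = '1zqjab'   # hypothetical: self.delim + two random letters
full_match = '%s.{0,80}?%s' % (delim, delim)
sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)

# fake response body where the server reflected the payload but HTML-encoded < and >
body = 'value="1zqjab\'"(){}&lt;x&gt;:/1zqjab;9"'
matches = [m.group() for m in re.finditer(full_match, body)]
# matches == ['1zqjab\'"(){}&lt;x&gt;:/1zqjab']
sc_matches = [m.group() for m in re.finditer(sc_full_match, body)]
# sc_matches == ['1zqjab\'"(){}&lt;x&gt;:/1zqjab;9']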
@@ -52,11 +48,13 @@ class XSSCharFinder(object):
         # XSS detection starts here
         re_matches = sorted([(m.start(), m.group()) for m in re.finditer(full_match, body)])
-        if re_matches:
+        if '/verifypasswd.php/1zqj' in resp_url:
+            embed()
+        if len(re_matches) > 0:
             scolon_matches = sorted([(m.start(), m.group()) for m in re.finditer(sc_full_match, body)])
             lxml_injs = self.get_lxml_matches(full_match, body, resp_url, delim)
             if lxml_injs:
                 err = None
                 if len(re_matches) != len(lxml_injs):
                     spider.log('Error: mismatch in injections found by lxml and regex. Higher chance of false positive for %s' % resp_url)
@@ -64,7 +62,6 @@ class XSSCharFinder(object):
                     mismatch = True
                 inj_data = self.combine_regex_lxml(lxml_injs, re_matches, scolon_matches, body, mismatch)
-                # If mismatch is True, then "for offset in sorted(inj_data)" will fail with TypeError
                 try:
                     for offset in sorted(inj_data):
@@ -239,8 +236,7 @@ class XSSCharFinder(object):
             tag_index, tag, attr, attr_val, payload, reflected_chars, line = injection
             pl_delim = payload[:7]
-            #full_match = '%s.*?%s' % (pl_delim, pl_delim)
-            full_match = '%s.{0,80}?%s' % (pl_delim, pl_delim)
+            full_match = '%s.{0,85}?%s' % (pl_delim, pl_delim)
             line = re.sub(full_match, 'INJECTION', line)

             all_chars_payloads = {}
@@ -375,6 +371,9 @@ class XSSCharFinder(object):
             # javascript:alert(1) vulns
             # We do this slicing operation because ;9 might be at the end
             # although it's unnecessary for the payload
+            # CHECK HERE, PASS DOWN THE ORIG ATTR VAL
+            #if delim+'subbed' in attr_val:
             if attr_val[:len(delim+'subbed')] == delim+'subbed':
                 if tag == 'a' and attr == 'href':
                     # Only need : ( and ) to use javascript:prompt(4) redir payload
@@ -559,8 +558,7 @@ class XSSCharFinder(object):
         subbed_body = re.sub(full_match, sub, body)
         doc = self.html_parser(subbed_body, resp_url)
         lxml_injs = self.xpath_inj_points(sub, doc)
-        if lxml_injs:
-            return lxml_injs
+        return lxml_injs

     def html_parser(self, body, resp_url):
         try:
@@ -657,8 +655,8 @@ class XSSCharFinder(object):
             #unfiltered_chars = self.get_unfiltered_chars(payload, pl_delim, scolon_matches, match_offset)
             reflected_chars = self.get_reflected_chars(tag, attr, payload, pl_delim, scolon_matches, match_offset)
             # Common false+ shows only "> as unfiltered if script parses the chars between 2 unrelated delim strs
-            if reflected_chars == '">':
-                reflected_chars = ''
+            #if reflected_chars == '">':
+            #    reflected_chars = ''
             all_inj_data[match_offset] = [tag_index, tag, attr, attr_val, payload, reflected_chars, line]
         return all_inj_data
@@ -869,20 +867,16 @@ class XSSCharFinder(object):
         # Make sure js payloads remove escaped ' and ", also remove ;
         # since ; will show up in html encoded entities. If ; is unfiltered
         # it will be added after this function
-        #escaped_chars = re.findall(r'\\(.)', chars)
         chars_between_delim = payload.replace(delim, '')#.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')
         #If injection is inside script tag, remove the escaped chars
         if tag == 'script' or attr in self.event_attributes():
             chars_between_delim = chars_between_delim.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')
         else:
-            # If it's not a script then just remove the \'s otherwise they show up in Unfiltered in the item
             chars_between_delim = chars_between_delim.replace("\\", "")

-        # List for just the inj point
-        #for c in chars_found:
-        #    if c in self.test_str:
-        #        unfiltered_chars.append(c)
-        #    # Check if a colon needs to be added to the unfiltered chars
+        # Check if a colon needs to be added to the unfiltered chars
         for scolon_match in scolon_matches:
             # Confirm the string offset of the match is the same
             # Since scolon_match will only exist when ;9 was found
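
A minimal sketch of the escape-stripping above (delimiter and reflection are assumed values, Python 2 like the project): inside a script tag or an event-handler attribute, a backslash-escaped quote cannot break out of the string, so it is dropped from the reflected-character list along with ; (which also shows up in harmless HTML entities).

delim = '1zqjab'   # hypothetical per-request delimiter
reflected = "1zqjab\\'\\\"(){}<x>:/1zqjab"   # what came back between the delimiters
chars = reflected.replace(delim, '')
# script/event-handler context: escaped quotes and ; are not usable, so strip them
chars = chars.replace("\\'", "").replace('\\"', '').replace(';', '')
# chars is now '(){}<x>:/' -- the quotes were effectively filtered in this context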

View File

@@ -6,7 +6,7 @@ from scrapy.http import FormRequest, Request
 from scrapy.selector import Selector
 from xsscrapy.items import inj_resp
 from xsscrapy.loginform import fill_login_form
-from urlparse import urlparse, parse_qsl, urljoin
+from urlparse import urlparse, parse_qsl, urljoin, urlunparse, urlunsplit
 from scrapy.http.cookies import CookieJar
 from cookielib import Cookie
@@ -32,7 +32,8 @@ class XSSspider(CrawlSpider):
     # If you're logging into a site with a logout link, you'll want to
     # uncomment the rule below and comment the shorter one right after to
     # prevent yourself from being logged out automatically
-    rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    #rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    rules = (Rule(LinkExtractor(deny='contactus'), callback='parse_resp', follow=True), )

     def __init__(self, *args, **kwargs):
         # run using: scrapy crawl xss_spider -a url='http://example.com'
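
The deny='contactus' pattern is specific to whatever site was being tested; the same mechanism the comment describes can keep an authenticated crawl off a logout link. A sketch, assuming current Scrapy import paths (the project's own imports may differ):

# Sketch only: deny takes a regex or list of regexes matched against extracted URLs
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Rule

rules = (Rule(LinkExtractor(deny=('logout', 'signout')), callback='parse_resp', follow=True), )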
@@ -41,7 +42,7 @@ class XSSspider(CrawlSpider):
         hostname = urlparse(self.start_urls[0]).hostname
         # With subdomains
         self.allowed_domains = [hostname] # adding [] around the value seems to allow it to crawl subdomain of value
-        self.delim = '1zqjx'
+        self.delim = '1zqj'
         # semi colon goes on end because sometimes it cuts stuff off like
         # gruyere or the second cookie delim
         self.test_str = '\'"(){}<x>:/'
@@ -135,6 +136,10 @@ class XSSspider(CrawlSpider):
         reqs = []
         orig_url = response.url
         body = response.body
+        parsed_url = urlparse(orig_url)
+        # parse_qsl rather than parse_qs in order to preserve order
+        # will always return a list
+        url_params = parse_qsl(parsed_url.query, keep_blank_values=True)
         try:
             # soupparser will handle broken HTML better (like identical attributes) but god damn will you pay for it
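
A minimal sketch of why parse_qsl with keep_blank_values=True is used here (URL is an assumed value): it returns an ordered list of (name, value) pairs and keeps empty parameters, which lets the spider later rebuild the query string with exactly one value swapped for a payload.

from urlparse import urlparse, parse_qsl   # Python 2, matching the project

parsed = urlparse('http://example.com/page1.php?x=1&y=&z=2')
url_params = parse_qsl(parsed.query, keep_blank_values=True)
# url_params == [('x', '1'), ('y', ''), ('z', '2')]  -- order preserved, blank 'y' kept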
@@ -177,11 +182,9 @@ class XSSspider(CrawlSpider):
         if form_reqs:
             reqs += form_reqs

-        # Test URL variables with xss strings
-        payloaded_urls, url_delim_str = self.make_URLs(orig_url, payload)
-        print 'URL:', payloaded_urls, url_delim_str
+        payloaded_urls = self.make_URLs(orig_url, parsed_url, url_params)
+        # list of tuples where item[0]=url, and item[1]=changed param
         if payloaded_urls:
-            url_reqs = self.make_url_reqs(orig_url, payloaded_urls, url_delim_str)
+            url_reqs = self.make_url_reqs(orig_url, payloaded_urls)
             if url_reqs:
                 reqs += url_reqs
@@ -243,10 +246,8 @@ class XSSspider(CrawlSpider):
         ''' Payload each form input in each input's own request '''
         reqs = []
         vals_urls_meths = []
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         for form in forms:
             if form.inputs:
@@ -283,7 +284,7 @@ class XSSspider(CrawlSpider):
                                   'orig_url':orig_url,
                                   'xss_place':'form',
                                   'POST_to':url,
-                                  'delim':delim_str},
+                                  'delim':payload[:len(self.delim)+2]},
                             dont_filter=True,
                             callback=self.xss_chars_finder)
             reqs.append(req)
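
A minimal sketch of why 'delim' can now be sliced off the payload itself (values assumed, Python 2 like the project): make_payload() always puts the delimiter first, so the first len(self.delim)+2 characters recover it.

import random, string

delim = '1zqj'                 # self.delim after this commit
two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
delim_str = delim + two_rand_letters
payload = delim_str + '\'"(){}<x>:/' + delim_str + ';9'
assert payload[:len(delim) + 2] == delim_str   # the slice recovers the per-request delimiter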
@@ -300,9 +301,7 @@ class XSSspider(CrawlSpider):
     def make_cookie_reqs(self, url, payload, xss_param):
         ''' Generate payloaded cookie header requests '''
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         meta={'xss_place':'header',
@@ -310,7 +309,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':xss_param,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str},
+                              'delim':payload[:len(self.delim)+2]},
                         cookies={'userinput':payload},
                         callback=self.xss_chars_finder,
                         dont_filter=True)]
@@ -318,33 +317,116 @@ class XSSspider(CrawlSpider):
         if len(reqs) > 0:
             return reqs

-    def make_URLs(self, url, payload):
-        ''' Add links with variables in them to the queue again but with XSS testing payloads
-        Will return a tuple: (url, injection point, payload) '''
+    def make_URLs(self, orig_url, parsed_url, url_params):
+        """
+        Create the URL parameter payloaded URLs
+        """
+        payloaded_urls = []
+        # Create 1 URL per payloaded param
+        new_query_strings = self.get_single_payload_queries(url_params)
+        if new_query_strings:
+            # Payload the parameters
+            for query in new_query_strings:
+                query_str = query[0]
+                params = query[1]
+                payload = query[2]
+                # scheme, netloc, path, params, query (url params), fragment
+                payloaded_url = urlunparse((parsed_url[0], parsed_url[1], parsed_url[2], parsed_url[3], query_str, parsed_url[5]))
+                payloaded_url = urllib.unquote(payloaded_url)
+                payloaded_urls.append((payloaded_url, params, payload))
+            # Payload the URL path
+            payloaded_url_path = self.payload_url_path(parsed_url)
+            payloaded_urls.append(payloaded_url_path)
+        else:
+            # Payload end of URL if there's no parameters
+            payloaded_end_of_url = self.payload_end_of_url(orig_url)
+            payloaded_urls.append(payloaded_end_of_url)
+        if len(payloaded_urls) > 0:
+            return payloaded_urls
+    def payload_url_path(self, parsed_url):
+        """
+        Payload the URL path like:
+        http://example.com/page1.php?x=1&y=2 -->
+        http://example.com/page1.php/FUZZ/?x=1&y=2
+        """
+        # Remove / so that it doesn't think it's 2 folders in the fuzz chars
+        payload = self.make_payload().replace('/', '')
+        path = parsed_url[2]
+        if path.endswith('/'):
+            path = path + payload + '/'
+        else:
+            path = path + '/' + payload + '/'
+        #scheme, netloc, path, params, query (url params), fragment
+        payloaded_url = urlunparse((parsed_url[0], parsed_url[1], path, parsed_url[3], parsed_url[4], parsed_url[5]))
+        payloaded_url = urllib.unquote(payloaded_url)
+        payloaded_data = (payloaded_url, 'URL path', payload)
+        return payloaded_data
+    def get_single_payload_queries(self, url_params):
+        """
+        Make a list of lists of tuples where each secondary list has 1 payloaded
+        param and the rest are original value
+        """
+        new_payloaded_params = []
+        changed_params = []
+        modified = False
+        # Create a list of lists where num of lists = len(params)
+        for x in xrange(0, len(url_params)):
+            single_url_params = []
+            # Make the payload
+            payload = self.make_payload()
+            for p in url_params:
+                param, value = p
+                # if param has not been modified and we haven't changed a parameter for this loop
+                if param not in changed_params and modified == False:
+                    # Do we need the original value there? Might be helpful sometimes but think about testing for <frame src="FUZZCHARS">
+                    # versus <frame src="http://something.com/FUZZCHARS"> and the xss payload javascript:alert(1)
+                    new_param_val = (param, payload)
+                    #new_param_val = (param, value+payload)
+                    single_url_params.append(new_param_val)
+                    changed_params.append(param)
+                    modified = param
+                else:
+                    single_url_params.append(p)
+            # Add the modified, urlencoded params to the master list
+            new_payloaded_params.append((urllib.urlencode(single_url_params), modified, payload))
+            # Reset the changed parameter tracker
+            modified = False
+        if len(new_payloaded_params) > 0:
+            # [(payloaded params, payloaded param, payload), (payloaded params, payloaded param, payload)]
+            return new_payloaded_params
+    def make_payload(self):
+        """
+        Make the payload with a unique delim
+        """
         two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
         delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = delim_str + self.test_str + delim_str + ';9'
+        return payload

-        if '=' in url and '?' in url:
-            # If URL has variables, payload them
-            payloaded_urls = self.payload_url_vars(url, payload)
-        else:
-            # If URL has no variables, tack payload onto end of URL
-            payloaded_urls = self.payload_end_of_url(url, payload)
-        return payloaded_urls, delim_str
-
-    def payload_end_of_url(self, url, payload):
+    def payload_end_of_url(self, url):
         ''' Payload the end of the URL to catch some DOM(?) and other reflected XSSes '''
+        payload = self.make_payload()
         # Make URL test and delim strings unique
         if url[-1] == '/':
             payloaded_url = url+payload
         else:
             payloaded_url = url+'/'+payload
-        return [(payloaded_url, 'end of url', payload)]
+        return (payloaded_url, 'end of url', payload)

     def payload_url_vars(self, url, payload):
         ''' Payload the URL variables '''
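
A standalone, simplified approximation of what the new URL helpers produce, one payloaded parameter per generated query string (names and the FUZZ placeholder are made up; the real code substitutes a fresh delimited payload per parameter and also appends a path-payloaded URL via payload_url_path):

import urllib
from urlparse import urlparse, parse_qsl, urlunparse   # Python 2, matching the project

def single_payload_queries(url_params, payload):
    # simplified stand-in for get_single_payload_queries()
    queries = []
    for i, (param, _) in enumerate(url_params):
        single = [(p, payload) if j == i else (p, v) for j, (p, v) in enumerate(url_params)]
        queries.append((urllib.urlencode(single), param, payload))
    return queries

parsed = urlparse('http://example.com/page1.php?x=1&y=2')
params = parse_qsl(parsed.query, keep_blank_values=True)
for query_str, changed_param, payload in single_payload_queries(params, 'FUZZ'):
    print urlunparse((parsed[0], parsed[1], parsed[2], parsed[3], query_str, parsed[5])), changed_param
# http://example.com/page1.php?x=FUZZ&y=2 x
# http://example.com/page1.php?x=1&y=FUZZ y
# payload_url_path() would additionally yield http://example.com/page1.php/FUZZ/?x=1&y=2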
@@ -443,7 +525,7 @@ class XSSspider(CrawlSpider):
         return (netloc, protocol, doc_domain, path)

-    def make_url_reqs(self, orig_url, payloaded_urls, delim_str):
+    def make_url_reqs(self, orig_url, payloaded_urls):
         ''' Make the URL requests '''
         reqs = [Request(url[0],
@@ -451,7 +533,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':url[1],
                               'orig_url':orig_url,
                               'payload':url[2],
-                              'delim':delim_str},
+                              'delim':url[2][:len(self.delim)+2]},
                         callback = self.xss_chars_finder)
                 for url in payloaded_urls] # Meta is the payload
@@ -461,9 +543,7 @@ class XSSspider(CrawlSpider):
     def make_header_reqs(self, url, payload, inj_headers):
         ''' Generate header requests '''
-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         headers={inj_header:payload},
@@ -471,7 +551,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':inj_header,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str,
+                              'delim':payload[:len(self.delim)+2],
                               'UA':self.get_user_agent(inj_header, payload)},
                         dont_filter=True,
                         callback = self.xss_chars_finder)