added url path xss detection, refactored url param functions
@@ -1,5 +1,6 @@
 *.pyc
 xsscrapy-vulns*
 *.txt
 *.swp
 *.swo
+*.png
@@ -35,16 +35,17 @@ class InjectedDupeFilter(object):
     def process_request(self, request, spider):

         meta = request.meta
-        if 'xss_place' not in meta or 'delim' not in meta:
+        if 'xss_place' not in meta:
             return
+        delim = meta['delim']

         # Injected URL dupe handling
         if meta['xss_place'] == 'url':
             url = request.url
             #replace the delim characters with nothing so we only test the URL
             #with the payload
-            url = request.url.replace(delim, '')
-            if url in URLS_SEEN:
+            no_delim_url = url.replace(delim, '')
+            if no_delim_url in URLS_SEEN:
                 raise IgnoreRequest
             spider.log('Sending payloaded URL: %s' % url)
             URLS_SEEN.add(url)

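The new dupe handling normalizes each payloaded URL by stripping the per-request delimiter before the seen-URL check, so revisiting the same page with a different random delimiter no longer produces duplicate payloaded requests. A minimal standalone sketch of that idea (hypothetical URLs and delimiters, not the middleware itself, which raises IgnoreRequest inside Scrapy):

URLS_SEEN = set()

def is_dupe(url, delim):
    # Strip the random per-request delimiter so two requests that differ
    # only in their delimiter letters collapse to the same key
    key = url.replace(delim, '')
    if key in URLS_SEEN:
        return True
    URLS_SEEN.add(key)
    return False

print(is_dupe("http://example.com/page?q=1zqjab'1zqjab;9", '1zqjab'))  # False, first time seen
print(is_dupe("http://example.com/page?q=1zqjxy'1zqjxy;9", '1zqjxy'))  # True, same URL once delims are stripped
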
@@ -10,12 +10,10 @@ import lxml.etree
 import lxml.html
 from lxml.html import soupparser, fromstring
 import itertools
-#from IPython import embed
+from IPython import embed

 class XSSCharFinder(object):
     def __init__(self):
         self.redir_pld = 'JaVAscRIPT:prompt(99)'
-        self.test_str = '\'"(){}<x>:/'
-        self.url_param_xss_items = []

     def process_item(self, item, spider):
@@ -24,6 +22,7 @@ class XSSCharFinder(object):

         payload = meta['payload']
         delim = meta['delim']
         param = meta['xss_param']
         resp_url = response.url
         body = response.body
         mismatch = False
@@ -32,12 +31,9 @@ class XSSCharFinder(object):
         # Regex: ( ) mean group 1 is within the parens, . means any char,
         # {1,80} means match any char 0 to 80 times, 80 chosen because double URL encoding
         # ? makes the search nongreedy so it stops after hitting its limits
-        #full_match = '%s.*?%s' % (delim, delim)
         full_match = '%s.{0,80}?%s' % (delim, delim)
         # matches with semicolon which sometimes cuts results off
         sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)
-        #chars_between_delims = '%s(.*?)%s' % (delim, delim)
-        #chars_between_delims = '%s(.{0,80}?)%s' % (delim, delim)

         # Quick sqli check based on DSSS
         dbms, regex = self.sqli_check(body, meta['orig_body'])
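Both patterns bound the match at 80 characters and make it non-greedy, so a reflection is only reported when the same random delimiter appears twice within a short window; sc_full_match additionally requires the trailing ';9' to have survived. A small sketch with a hypothetical response body and delimiter:

import re

delim = '1zqjab'
body = '<input value="1zqjab&#39;&quot;(){}&lt;x&gt;:/1zqjab;9">'

full_match = '%s.{0,80}?%s' % (delim, delim)       # reflection bounded by the delim, at most 80 chars between
sc_full_match = '%s.{0,80}?%s;9' % (delim, delim)  # same, but only when the trailing ;9 survived too

re_matches = sorted((m.start(), m.group()) for m in re.finditer(full_match, body))
scolon_matches = sorted((m.start(), m.group()) for m in re.finditer(sc_full_match, body))
print(re_matches)       # one match: whatever the server kept of the test characters
print(scolon_matches)   # non-empty here because ';9' directly follows the closing delim
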
@@ -52,11 +48,13 @@ class XSSCharFinder(object):

         # XSS detection starts here
         re_matches = sorted([(m.start(), m.group()) for m in re.finditer(full_match, body)])
+        if re_matches:
+            if '/verifypasswd.php/1zqj' in resp_url:
+                embed()

-        if len(re_matches) > 0:
             scolon_matches = sorted([(m.start(), m.group()) for m in re.finditer(sc_full_match, body)])
             lxml_injs = self.get_lxml_matches(full_match, body, resp_url, delim)
             if lxml_injs:

                 err = None
                 if len(re_matches) != len(lxml_injs):
                     spider.log('Error: mismatch in injections found by lxml and regex. Higher chance of false positive for %s' % resp_url)
@@ -64,7 +62,6 @@ class XSSCharFinder(object):
                     mismatch = True

                 inj_data = self.combine_regex_lxml(lxml_injs, re_matches, scolon_matches, body, mismatch)

                 # If mismatch is True, then "for offset in sorted(inj_data)" will fail with TypeError
                 try:
                     for offset in sorted(inj_data):
@@ -239,8 +236,7 @@ class XSSCharFinder(object):

         tag_index, tag, attr, attr_val, payload, reflected_chars, line = injection
         pl_delim = payload[:7]
-        #full_match = '%s.*?%s' % (pl_delim, pl_delim)
-        full_match = '%s.{0,80}?%s' % (pl_delim, pl_delim)
+        full_match = '%s.{0,85}?%s' % (pl_delim, pl_delim)
         line = re.sub(full_match, 'INJECTION', line)

         all_chars_payloads = {}
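The same delimiter-bounded pattern (widened here to 85 characters) is also used to blank the payload out of the offending line before it is reported. A short sketch with hypothetical values:

import re

pl_delim = '1zqjab'   # first characters of the payload, as sliced off above
line = '<input value="1zqjab&#39;&quot;(){}&lt;x&gt;:/1zqjab;9">'

full_match = '%s.{0,85}?%s' % (pl_delim, pl_delim)
print(re.sub(full_match, 'INJECTION', line))
# <input value="INJECTION;9">
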
@@ -375,6 +371,9 @@ class XSSCharFinder(object):
         # javascript:alert(1) vulns
         # We do this slicing operation because ;9 might be at the end
         # although it's unnecessary for the payload
+
+        # CHECK HERE, PASS DOWN THE ORIG ATTR VAL
+        #if delim+'subbed' in attr_val:
         if attr_val[:len(delim+'subbed')] == delim+'subbed':
             if tag == 'a' and attr == 'href':
                 # Only need : ( and ) to use javascript:prompt(4) redir payload
@@ -559,8 +558,7 @@ class XSSCharFinder(object):
         subbed_body = re.sub(full_match, sub, body)
         doc = self.html_parser(subbed_body, resp_url)
         lxml_injs = self.xpath_inj_points(sub, doc)
-        if lxml_injs:
-            return lxml_injs
+        return lxml_injs

     def html_parser(self, body, resp_url):
         try:
@@ -657,8 +655,8 @@ class XSSCharFinder(object):
             #unfiltered_chars = self.get_unfiltered_chars(payload, pl_delim, scolon_matches, match_offset)
             reflected_chars = self.get_reflected_chars(tag, attr, payload, pl_delim, scolon_matches, match_offset)
             # Common false+ shows only "> as unfiltered if script parses the chars between 2 unrelated delim strs
-            if reflected_chars == '">':
-                reflected_chars = ''
+            #if reflected_chars == '">':
+            #    reflected_chars = ''
             all_inj_data[match_offset] = [tag_index, tag, attr, attr_val, payload, reflected_chars, line]

         return all_inj_data
@@ -869,20 +867,16 @@ class XSSCharFinder(object):
         # Make sure js payloads remove escaped ' and ", also remove ;
         # since ; will show up in html encoded entities. If ; is unfiltered
         # it will be added after this function
-        #escaped_chars = re.findall(r'\\(.)', chars)
         chars_between_delim = payload.replace(delim, '')#.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')

         #If injection is inside script tag, remove the escaped chars
         if tag == 'script' or attr in self.event_attributes():
             chars_between_delim = chars_between_delim.replace("\\'", "").replace('\\"', '').replace(';', '').replace('\\>', '').replace('\\<', '').replace('\\/', '')
         else:
+            # If it's not a script then just remove the \'s otherwise they show up in Unfiltered in the item
             chars_between_delim = chars_between_delim.replace("\\", "")

-        # List for just the inj point
-        #for c in chars_found:
-        #    if c in self.test_str:
-        #        unfiltered_chars.append(c)

-        # # Check if a colon needs to be added to the unfiltered chars
+        # Check if a colon needs to be added to the unfiltered chars
         for scolon_match in scolon_matches:
             # Confirm the string offset of the match is the same
             # Since scolon_match will only exist when ;9 was found

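The rule being applied: inside a <script> tag or an event-handler attribute, a quote that reflects back with a leading backslash is neutralized by the escaping, so it should not be counted as unfiltered. A hypothetical helper (not the pipeline's actual method) illustrating that:

def effective_chars(reflected, script_context):
    # Hypothetical helper, not the pipeline's method: decide which reflected
    # characters still count once backslash-escaping is taken into account.
    if script_context:
        # inside <script> or an event attribute, \' and \" are neutralized
        for neutralized in ("\\'", '\\"', ';', '\\>', '\\<', '\\/'):
            reflected = reflected.replace(neutralized, '')
    else:
        # elsewhere only the stray backslashes themselves are dropped
        reflected = reflected.replace('\\', '')
    return reflected

print(effective_chars("\\'\\\"(){}<x>:/", True))  # (){}<x>:/   (escaped quotes removed)
print(effective_chars("'\"(){}<x>:/", True))      # '"(){}<x>:/ (unescaped quotes survive)
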
@@ -6,7 +6,7 @@ from scrapy.http import FormRequest, Request
 from scrapy.selector import Selector
 from xsscrapy.items import inj_resp
 from xsscrapy.loginform import fill_login_form
-from urlparse import urlparse, parse_qsl, urljoin
+from urlparse import urlparse, parse_qsl, urljoin, urlunparse, urlunsplit

 from scrapy.http.cookies import CookieJar
 from cookielib import Cookie
@@ -32,7 +32,8 @@ class XSSspider(CrawlSpider):
     # If you're logging into a site with a logout link, you'll want to
     # uncomment the rule below and comment the shorter one right after to
     # prevent yourself from being logged out automatically
-    rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    #rules = (Rule(LinkExtractor(), callback='parse_resp', follow=True), )
+    rules = (Rule(LinkExtractor(deny='contactus'), callback='parse_resp', follow=True), )

     def __init__(self, *args, **kwargs):
         # run using: scrapy crawl xss_spider -a url='http://example.com'
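The deny pattern simply keeps the LinkExtractor away from matching URLs; 'contactus' looks site-specific here, and the comments above describe the more common use, which is keeping an authenticated crawl away from the logout link. A sketch of that use with hypothetical patterns (imports shown for a recent Scrapy release; the project's own import paths may differ):

from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import Rule

# Hypothetical deny patterns for an authenticated crawl: skip anything that
# would log the spider out before it finishes.
rules = (Rule(LinkExtractor(deny=('logout', 'signout')),
              callback='parse_resp', follow=True), )
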
@@ -41,7 +42,7 @@ class XSSspider(CrawlSpider):
         hostname = urlparse(self.start_urls[0]).hostname
         # With subdomains
         self.allowed_domains = [hostname] # adding [] around the value seems to allow it to crawl subdomain of value
-        self.delim = '1zqjx'
+        self.delim = '1zqj'
         # semi colon goes on end because sometimes it cuts stuff off like
         # gruyere or the second cookie delim
         self.test_str = '\'"(){}<x>:/'
@@ -135,6 +136,10 @@ class XSSspider(CrawlSpider):
         reqs = []
         orig_url = response.url
         body = response.body
+        parsed_url = urlparse(orig_url)
+        # parse_qsl rather than parse_qs in order to preserve order
+        # will always return a list
+        url_params = parse_qsl(parsed_url.query, keep_blank_values=True)

         try:
             # soupparser will handle broken HTML better (like identical attributes) but god damn will you pay for it
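parse_qsl is chosen because it returns an ordered list of (name, value) pairs and, with keep_blank_values=True, keeps parameters whose values are empty; both matter when each parameter is later re-encoded with a payload in its place. A quick comparison (Python 3 module path shown; the spider itself uses the Python 2 urlparse module):

from urllib.parse import urlparse, parse_qsl, parse_qs

parsed = urlparse('http://example.com/page1.php?x=1&y=&z=2')
print(parse_qsl(parsed.query, keep_blank_values=True))
# [('x', '1'), ('y', ''), ('z', '2')]   one ordered pair per parameter, blank y kept
print(parse_qs(parsed.query))
# {'x': ['1'], 'z': ['2']}              dict of lists, blank y dropped, duplicate names collapse
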
@@ -177,11 +182,9 @@ class XSSspider(CrawlSpider):
         if form_reqs:
             reqs += form_reqs

         # Test URL variables with xss strings
-        payloaded_urls, url_delim_str = self.make_URLs(orig_url, payload) # list of tuples where item[0]=url, and item[1]=changed param
-        print 'URL:', payloaded_urls, url_delim_str
+        payloaded_urls = self.make_URLs(orig_url, parsed_url, url_params)
         if payloaded_urls:
-            url_reqs = self.make_url_reqs(orig_url, payloaded_urls, url_delim_str)
+            url_reqs = self.make_url_reqs(orig_url, payloaded_urls)
             if url_reqs:
                 reqs += url_reqs

@@ -243,10 +246,8 @@ class XSSspider(CrawlSpider):
         ''' Payload each form input in each input's own request '''
         reqs = []
         vals_urls_meths = []

-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         for form in forms:
             if form.inputs:
@@ -283,7 +284,7 @@ class XSSspider(CrawlSpider):
                                     'orig_url':orig_url,
                                     'xss_place':'form',
                                     'POST_to':url,
-                                    'delim':delim_str},
+                                    'delim':payload[:len(self.delim)+2]},
                               dont_filter=True,
                               callback=self.xss_chars_finder)
                 reqs.append(req)
@@ -300,9 +301,7 @@ class XSSspider(CrawlSpider):
     def make_cookie_reqs(self, url, payload, xss_param):
         ''' Generate payloaded cookie header requests '''

-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         meta={'xss_place':'header',
@@ -310,7 +309,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':xss_param,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str},
+                              'delim':payload[:len(self.delim)+2]},
                         cookies={'userinput':payload},
                         callback=self.xss_chars_finder,
                         dont_filter=True)]
@@ -318,33 +317,116 @@ class XSSspider(CrawlSpider):
         if len(reqs) > 0:
             return reqs

-    def make_URLs(self, url, payload):
-        ''' Add links with variables in them to the queue again but with XSS testing payloads
-        Will return a tuple: (url, injection point, payload) '''
+    def make_URLs(self, orig_url, parsed_url, url_params):
+        """
+        Create the URL parameter payloaded URLs
+        """
+        payloaded_urls = []
+
+        # Create 1 URL per payloaded param
+        new_query_strings = self.get_single_payload_queries(url_params)
+        if new_query_strings:
+            # Payload the parameters
+            for query in new_query_strings:
+
+                query_str = query[0]
+                params = query[1]
+                payload = query[2]
+                # scheme #netlo #path #params #query (url params) #fragment
+                payloaded_url = urlunparse((parsed_url[0], parsed_url[1], parsed_url[2], parsed_url[3], query_str, parsed_url[5]))
+                payloaded_url = urllib.unquote(payloaded_url)
+                payloaded_urls.append((payloaded_url, params, payload))
+
+            # Payload the URL path
+            payloaded_url_path = self.payload_url_path(parsed_url)
+            payloaded_urls.append(payloaded_url_path)
+        else:
+            # Payload end of URL if there's no parameters
+            payloaded_end_of_url = self.payload_end_of_url(orig_url)
+            payloaded_urls.append(payloaded_end_of_url)
+
+        if len(payloaded_urls) > 0:
+            return payloaded_urls

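make_URLs only swaps the query component of the parsed 6-tuple before reassembling the URL with urlunparse, and unquote is applied so the test characters go out raw rather than percent-encoded. A sketch with a hypothetical payloaded query string (Python 3 urllib.parse shown; the spider uses the Python 2 urlparse/urllib equivalents):

from urllib.parse import urlparse, urlunparse, unquote

parsed_url = urlparse('http://example.com/page1.php?x=1&y=2')
query_str = 'x=1zqjab%27%22%28%29%7B%7D%3Cx%3E%3A%2F1zqjab%3B9&y=2'  # hypothetical payloaded query

# Reassemble: scheme, netloc, path, params, (new) query, fragment
payloaded_url = urlunparse((parsed_url[0], parsed_url[1], parsed_url[2],
                            parsed_url[3], query_str, parsed_url[5]))
print(unquote(payloaded_url))
# http://example.com/page1.php?x=1zqjab'"(){}<x>:/1zqjab;9&y=2
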
+    def payload_url_path(self, parsed_url):
+        """
+        Payload the URL path like:
+        http://example.com/page1.php?x=1&y=2 -->
+        http://example.com/page1.php/FUZZ/?x=1&y=2
+        """
+        # Remove / so that it doesn't think it's 2 folders in the fuzz chars
+        payload = self.make_payload().replace('/', '')
+        path = parsed_url[2]
+        if path.endswith('/'):
+            path = path + payload + '/'
+        else:
+            path = path + '/' + payload + '/'
+        #scheme, netloc, path, params, query (url params), fragment
+        payloaded_url = urlunparse((parsed_url[0], parsed_url[1], path, parsed_url[3], parsed_url[4], parsed_url[5]))
+        payloaded_url = urllib.unquote(payloaded_url)
+        payloaded_data = (payloaded_url, 'URL path', payload)
+
+        return payloaded_data

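This is the new URL-path check from the commit title: the fuzz string is inserted as one extra path segment (slashes stripped so it stays a single segment) while the query string is left alone. A standalone sketch with a hypothetical payload (Python 3 urllib.parse):

from urllib.parse import urlparse, urlunparse, unquote

def payload_url_path_sketch(url, payload):
    # keep the payload a single path segment
    payload = payload.replace('/', '')
    p = urlparse(url)
    path = p.path + payload + '/' if p.path.endswith('/') else p.path + '/' + payload + '/'
    return unquote(urlunparse((p.scheme, p.netloc, path, p.params, p.query, p.fragment)))

print(payload_url_path_sketch('http://example.com/page1.php?x=1&y=2', "1zqjab'\"(){}<x>:1zqjab;9"))
# http://example.com/page1.php/1zqjab'"(){}<x>:1zqjab;9/?x=1&y=2
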
+    def get_single_payload_queries(self, url_params):
+        """
+        Make a list of lists of tuples where each secondary list has 1 payloaded
+        param and the rest are original value
+        """
+        new_payloaded_params = []
+        changed_params = []
+        modified = False
+        # Create a list of lists where num of lists = len(params)
+        for x in xrange(0, len(url_params)):
+            single_url_params = []
+
+            # Make the payload
+            payload = self.make_payload()
+
+            for p in url_params:
+                param, value = p
+
+                # if param has not been modified and we haven't changed a parameter for this loop
+                if param not in changed_params and modified == False:
+                    # Do we need the original value there? Might be helpful sometimes but think about testing for <frame src="FUZZCHARS">
+                    # versus <frame src="http://something.com/FUZZCHARS"> and the xss payload javascript:alert(1)
+                    new_param_val = (param, payload)
+                    #new_param_val = (param, value+payload)
+                    single_url_params.append(new_param_val)
+                    changed_params.append(param)
+                    modified = param
+                else:
+                    single_url_params.append(p)
+
+            # Add the modified, urlencoded params to the master list
+            new_payloaded_params.append((urllib.urlencode(single_url_params), modified, payload))
+            # Reset the changed parameter tracker
+            modified = False
+
+        if len(new_payloaded_params) > 0:
+            # [(payloaded params, payloaded param, payload), (payloaded params, payloaded param, payload)]
+            return new_payloaded_params

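For a query with N parameters this yields N query strings, each with exactly one parameter swapped for a freshly delimited payload and the rest left at their original values. A standalone sketch using a fixed payload instead of the spider's per-request make_payload() (Python 3 urlencode):

from urllib.parse import urlencode

def single_payload_queries_sketch(url_params, payload):
    # Hypothetical standalone version: one output row per parameter, with
    # only that parameter's value replaced by the payload.
    out = []
    for i, (name, _) in enumerate(url_params):
        row = [(p, payload) if j == i else (p, v) for j, (p, v) in enumerate(url_params)]
        out.append((urlencode(row), name, payload))
    return out

params = [('x', '1'), ('y', '2')]
for query_str, changed, _ in single_payload_queries_sketch(params, 'FUZZ'):
    print('%s -> %s' % (changed, query_str))
# x -> x=FUZZ&y=2
# y -> x=1&y=FUZZ
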
+    def make_payload(self):
+        """
+        Make the payload with a unique delim
+        """
         two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
         delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = delim_str + self.test_str + delim_str + ';9'
+        return payload

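Every request type now builds its payload through make_payload, so the layout is uniform: the 4-character base delim plus two random letters, the test string, the same delimiter again, and a trailing ';9'. That uniform layout is what lets the meta['delim'] entries above recover the per-request delimiter with payload[:len(self.delim)+2]. A sketch of the layout (string.lowercase is the Python 2 spelling; ascii_lowercase in Python 3):

import random
import string

delim = '1zqj'
test_str = '\'"(){}<x>:/'

two_rand_letters = random.choice(string.ascii_lowercase) + random.choice(string.ascii_lowercase)
delim_str = delim + two_rand_letters                  # e.g. '1zqjab'
payload = delim_str + test_str + delim_str + ';9'     # e.g. 1zqjab'"(){}<x>:/1zqjab;9

# The per-request delimiter is recoverable from the front of the payload,
# which is exactly what the meta['delim'] slices above depend on.
assert payload[:len(delim) + 2] == delim_str
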
-        if '=' in url and '?' in url:
-            # If URL has variables, payload them
-            payloaded_urls = self.payload_url_vars(url, payload)
-        else:
-            # If URL has no variables, tack payload onto end of URL
-            payloaded_urls = self.payload_end_of_url(url, payload)
-
-        return payloaded_urls, delim_str

-    def payload_end_of_url(self, url, payload):
+    def payload_end_of_url(self, url):
         ''' Payload the end of the URL to catch some DOM(?) and other reflected XSSes '''

+        payload = self.make_payload()
-        # Make URL test and delim strings unique
         if url[-1] == '/':
             payloaded_url = url+payload
         else:
             payloaded_url = url+'/'+payload

-        return [(payloaded_url, 'end of url', payload)]
+        return (payloaded_url, 'end of url', payload)

     def payload_url_vars(self, url, payload):
         ''' Payload the URL variables '''
@@ -443,7 +525,7 @@ class XSSspider(CrawlSpider):

         return (netloc, protocol, doc_domain, path)

-    def make_url_reqs(self, orig_url, payloaded_urls, delim_str):
+    def make_url_reqs(self, orig_url, payloaded_urls):
         ''' Make the URL requests '''

         reqs = [Request(url[0],
@@ -451,7 +533,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':url[1],
                               'orig_url':orig_url,
                               'payload':url[2],
-                              'delim':delim_str},
+                              'delim':url[2][:len(self.delim)+2]},
                         callback = self.xss_chars_finder)
                 for url in payloaded_urls] # Meta is the payload

@@ -461,9 +543,7 @@ class XSSspider(CrawlSpider):
     def make_header_reqs(self, url, payload, inj_headers):
         ''' Generate header requests '''

-        two_rand_letters = random.choice(string.lowercase) + random.choice(string.lowercase)
-        delim_str = self.delim + two_rand_letters
-        payload = delim_str + payload + delim_str + ';9'
+        payload = self.make_payload()

         reqs = [Request(url,
                         headers={inj_header:payload},
@@ -471,7 +551,7 @@ class XSSspider(CrawlSpider):
                               'xss_param':inj_header,
                               'orig_url':url,
                               'payload':payload,
-                              'delim':delim_str,
+                              'delim':payload[:len(self.delim)+2],
                               'UA':self.get_user_agent(inj_header, payload)},
                         dont_filter=True,
                         callback = self.xss_chars_finder)