better heuristics and refactoring
This commit is contained in:
4
.gitignore
vendored
Normal file
4
.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
arjun.egg-info
|
||||
__pycache__
|
||||
build
|
||||
dist
|
||||
@@ -19,22 +19,22 @@ arjun_dir = compatible_path(mem.__file__.replace('/core/config.py', ''))
|
||||
|
||||
parser = argparse.ArgumentParser() # defines the parser
|
||||
# Arguments that can be supplied
|
||||
parser.add_argument('-u', help='target url', dest='url')
|
||||
parser.add_argument('-o', '-oJ', help='path for json output file', dest='json_file')
|
||||
parser.add_argument('-oT', help='path for text output file', dest='text_file')
|
||||
parser.add_argument('-oB', help='port for burp suite proxy', dest='burp_port')
|
||||
parser.add_argument('-d', help='delay between requests', dest='delay', type=float, default=0)
|
||||
parser.add_argument('-t', help='number of threads', dest='threads', type=int, default=2)
|
||||
parser.add_argument('-w', help='wordlist path', dest='wordlist', default=arjun_dir+'/db/default.txt')
|
||||
parser.add_argument('-m', help='request method: GET/POST/XML/JSON', dest='method', default='GET')
|
||||
parser.add_argument('-i', help='import targets from file', dest='import_file', nargs='?', const=True)
|
||||
parser.add_argument('-T', help='http request timeout', dest='timeout', type=float, default=15)
|
||||
parser.add_argument('-c', help='chunk size/number of parameters to be sent at once', type=int, dest='chunks', default=500)
|
||||
parser.add_argument('-q', help='quiet mode, no output', dest='quiet', action='store_true')
|
||||
parser.add_argument('--headers', help='add headers', dest='headers', nargs='?', const=True)
|
||||
parser.add_argument('--passive', help='collect parameter names from passive sources', dest='passive')
|
||||
parser.add_argument('--stable', help='prefer stability over speed', dest='stable', action='store_true')
|
||||
parser.add_argument('--include', help='include this data in every request', dest='include', default={})
|
||||
parser.add_argument('-u', help='Target URL', dest='url')
|
||||
parser.add_argument('-o', '-oJ', help='Path for json output file.', dest='json_file')
|
||||
parser.add_argument('-oT', help='Path for text output file.', dest='text_file')
|
||||
parser.add_argument('-oB', help='Port for output to Burp Suite Proxy. Default port is 8080.', dest='burp_port', nargs='?', const=8080)
|
||||
parser.add_argument('-d', help='Delay between requests in seconds. (default: 0)', dest='delay', type=float, default=0)
|
||||
parser.add_argument('-t', help='Number of concurrent threads. (default: 2)', dest='threads', type=int, default=2)
|
||||
parser.add_argument('-w', help='Wordlist file path. (default: {arjundir}/db/default.txt)', dest='wordlist', default=arjun_dir+'/db/default.txt')
|
||||
parser.add_argument('-m', help='Request method to use: GET/POST/XML/JSON. (default: GET)', dest='method', default='GET')
|
||||
parser.add_argument('-i', help='Import target URLs from file.', dest='import_file', nargs='?', const=True)
|
||||
parser.add_argument('-T', help='HTTP request timeout in seconds. (default: 15)', dest='timeout', type=float, default=15)
|
||||
parser.add_argument('-c', help='Chunk size. The number of parameters to be sent at once', type=int, dest='chunks', default=500)
|
||||
parser.add_argument('-q', help='Quiet mode. No output.', dest='quiet', action='store_true')
|
||||
parser.add_argument('--headers', help='Add headers. Separate multiple headers with a new line.', dest='headers', nargs='?', const=True)
|
||||
parser.add_argument('--passive', help='Collect parameter names from passive sources like wayback, commoncrawl and otx.', dest='passive', nargs='?', const='-')
|
||||
parser.add_argument('--stable', help='Prefer stability over speed.', dest='stable', action='store_true')
|
||||
parser.add_argument('--include', help='Include this data in every request.', dest='include', default={})
|
||||
args = parser.parse_args() # arguments to be parsed
|
||||
|
||||
if args.quiet:
|
||||
|
||||
@@ -158,11 +158,12 @@ def reader(path, mode='string'):
|
||||
return ''.join([line for line in file])
|
||||
|
||||
|
||||
# Compiled once at import time. (?s) lets '.' span newlines inside a script
# body; (?i) matches the tag name case-insensitively while leaving the case of
# the captured JavaScript untouched.
re_extract_js = re.compile(r'(?si)<script[^>]*>([^<].+?)</script')


def extract_js(response):
    """
    extracts javascript from a given string

    Returns a list holding the body of every <script>...</script> element
    found in `response`, in document order and with its original letter case.
    A body is only captured when it is at least two characters long and does
    not start with '<'. An input without any script element yields [].
    """
    # The response is deliberately NOT lower-cased: identifiers pulled out of
    # the scripts must keep their original case.
    return re_extract_js.findall(response)
|
||||
|
||||
|
||||
def parse_headers(string):
|
||||
|
||||
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
|
||||
|
||||
def commoncrawl(host, page=0):
|
||||
these_params = set()
|
||||
response = requests.get('http://index.commoncrawl.org/CC-MAIN-2020-29-index?url=*.%s&fl=url&page=%s&limit=10000' % (host, page)).text
|
||||
response = requests.get('http://index.commoncrawl.org/CC-MAIN-2020-29-index?url=*.%s&fl=url&page=%s&limit=10000' % (host, page), verify=False).text
|
||||
if response.startswith('<!DOCTYPE html>'):
|
||||
return ([], False, 'commoncrawl')
|
||||
urls = response.split('\n')
|
||||
|
||||
@@ -2,25 +2,43 @@ import re
|
||||
|
||||
from arjun.core.utils import extract_js
|
||||
|
||||
def is_not_junk(string):
|
||||
return re.match(r'^[A-Za-z0-9_]+$', string)
|
||||
|
||||
def insert_words(words, wordlist, found):
|
||||
if words:
|
||||
for var in words:
|
||||
if var not in found and is_not_junk(var):
|
||||
found.append(var)
|
||||
if var in wordlist:
|
||||
wordlist.remove(var)
|
||||
wordlist.insert(0, var)
|
||||
# Acceptable parameter name: one or more ASCII letters, digits or underscores.
re_not_junk = re.compile(r'^[A-Za-z0-9_]+$')


def is_not_junk(param):
    """
    Return True when `param` consists solely of ASCII letters, digits and
    underscores, and False otherwise (including for the empty string).
    """
    # fullmatch() instead of match(): with match(), '$' also succeeds just
    # before a trailing newline, so a value such as 'name\n' would be accepted.
    return re_not_junk.fullmatch(param) is not None
|
||||
|
||||
# TODO: JavaScript also allows unquoted map keys, e.g. { param: "value" }; re_map_keys only matches quoted keys.
|
||||
re_input_names = re.compile(r'''(?i)<input.+?name=["']?([^"'\s>]+)''')
|
||||
re_input_ids = re.compile(r'''(?i)<input.+?id=["']?([^"'\s>]+)''')
|
||||
re_empty_vars = re.compile(r'''([^\s!=<>]+)\s*=\s*(?:['"`]{2}|true|false|null)''')
|
||||
re_map_keys = re.compile(r'''([^'"]+)['"]\s*:\s*['"`]''')
|
||||
def heuristic(response, wordlist):
    """
    Collect likely parameter names from a response body and move them to the
    front of `wordlist`.

    Candidates come from:
      * the name and id attributes of <input> tags (re_input_names,
        re_input_ids), and
      * every script returned by extract_js(response): variables assigned an
        empty string literal, true, false or null (re_empty_vars), and quoted
        map keys (re_map_keys).

    A candidate is accepted when is_not_junk() approves it and it has not been
    accepted before. For each accepted candidate, `wordlist` is modified in
    place: the word is removed if already present and then inserted at
    index 0.

    Returns the accepted names as a list without duplicates, in the order in
    which they were first seen ([] when nothing was found).
    """
    potential_params = []

    # Parse Inputs
    potential_params += re_input_names.findall(response)
    potential_params += re_input_ids.findall(response)

    # Parse Scripts
    for script in extract_js(response):
        potential_params += re_empty_vars.findall(script)
        potential_params += re_map_keys.findall(script)

    # A dict is used as an ordered set: membership tests stay O(1) and the
    # returned list has a stable, first-seen order (a plain set would yield an
    # order that can differ from run to run).
    found = {}
    for word in potential_params:
        if word in found or not is_not_junk(word):
            continue
        found[word] = None

        # Promote the word so that it is tried before the generic entries.
        if word in wordlist:
            wordlist.remove(word)
        wordlist.insert(0, word)

    return list(found)
|
||||
|
||||
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
|
||||
|
||||
def otx(host, page):
|
||||
these_params = set()
|
||||
data = requests.get('https://otx.alienvault.com/api/v1/indicators/hostname/%s/url_list?limit=50&page=%d' % (host, page)).json()
|
||||
data = requests.get('https://otx.alienvault.com/api/v1/indicators/hostname/%s/url_list?limit=50&page=%d' % (host, page), verify=False).json()
|
||||
if 'url_list' not in data:
|
||||
return (these_params, False, 'otx')
|
||||
for obj in data['url_list']:
|
||||
|
||||
@@ -19,7 +19,8 @@ def wayback(host, page):
|
||||
response = requests.get(
|
||||
'http://web.archive.org/cdx/search?filter=mimetype:text/html&filter=statuscode:200',
|
||||
params=payload,
|
||||
headers=headers
|
||||
headers=headers,
|
||||
verify=False
|
||||
).text
|
||||
if not response:
|
||||
return (these_params, False, 'wayback')
|
||||
|
||||
Reference in New Issue
Block a user