better heuristics and refactoring
.gitignore (vendored, new file, +4)
@@ -0,0 +1,4 @@
+arjun.egg-info
+__pycache__
+build
+dist
@@ -19,22 +19,22 @@ arjun_dir = compatible_path(mem.__file__.replace('/core/config.py', ''))
 
 parser = argparse.ArgumentParser() # defines the parser
 # Arguments that can be supplied
-parser.add_argument('-u', help='target url', dest='url')
+parser.add_argument('-u', help='Target URL', dest='url')
-parser.add_argument('-o', '-oJ', help='path for json output file', dest='json_file')
+parser.add_argument('-o', '-oJ', help='Path for json output file.', dest='json_file')
-parser.add_argument('-oT', help='path for text output file', dest='text_file')
+parser.add_argument('-oT', help='Path for text output file.', dest='text_file')
-parser.add_argument('-oB', help='port for burp suite proxy', dest='burp_port')
+parser.add_argument('-oB', help='Port for output to Burp Suite Proxy. Default port is 8080.', dest='burp_port', nargs='?', const=8080)
-parser.add_argument('-d', help='delay between requests', dest='delay', type=float, default=0)
+parser.add_argument('-d', help='Delay between requests in seconds. (default: 0)', dest='delay', type=float, default=0)
-parser.add_argument('-t', help='number of threads', dest='threads', type=int, default=2)
+parser.add_argument('-t', help='Number of concurrent threads. (default: 2)', dest='threads', type=int, default=2)
-parser.add_argument('-w', help='wordlist path', dest='wordlist', default=arjun_dir+'/db/default.txt')
+parser.add_argument('-w', help='Wordlist file path. (default: {arjundir}/db/default.txt)', dest='wordlist', default=arjun_dir+'/db/default.txt')
-parser.add_argument('-m', help='request method: GET/POST/XML/JSON', dest='method', default='GET')
+parser.add_argument('-m', help='Request method to use: GET/POST/XML/JSON. (default: GET)', dest='method', default='GET')
-parser.add_argument('-i', help='import targets from file', dest='import_file', nargs='?', const=True)
+parser.add_argument('-i', help='Import target URLs from file.', dest='import_file', nargs='?', const=True)
-parser.add_argument('-T', help='http request timeout', dest='timeout', type=float, default=15)
+parser.add_argument('-T', help='HTTP request timeout in seconds. (default: 15)', dest='timeout', type=float, default=15)
-parser.add_argument('-c', help='chunk size/number of parameters to be sent at once', type=int, dest='chunks', default=500)
+parser.add_argument('-c', help='Chunk size. The number of parameters to be sent at once', type=int, dest='chunks', default=500)
-parser.add_argument('-q', help='quiet mode, no output', dest='quiet', action='store_true')
+parser.add_argument('-q', help='Quiet mode. No output.', dest='quiet', action='store_true')
-parser.add_argument('--headers', help='add headers', dest='headers', nargs='?', const=True)
+parser.add_argument('--headers', help='Add headers. Separate multiple headers with a new line.', dest='headers', nargs='?', const=True)
-parser.add_argument('--passive', help='collect parameter names from passive sources', dest='passive')
+parser.add_argument('--passive', help='Collect parameter names from passive sources like wayback, commoncrawl and otx.', dest='passive', nargs='?', const='-')
-parser.add_argument('--stable', help='prefer stability over speed', dest='stable', action='store_true')
+parser.add_argument('--stable', help='Prefer stability over speed.', dest='stable', action='store_true')
-parser.add_argument('--include', help='include this data in every request', dest='include', default={})
+parser.add_argument('--include', help='Include this data in every request.', dest='include', default={})
 args = parser.parse_args() # arguments to be parsed
 
 if args.quiet:
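The switch to nargs='?' with a const is what lets -oB and --passive act as both bare flags and flags with a value. A minimal standalone sketch of that argparse behaviour (parser built here only for illustration):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-oB', dest='burp_port', nargs='?', const=8080)

print(parser.parse_args([]).burp_port)               # None: flag absent
print(parser.parse_args(['-oB']).burp_port)          # 8080: bare flag falls back to const
print(parser.parse_args(['-oB', '9090']).burp_port)  # '9090': stays a string, since no type= is given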
@@ -158,11 +158,12 @@ def reader(path, mode='string'):
     return ''.join([line for line in file])
 
 
+re_extract_js = re.compile(r'(?si)<script[^>]*>([^<].+?)</script')
 def extract_js(response):
     """
     extracts javascript from a given string
     """
-    return re.findall(r'(?s)<script[^>]+>([^<].+?)</script', response.lower(), re.I)
+    return re_extract_js.findall(response)
 
 
 def parse_headers(string):
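Hoisting the pattern into a module-level re.compile moves compilation to import time, and replacing response.lower() with the (?i) inline flag means the extracted JavaScript keeps its original case. A rough benchmark sketch of the compile-once win (HTML sample invented):

import re
import timeit

pattern = r'(?si)<script[^>]*>([^<].+?)</script'
compiled = re.compile(pattern)
html = '<script>var q = 1</script>' * 100

# re.findall() caches compiled patterns internally, so the gap is modest,
# but the precompiled object skips the cache lookup on every call
print(timeit.timeit(lambda: re.findall(pattern, html), number=1000))
print(timeit.timeit(lambda: compiled.findall(html), number=1000))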
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
 
 def commoncrawl(host, page=0):
     these_params = set()
-    response = requests.get('http://index.commoncrawl.org/CC-MAIN-2020-29-index?url=*.%s&fl=url&page=%s&limit=10000' % (host, page)).text
+    response = requests.get('http://index.commoncrawl.org/CC-MAIN-2020-29-index?url=*.%s&fl=url&page=%s&limit=10000' % (host, page), verify=False).text
     if response.startswith('<!DOCTYPE html>'):
         return ([], False, 'commoncrawl')
     urls = response.split('\n')
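verify=False disables TLS certificate verification for this lookup; urllib3 then emits an InsecureRequestWarning on every such request unless it is silenced once at startup. A sketch of the usual suppression pattern (whether Arjun already does this elsewhere is not visible in this diff):

import requests
import urllib3

# silence the warning urllib3 raises for every verify=False request
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# host substituted with example.com for illustration
url = 'http://index.commoncrawl.org/CC-MAIN-2020-29-index?url=*.example.com&fl=url&page=0&limit=10000'
response = requests.get(url, verify=False).text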
@@ -2,25 +2,43 @@ import re
 
 from arjun.core.utils import extract_js
 
-def is_not_junk(string):
-    return re.match(r'^[A-Za-z0-9_]+$', string)
+re_not_junk = re.compile(r'^[A-Za-z0-9_]+$')
+def is_not_junk(param):
+    return (re_not_junk.match(param) is not None)
 
-def insert_words(words, wordlist, found):
-    if words:
-        for var in words:
-            if var not in found and is_not_junk(var):
-                found.append(var)
-                if var in wordlist:
-                    wordlist.remove(var)
-                wordlist.insert(0, var)
+# TODO: for map keys, javascript tolerates { param: "value" }
+re_input_names = re.compile(r'''(?i)<input.+?name=["']?([^"'\s>]+)''')
+re_input_ids = re.compile(r'''(?i)<input.+?id=["']?([^"'\s>]+)''')
+re_empty_vars = re.compile(r'''([^\s!=<>]+)\s*=\s*(?:['"`]{2}|true|false|null)''')
+re_map_keys = re.compile(r'''([^'"]+)['"]\s*:\s*['"`]''')
 
 def heuristic(response, wordlist):
-    found = []
-    inputs = re.findall(r'(?i)<input.+?name=["\']?([^"\'\s>]+)', response)
-    insert_words(inputs, wordlist, found)
+    potential_params = []
+
+    # Parse Inputs
+    input_names = re_input_names.findall(response)
+    potential_params += input_names
+
+    input_ids = re_input_ids.findall(response)
+    potential_params += input_ids
+
+    # Parse Scripts
     for script in extract_js(response):
-        empty_vars = re.findall(r'([^\s!=<>]+)\s*=\s*[\'"`][\'"`]', script)
-        insert_words(empty_vars, wordlist, found)
-        map_keys = re.findall(r'([^\'"]+)[\'"]:\s?[\'"]', script)
-        insert_words(map_keys, wordlist, found)
-    return found
+        empty_vars = re_empty_vars.findall(script)
+        potential_params += empty_vars
+        map_keys = re_map_keys.findall(script)
+        potential_params += map_keys
+
+    if len(potential_params) == 0:
+        return []
+
+    found = set()
+    for word in potential_params:
+        if is_not_junk(word) and (word not in found):
+            found.add(word)
+
+            if word in wordlist:
+                wordlist.remove(word)
+            wordlist.insert(0, word)
+
+    return list(found)
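The rewritten heuristic() now also harvests input id attributes and JS assignments to true/false/null, collects everything into potential_params, and only then deduplicates and promotes hits to the front of the wordlist. A quick sanity check (sample HTML invented; import path assumed):

from arjun.plugins.heuristic import heuristic  # module path assumed for illustration

html = '''
<input type="text" name="query" id="searchbox">
<script>var debug = false; var opts = {"page": "1"};</script>
'''

wordlist = ['id', 'page', 'q']
print(sorted(heuristic(html, wordlist)))
# ['debug', 'page', 'query', 'searchbox']
print(wordlist)
# ['page', 'debug', 'searchbox', 'query', 'id', 'q'] -- hits moved to the front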
@@ -4,7 +4,7 @@ from urllib.parse import urlparse
 
 def otx(host, page):
     these_params = set()
-    data = requests.get('https://otx.alienvault.com/api/v1/indicators/hostname/%s/url_list?limit=50&page=%d' % (host, page)).json()
+    data = requests.get('https://otx.alienvault.com/api/v1/indicators/hostname/%s/url_list?limit=50&page=%d' % (host, page), verify=False).json()
     if 'url_list' not in data:
         return (these_params, False, 'otx')
     for obj in data['url_list']:
@@ -19,7 +19,8 @@ def wayback(host, page):
     response = requests.get(
         'http://web.archive.org/cdx/search?filter=mimetype:text/html&filter=statuscode:200',
         params=payload,
-        headers=headers
+        headers=headers,
+        verify=False
     ).text
     if not response:
         return (these_params, False, 'wayback')
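Since the same verify=False now appears in all three passive sources, one possible follow-up (not part of this commit) is a shared requests.Session configured once, so the setting lives in a single place:

import requests

# hypothetical shared session: verify=False applies to every request made through it
session = requests.Session()
session.verify = False

response = session.get('http://web.archive.org/cdx/search', params={'url': 'example.com'}).text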