From 0f5be57c3f82e6004f1224f9c797f2fca838493c Mon Sep 17 00:00:00 2001 From: s0md3v Date: Mon, 4 Apr 2022 14:47:27 +0530 Subject: [PATCH] lots of fixes --- arjun/__main__.py | 16 ++++++---- arjun/core/anomaly.py | 16 +++++++--- arjun/core/bruter.py | 1 + arjun/core/error_handler.py | 19 ++++++------ arjun/core/requester.py | 46 +++++++++++++++++++++++----- arjun/core/utils.py | 16 +++++++--- arjun/db/{params.txt => large.txt} | 0 arjun/db/{default.txt => medium.txt} | 0 arjun/plugins/heuristic.py | 4 +-- 9 files changed, 84 insertions(+), 34 deletions(-) rename arjun/db/{params.txt => large.txt} (100%) rename arjun/db/{default.txt => medium.txt} (100%) diff --git a/arjun/__main__.py b/arjun/__main__.py index c93c65d..154efe4 100644 --- a/arjun/__main__.py +++ b/arjun/__main__.py @@ -25,16 +25,17 @@ parser.add_argument('-oT', help='Path for text output file.', dest='text_file') parser.add_argument('-oB', help='Port for output to Burp Suite Proxy. Default port is 8080.', dest='burp_port', nargs='?', const=8080) parser.add_argument('-d', help='Delay between requests in seconds. (default: 0)', dest='delay', type=float, default=0) parser.add_argument('-t', help='Number of concurrent threads. (default: 2)', dest='threads', type=int, default=2) -parser.add_argument('-w', help='Wordlist file path. (default: {arjundir}/db/default.txt)', dest='wordlist', default=arjun_dir+'/db/default.txt') +parser.add_argument('-w', help='Wordlist file path. (default: {arjundir}/db/large.txt)', dest='wordlist', default=arjun_dir+'/db/large.txt') parser.add_argument('-m', help='Request method to use: GET/POST/XML/JSON. (default: GET)', dest='method', default='GET') parser.add_argument('-i', help='Import target URLs from file.', dest='import_file', nargs='?', const=True) parser.add_argument('-T', help='HTTP request timeout in seconds. (default: 15)', dest='timeout', type=float, default=15) -parser.add_argument('-c', help='Chunk size. 
The number of parameters to be sent at once', type=int, dest='chunks', default=500) +parser.add_argument('-c', help='Chunk size. The number of parameters to be sent at once', type=int, dest='chunks', default=300) parser.add_argument('-q', help='Quiet mode. No output.', dest='quiet', action='store_true') parser.add_argument('--headers', help='Add headers. Separate multiple headers with a new line.', dest='headers', nargs='?', const=True) parser.add_argument('--passive', help='Collect parameter names from passive sources like wayback, commoncrawl and otx.', dest='passive', nargs='?', const='-') parser.add_argument('--stable', help='Prefer stability over speed.', dest='stable', action='store_true') parser.add_argument('--include', help='Include this data in every request.', dest='include', default={}) +parser.add_argument('--disable-redirects', help='Do not follow redirects.', dest='disable_redirects', action='store_true') args = parser.parse_args() # arguments to be parsed if args.quiet: @@ -58,6 +59,8 @@ mem.var['method'] = mem.var['method'].upper() if mem.var['stable'] or mem.var['delay']: mem.var['threads'] = 1 +if mem.var['wordlist'] in ('large', 'medium', 'small'): + mem.var['wordlist'] = f'{arjun_dir}/db/{mem.var["wordlist"]}.txt' try: wordlist_file = arjun_dir + '/db/small.txt' if args.wordlist == 'small' else args.wordlist @@ -93,8 +96,9 @@ def narrower(request, factors, param_groups): for i, result in enumerate(as_completed(futures)): if result.result(): anomalous_params.extend(slicer(result.result())) - if not mem.var['kill']: - print('%s Processing chunks: %i/%-6i' % (info, i + 1, len(param_groups)), end='\r') + if mem.var['kill']: + return anomalous_params + print('%s Processing chunks: %i/%-6i' % (info, i + 1, len(param_groups)), end='\r') return anomalous_params @@ -108,8 +112,8 @@ def initialize(request, wordlist): print('%s %s is not a valid URL' % (bad, url)) return 'skipped' print('%s Probing the target for stability' % run) - stable = 
stable_request(url, request['headers']) - if not stable: + request['url'] = stable_request(url, request['headers']) + if not request['url']: return 'skipped' else: fuzz = random_str(6) diff --git a/arjun/core/anomaly.py b/arjun/core/anomaly.py index 52b224c..3fc7ca6 100644 --- a/arjun/core/anomaly.py +++ b/arjun/core/anomaly.py @@ -1,6 +1,7 @@ import re import requests +from urllib.parse import urlparse from arjun.core.utils import diff_map, remove_tags @@ -26,8 +27,9 @@ def define(response_1, response_2, param, value, wordlist): factors['same_code'] = response_1.status_code if response_1.headers.keys() == response_2.headers.keys(): factors['same_headers'] = list(response_1.headers.keys()) - if response_1.url == response_2.url: - factors['same_redirect'] = response_1.url + factors['same_headers'].sort() + if response_1.headers.get('Location', '') == response_2.headers.get('Location', ''): + factors['same_redirect'] = urlparse(response_1.headers.get('Location', '')).path if response_1.text == response_2.text: factors['same_body'] = response_1.text elif response_1.text.count('\n') == response_2.text.count('\n'): @@ -48,11 +50,13 @@ def compare(response, factors, params): detects anomalies by comparing a HTTP response against a rule list returns string, list (anomaly, list of parameters that caused it) """ + these_headers = list(response.headers.keys()) + these_headers.sort() if factors['same_code'] and response.status_code != factors['same_code']: return ('http code', params) - if factors['same_headers'] and list(response.headers.keys()) != factors['same_headers']: + if factors['same_headers'] and these_headers != factors['same_headers']: return ('http headers', params) - if factors['same_redirect'] and response.url != factors['same_redirect']: + if factors['same_redirect'] and urlparse(response.headers.get('Location', '')).path != factors['same_redirect']: return ('redirection', params) if factors['same_body'] and response.text != factors['same_body']: return ('body 
length', params) @@ -66,7 +70,9 @@ def compare(response, factors, params): return ('lines', params) if type(factors['param_missing']) == list: for param in params.keys(): - if param in response.text and param not in factors['param_missing'] and re.search(r'[\'"\s]%s[\'"\s]' % param, response.text): + if len(param) < 5: + continue + if param not in factors['param_missing'] and re.search(r'[\'"\s]%s[\'"\s]' % param, response.text): return ('param name reflection', params) if factors['value_missing']: for value in params.values(): diff --git a/arjun/core/bruter.py b/arjun/core/bruter.py index d76ed77..0e02655 100644 --- a/arjun/core/bruter.py +++ b/arjun/core/bruter.py @@ -17,6 +17,7 @@ def bruter(request, factors, params, mode='bruteforce'): if conclusion == 'retry': return bruter(request, factors, params, mode=mode) elif conclusion == 'kill': + mem.var['kill'] = True return [] comparison_result = compare(response, factors, params) if mode == 'verify': diff --git a/arjun/core/error_handler.py b/arjun/core/error_handler.py index 1c67b89..2ed8bb5 100644 --- a/arjun/core/error_handler.py +++ b/arjun/core/error_handler.py @@ -25,20 +25,21 @@ def error_handler(response, factors): 'kill': stop processing this target returns str """ - if type(response) != str and response.status_code in (400, 503, 429): - if response.status_code == 400: - if factors['same_code'] != 400: + if type(response) != str and response.status_code in (400, 413, 418, 429, 503): + if response.status_code == 503: + mem.var['kill'] = True + print('%s Target is unable to process requests, try --stable switch' % bad) + return 'kill' + elif response.status_code in (429, 418): + print('%s Target has a rate limit in place, try --stable switch' % bad) + return 'kill' + else: + if factors['same_code'] != response.status_code: mem.var['kill'] = True print('%s Server received a bad request. 
Try decreasing the chunk size with -c option' % bad) return 'kill' else: return 'ok' - elif response.status_code == 503: - mem.var['kill'] = True - print('%s Target is unable to process requests, try --stable switch' % bad) - return 'kill' - elif response.status_code == 429: - return connection_refused() else: if 'Timeout' in response: if mem.var['timeout'] > 20: diff --git a/arjun/core/requester.py b/arjun/core/requester.py index 8af0621..e2f680f 100644 --- a/arjun/core/requester.py +++ b/arjun/core/requester.py @@ -25,20 +25,52 @@ def requester(request, payload={}): return 'killed' try: if request['method'] == 'GET': - response = requests.get(url, params=payload, headers=request['headers'], verify=False, timeout=mem.var['timeout']) + response = requests.get(url, + params=payload, + headers=request['headers'], + verify=False, + allow_redirects=False, + timeout=mem.var['timeout'], + ) elif request['method'] == 'JSON': request['headers']['Content-Type'] = 'application/json' if mem.var['include'] and '$arjun$' in mem.var['include']: - payload = mem.var['include'].replace('$arjun$', json.dumps(payload).rstrip('}').lstrip('{')) - response = requests.post(url, data=payload, headers=request['headers'], verify=False, timeout=mem.var['timeout']) + payload = mem.var['include'].replace('$arjun$', + json.dumps(payload).rstrip('}').lstrip('{')) + response = requests.post(url, + data=payload, + headers=request['headers'], + verify=False, + allow_redirects=False, + timeout=mem.var['timeout'], + ) else: - response = requests.post(url, json=payload, headers=request['headers'], verify=False, timeout=mem.var['timeout']) + response = requests.post(url, + json=payload, + headers=request['headers'], + verify=False, + allow_redirects=False, + timeout=mem.var['timeout'], + ) elif request['method'] == 'XML': request['headers']['Content-Type'] = 'application/xml' - payload = mem.var['include'].replace('$arjun$', dict_to_xml(payload)) - response = requests.post(url, data=payload, 
headers=request['headers'], verify=False, timeout=mem.var['timeout']) + payload = mem.var['include'].replace('$arjun$', + dict_to_xml(payload)) + response = requests.post(url, + data=payload, + headers=request['headers'], + verify=False, + allow_redirects=False, + timeout=mem.var['timeout'], + ) else: - response = requests.post(url, data=payload, headers=request['headers'], verify=False, timeout=mem.var['timeout']) + response = requests.post(url, + data=payload, + headers=request['headers'], + verify=False, + allow_redirects=False, + timeout=mem.var['timeout'], + ) return response except Exception as e: return str(e) diff --git a/arjun/core/utils.py b/arjun/core/utils.py index a72aba5..15d4f19 100644 --- a/arjun/core/utils.py +++ b/arjun/core/utils.py @@ -66,15 +66,18 @@ def stable_request(url, headers): returns None in case of failure, returns a "response" object otherwise """ parsed = urlparse(url) + redirects_allowed = False if mem.var['disable_redirects'] else True scheme, host, path = parsed.scheme, parsed.netloc, parsed.path schemes = (['https', 'http'] if scheme == 'https' else ['http', 'https']) for scheme in schemes: try: - return requests.get( + response = requests.get( scheme + '://' + host + path, headers=headers, verify=False, - timeout=10).status_code + timeout=10, + allow_redirects=redirects_allowed) + return response.url except Exception as e: if 'ConnectionError' not in str(e): continue @@ -160,13 +163,16 @@ def reader(path, mode='string'): return ''.join([line for line in file]) -re_extract_js = re.compile(r'(?si)<script[^>]*>([^<].+?)</script>') def extract_js(response): """ extracts javascript from a given string of html returns list (list of javascript code) """ - return re_extract_js.findall(response) + scripts = [] + for part in re.split('(?i)<script[> ]', response): + actual_parts = re.split('(?i)</script>', part, maxsplit=2) + if len(actual_parts) > 1: + scripts.append(actual_parts[0]) + return scripts def parse_headers(string): """ diff --git a/arjun/db/params.txt b/arjun/db/large.txt similarity index 100% rename from arjun/db/params.txt rename to arjun/db/large.txt diff --git a/arjun/db/default.txt b/arjun/db/medium.txt similarity index 100% rename from arjun/db/default.txt rename to arjun/db/medium.txt diff --git a/arjun/plugins/heuristic.py b/arjun/plugins/heuristic.py index 776b15b..8196b32 100644 --- a/arjun/plugins/heuristic.py +++ b/arjun/plugins/heuristic.py @@ -9,8 +9,8 @@ def is_not_junk(param): # TODO: for map keys, javascript tolerates { param: "value" } re_input_names = re.compile(r'''(?i)<input.+?name=["']?([^"'\s>]+)''') re_input_ids = re.compile(r'''(?i)<input.+?id=["']?([^"'\s>]+)''') -re_empty_vars = re.compile(r'''([^\s!=<>]+)\s*=\s*(?:['"`]{2}|true|false|null)''') -re_map_keys = re.compile(r'''([^'"]+)['"]\s*:\s*['"`]''') +re_empty_vars = re.compile(r'''(?:[;\n]|\bvar|\blet)(\w+)\s*=\s*(?:['"`]{1,2}|true|false|null)''') +re_map_keys = re.compile(r'''['"](\w+?)['"]\s*:\s*['"`]''') def heuristic(response, wordlist): potential_params = []