import re
import sys
import json
import random
import requests
import concurrent.futures

from dicttoxml import dicttoxml
from urllib.parse import urlparse

from arjun.core.prompt import prompt
from arjun.core.importer import importer
from arjun.plugins.otx import otx
from arjun.plugins.wayback import wayback
from arjun.plugins.commoncrawl import commoncrawl

import arjun.core.config as mem
from arjun.core.colors import info


def extract_headers(headers):
    """
    parses headers provided through command line
    returns dict
    """
    headers = headers.replace('\\n', '\n')
    return parse_headers(headers)

def confirm(array_of_dicts, usable):
"""
extracts the value from single valued dict from an array of dicts
returns a array of dicts
"""
param_groups = []
for dic in array_of_dicts:
if len(dic) == 1:
usable.append(dic)
else:
2020-12-06 15:01:06 +05:30
param_groups.append(dic)
return param_groups
2019-03-02 05:43:02 +05:30
2020-12-06 15:01:06 +05:30
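# Illustrative example (not in the original module):
#   usable = []
#   confirm([{'a': 1}, {'b': 2, 'c': 3}], usable)  ->  [{'b': 2, 'c': 3}]
#   usable is now [{'a': 1}]
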
def slicer(dic, n=2):
"""
divides dict into n parts
returns array containing n dicts
"""
listed = list(dic.items())
k, m = divmod(len(dic), n)
return [dict(listed[i * k + min(i, m):(i + 1) * k + min(i + 1, m)]) for i in range(n)]
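
# Illustrative example (not in the original module); the first len(dic) % n
# parts receive one extra item:
#   slicer({'a': 1, 'b': 2, 'c': 3}, 2)  ->  [{'a': 1, 'b': 2}, {'c': 3}]
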
def populate(array):
"""
converts a list of parameters into parameter and value pair
returns dict
"""
return {name: '1' * (6 - len(str(i))) + str(i) for i, name in enumerate(array)}
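
# Illustrative example (not in the original module); every parameter gets a
# unique, 6-character numeric canary value:
#   populate(['name', 'id'])  ->  {'name': '111110', 'id': '111111'}
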
def stable_request(url, headers):
"""
guarantees crash-proof HTTP(S) requests
returns None in case of failure, returns a "response" object otherwise
"""
parsed = urlparse(url)
scheme, host, path = parsed.scheme, parsed.netloc, parsed.path
schemes = (['https', 'http'] if scheme == 'https' else ['http', 'https'])
for scheme in schemes:
2018-11-09 20:32:08 +05:30
try:
2020-12-06 15:01:06 +05:30
return requests.get(
scheme + '://' + host + path,
headers=headers,
verify=False,
timeout=10).status_code
except Exception as e:
if 'ConnectionError' not in str(e):
continue
return None
2018-11-09 20:32:08 +05:30
2021-02-07 19:43:30 +05:30
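# Illustrative usage (not in the original module); the result depends on the
# network:
#   stable_request('https://example.com/', {})  ->  200, or None if unreachable
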
def remove_tags(html):
    """
    removes all the html from a webpage source
    """
    return re.sub(r'(?s)<.*?>', '', html)

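# Illustrative example (not in the original module):
#   remove_tags('<p>hello <b>world</b></p>')  ->  'hello world'
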
def diff_map(body_1, body_2):
    """
    creates a list of lines that are common between two multi-line strings
    returns list
    """
    sig = []
    lines_1, lines_2 = body_1.split('\n'), body_2.split('\n')
    for line_1, line_2 in zip(lines_1, lines_2):
        if line_1 == line_2:
            sig.append(line_1)
    return sig
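
# Illustrative example (not in the original module); lines are compared
# by position, so this is not a full diff:
#   diff_map('a\nb\nc', 'a\nx\nc')  ->  ['a', 'c']
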
def random_str(n):
    """
    generates a random string of digits of length n
    """
    return ''.join(str(random.choice(range(10))) for i in range(n))

def get_params(include):
    """
    loads parameters from JSON/query string
    """
    params = {}
    if include:
        if include.startswith('{'):
            try:
                params = json.loads(str(include).replace('\'', '"'))
                return params
            except json.decoder.JSONDecodeError:
                return {}
        else:
            cleaned = include.split('?')[-1]
            parts = cleaned.split('&')
            for part in parts:
                each = part.split('=')
                try:
                    params[each[0]] = each[1]
                except IndexError:
                    params = {}
    return params
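
# Illustrative examples (not in the original module):
#   get_params('{"id": "1"}')            ->  {'id': '1'}
#   get_params('/search?q=x&lang=en')    ->  {'q': 'x', 'lang': 'en'}
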

def create_query_string(params):
    """
    creates a query string from a list of parameters
    returns str
    """
    # join the pairs with '&' so multiple parameters form a valid query string
    pairs = [param + '=' + random_str(4) for param in params]
    return '?' + '&'.join(pairs)
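
# Illustrative example (not in the original module); values are random digits:
#   create_query_string(['q', 'page'])  ->  '?q=3149&page=0082'
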

def reader(path, mode='string'):
    """
    reads a file
    returns a string/array containing the content of the file
    """
    with open(path, 'r', encoding='utf-8') as file:
        if mode == 'lines':
            return list(filter(None, [line.rstrip('\n') for line in file]))
        else:
            return ''.join([line for line in file])


def extract_js(response):
    """
    extracts inline javascript from a given string
    """
    # [^>]* instead of [^>]+ so attribute-less <script> tags also match
    return re.findall(r'(?s)<script[^>]*>([^<].+?)</script', response.lower(), re.I)
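
# Illustrative example (not in the original module); note that the source is
# lowercased before matching:
#   extract_js('<script>var Foo = 1;</script>')  ->  ['var foo = 1;']
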

def parse_headers(string):
    """
    parses a raw block of headers
    returns dict
    """
    result = {}
    for line in string.split('\n'):
        if len(line) > 1:
            splitted = line.split(':')
            result[splitted[0]] = ':'.join(splitted[1:]).strip()
    return result
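
# Illustrative example (not in the original module):
#   parse_headers('Host: example.com\nAccept: */*')
#   ->  {'Host': 'example.com', 'Accept': '*/*'}
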

def parse_request(string):
    """
    parses http request
    returns dict
    """
    result = {}
    match = re.search(r'(?:([a-zA-Z0-9]+) ([^ ]+) [^ ]+\n)?([\s\S]+\n)\n?([\s\S]+)?', string)
    result['method'] = match.group(1)
    result['path'] = match.group(2)
    result['headers'] = parse_headers(match.group(3))
    result['data'] = match.group(4)
    return result
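
# Illustrative example (not in the original module):
#   parse_request('GET /search HTTP/1.1\nHost: example.com\n\nq=1')
#   ->  {'method': 'GET', 'path': '/search',
#        'headers': {'Host': 'example.com'}, 'data': 'q=1'}
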

def http_import(path):
    """
    parses http request from a file
    returns dict
    """
    return parse_request(reader(path))


def fetch_params(host):
    """
    fetches parameters from passive sources (commoncrawl, otx, wayback)
    returns list
    """
    available_plugins = {'commoncrawl': commoncrawl, 'otx': otx, 'wayback': wayback}
    page = 0
    progress = 0
    params = {}
    while len(available_plugins) > 0 and page <= 10:
        threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=len(available_plugins))
        futures = (threadpool.submit(func, host, page) for func in available_plugins.values())
        for each in concurrent.futures.as_completed(futures):
            if progress < 98:
                progress += 3
            this_result = each.result()
            # each plugin returns (found_params, has_more_pages, plugin_name);
            # a plugin is dropped once it has no more pages to offer
            if not this_result[1]:
                progress += ((10 - page) * 10 / 3)
                del available_plugins[this_result[2]]
            if len(this_result[0]) > 1:
                if not params:
                    params = this_result[0]
                else:
                    params.update(this_result[0])
            print('%s Progress: %i%%' % (info, progress), end='\r')
        page += 1
    print('%s Progress: %i%%' % (info, 100), end='\r')
    return params


def prepare_requests(args):
    """
    creates request object(s) used by Arjun from the targets given by the user
    returns a dict for a single URL target, or a list for an imported file
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:83.0) Gecko/20100101 Firefox/83.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'close',
        'Upgrade-Insecure-Requests': '1'
    }
    if isinstance(args.headers, str):
        headers = extract_headers(args.headers)
    elif args.headers:
        # the headers flag was passed without a value: ask for headers interactively
        headers = extract_headers(prompt())
    if mem.var['method'] == 'JSON':
        headers['Content-type'] = 'application/json'
    if args.url:
        params = get_params(args.include)
        return {
            'url': args.url,
            'method': mem.var['method'],
            'headers': headers,
            'include': params
        }
    elif args.import_file:
        return importer(args.import_file, mem.var['method'], headers, args.include)
    return []


def nullify(*args, **kwargs):
    """
    a function that does nothing
    """
    pass


def dict_to_xml(dict_obj):
    """
    converts dict to xml string
    returns str
    """
    return dicttoxml(dict_obj, root=False, attr_type=False).decode('utf-8')
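
# Illustrative example (not in the original module):
#   dict_to_xml({'id': '1', 'name': 'x'})  ->  '<id>1</id><name>x</name>'
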

def compatible_path(path):
    """
    converts filepaths to be compatible with the host OS
    returns str
    """
    if sys.platform.lower().startswith('win'):
        return path.replace('/', '\\')
    return path
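
# Illustrative example (not in the original module); 'some/dir/file.txt' is a
# made-up path:
#   compatible_path('some/dir/file.txt')  ->  'some\\dir\\file.txt' on Windows,
#   and the path unchanged on other platforms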