XSStrike/core/photon.py

import concurrent.futures
from re import findall
from urllib.parse import urlparse
from core.colors import run
from core.utils import getUrl, getParams
from core.requester import requester
from core.zetanize import zetanize


def photon(seedUrl, headers, level, threadCount, delay, timeout):
    forms = []  # web forms
    processed = set()  # urls that have been crawled
    storage = set()  # urls that belong to the target i.e. in-scope
    schema = urlparse(seedUrl).scheme  # extract the scheme e.g. http or https
    host = urlparse(seedUrl).netloc  # extract the host e.g. example.com
    main_url = schema + '://' + host  # join scheme and host to make the root url
    storage.add(seedUrl)  # add the url to storage
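    # crawl a single url: mark it as processed, record its forms and harvest in-scope links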
    def rec(target):
        processed.add(target)
        print('%s Parsing %s' % (run, target))
        url = getUrl(target, True)
        params = getParams(target, '', True)
        if '=' in target:  # if there's a '=' in the url, there should be GET parameters
            inps = []
            for name, value in params.items():
                inps.append({'name': name, 'value': value})
            forms.append({0: {'action': url, 'method': 'get', 'inputs': inps}})
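        # fetch the page so its markup can be scanned for forms and links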
        response = requester(url, params, headers, True, delay, timeout).text
        forms.append(zetanize(response))
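        # pull href values out of anchor tags with a simple regex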
        matches = findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response)
        for link in matches:  # iterate over the matches
            # remove everything after a "#" to deal with in-page anchors
            link = link.split('#')[0]
            if link[:4] == 'http':
                if link.startswith(main_url):
                    storage.add(link)
            elif link[:2] == '//':
                if link.split('/')[2].startswith(host):
                    storage.add(schema + link)
            elif link[:1] == '/':
                storage.add(main_url + link)
            else:
                storage.add(main_url + '/' + link)
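    # crawl level by level: each pass processes every url discovered so far that hasn't been crawled yet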
    for x in range(level):
        urls = storage - processed  # urls to crawl = all urls - urls that have been crawled
        threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=threadCount)
        futures = (threadpool.submit(rec, url) for url in urls)
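        # iterating over as_completed just blocks until every url in this level has been crawled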
        for i, _ in enumerate(concurrent.futures.as_completed(futures)):
            pass
    return [forms, processed]
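

# --- Usage sketch (not part of the original module) --------------------------
# A minimal example of how the crawler might be driven, assuming the core.*
# imports above resolve (i.e. this runs inside the XSStrike source tree). The
# seed url, header dict and tuning values below are illustrative assumptions,
# not values taken from the project.
if __name__ == '__main__':
    exampleHeaders = {'User-Agent': 'Mozilla/5.0'}  # hypothetical header set
    crawledForms, crawledUrls = photon(
        'http://example.com',  # seed url to start crawling from
        exampleHeaders,
        2,    # level: number of crawl rounds
        5,    # threadCount: urls crawled in parallel per round
        0,    # delay between requests
        10)   # timeout per request
    print('%d urls crawled, %d form sets collected' % (len(crawledUrls), len(crawledForms)))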