# Let's import what we need
import concurrent.futures
from re import findall
from urllib.parse import urlparse

from core.colors import run
from core.requester import requester
from core.utils import getUrl, getParams
from core.zetanize import zetanize


def photon(seedUrl, headers, level):
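    """Crawl outward from seedUrl up to `level` levels deep and return the forms found."""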
    forms = []  # web forms
    processed = set()  # urls that have been crawled
    storage = set()  # urls that belong to the target i.e. in-scope
    scheme = urlparse(seedUrl).scheme  # scheme of the seed url e.g. 'https'
    host = urlparse(seedUrl).netloc  # hostname of the seed url
    main_url = scheme + '://' + host  # root url of the target
    storage.add(seedUrl)  # the seed url is in-scope by definition

    def rec(target):
        processed.add(target)
        print('%s Parsing %s' % (run, target))
        url = getUrl(target, '', True)
        params = getParams(target, '', True)
        if '=' in target:
            # the url itself carries GET parameters; record them as a
            # pseudo GET form so they are returned along with the real forms
            inps = []
            for name, value in params.items():
                inps.append({'name': name, 'value': value})
            forms.append({0: {'action': url, 'method': 'get', 'inputs': inps}})
        response = requester(url, params, headers, True, 0).text
        forms.append(zetanize(response))  # extract web forms from the response
        # extract the href value of every anchor tag in the response
        matches = findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response)
        for link in matches:  # iterate over the matches
            # remove everything after a "#" to deal with in-page anchors
            link = link.split('#')[0]
            if link[:4] == 'http':  # absolute url
                if link.startswith(main_url):
                    storage.add(link)
            elif link[:2] == '//':  # protocol-relative url
                if link.split('/')[2].startswith(host):
                    storage.add(scheme + ':' + link)
            elif link[:1] == '/':  # root-relative url
                storage.add(main_url + link)
            else:  # relative url
                storage.add(main_url + '/' + link)

    for x in range(level):
        urls = storage - processed  # urls that are yet to be crawled
        # importing here (rather than at the top) means a threadCount value
        # changed in core.config at runtime is the one that gets used
        from core.config import threadCount
        threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=threadCount)
        futures = (threadpool.submit(rec, url) for url in urls)
        for i, _ in enumerate(concurrent.futures.as_completed(futures)):
            pass  # drain the iterator to wait for this level to finish
    return forms
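
# A minimal usage sketch, not part of the module's normal entry point: it assumes
# `headers` is a plain dict of request headers and uses a placeholder URL and
# crawl depth; in practice the caller supplies these values itself.
if __name__ == '__main__':
    example_headers = {'User-Agent': 'Mozilla/5.0'}  # hypothetical headers
    found_forms = photon('https://example.com/', example_headers, 2)
    print(found_forms)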