XSStrike/core/photon.py

import concurrent.futures
from re import findall
from urllib.parse import urlparse
from core.colors import run
from core.utils import getUrl, getParams
from core.requester import requester
from core.zetanize import zetanize


def photon(seedUrl, headers, level, threadCount, delay, timeout):
    forms = []  # web forms
    processed = set()  # urls that have been crawled
    storage = set()  # urls that belong to the target i.e. in-scope
    schema = urlparse(seedUrl).scheme  # extract the scheme e.g. http or https
    host = urlparse(seedUrl).netloc  # extract the host e.g. example.com
    main_url = schema + '://' + host  # join scheme and host to make the root url
    storage.add(seedUrl)  # add the url to storage
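    # crawl a single url: mark it as processed, record its forms and harvest in-scope links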
    def rec(target):
        processed.add(target)
        print('%s Parsing %s' % (run, target))
        url = getUrl(target, True)
        params = getParams(target, '', True)
        if '=' in target:  # if there's a '=' in the url, there should be GET parameters
            inps = []
            for name, value in params.items():
                inps.append({'name': name, 'value': value})
            forms.append({0: {'action': url, 'method': 'get', 'inputs': inps}})
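        # fetch the page so its markup can be scanned for forms and links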
        response = requester(url, params, headers, True, delay, timeout).text
        forms.append(zetanize(response))
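        # pull href values out of anchor tags with a simple regex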
        matches = findall(r'<[aA].*href=["\']{0,1}(.*?)["\']', response)
        for link in matches:  # iterate over the matches
            # remove everything after a "#" to deal with in-page anchors
            link = link.split('#')[0]
            if link[:4] == 'http':
                if link.startswith(main_url):
                    storage.add(link)
            elif link[:2] == '//':
                if link.split('/')[2].startswith(host):
                    storage.add(schema + link)
            elif link[:1] == '/':
                storage.add(main_url + link)
            else:
                storage.add(main_url + '/' + link)
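    # crawl level by level: each pass processes every url discovered so far that hasn't been crawled yet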
    for x in range(level):
        urls = storage - processed  # urls to crawl = all urls - urls that have been crawled
        threadpool = concurrent.futures.ThreadPoolExecutor(max_workers=threadCount)
        futures = (threadpool.submit(rec, url) for url in urls)
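        # iterating over as_completed just blocks until every url in this level has been crawled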
        for i, _ in enumerate(concurrent.futures.as_completed(futures)):
            pass
    return [forms, processed]
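

# --- Usage sketch (not part of the original module) --------------------------
# A minimal example of how the crawler might be driven, assuming the core.*
# imports above resolve (i.e. this runs inside the XSStrike source tree). The
# seed url, header dict and tuning values below are illustrative assumptions,
# not values taken from the project.
if __name__ == '__main__':
    exampleHeaders = {'User-Agent': 'Mozilla/5.0'}  # hypothetical header set
    crawledForms, crawledUrls = photon(
        'http://example.com',  # seed url to start crawling from
        exampleHeaders,
        2,    # level: number of crawl rounds
        5,    # threadCount: urls crawled in parallel per round
        0,    # delay between requests
        10)   # timeout per request
    print('%d urls crawled, %d form sets collected' % (len(crawledUrls), len(crawledForms)))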