Files
Arjun/arjun/plugins/wayback.py

33 lines
784 B
Python
Raw Normal View History

import requests
from urllib.parse import urlparse
def wayback(host, page):
payload = {
'url': host,
'matchType': 'host',
'collapse': 'urlkey',
'fl': 'original',
'page': page,
'limit': 10000
}
headers = {
'User-Agent': 'Mozilla'
}
try:
these_params = set()
response = requests.get(
'http://web.archive.org/cdx/search?filter=mimetype:text/html&filter=statuscode:200',
params=payload,
headers=headers
).text
if not response:
return (these_params, False, 'wayback')
urls = filter(None, response.split('\n'))
for url in urls:
for param in urlparse(url).query.split('&'):
these_params.add(param.split('=')[0])
return (these_params, True, 'wayback')
except requests.exceptions.ConnectionError:
return (these_params, False, 'wayback')