35 lines
802 B
Python
35 lines
802 B
Python
import requests
|
|
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
def wayback(host, page):
    """Collect query-parameter names seen in archived URLs of *host*.

    Sends one GET request to the web.archive.org CDX search endpoint and
    parses the plain-text response, which is treated as one URL per line.

    Args:
        host: Value sent as the ``url`` query parameter (with
            ``matchType=host``).
        page: Value sent as the ``page`` query parameter.

    Returns:
        A 3-tuple ``(names, ok, 'wayback')``:

        * ``names`` -- set of non-empty parameter names taken from the
          query strings of the returned URLs.
        * ``ok`` -- ``False`` when the response body is empty or a
          ``requests.exceptions.ConnectionError`` occurred, else ``True``.
        * ``'wayback'`` -- constant source tag.

    Any other exception raised by ``requests`` or ``urlparse`` propagates
    to the caller.
    """
    payload = {
        'url': host,
        'matchType': 'host',
        'collapse': 'urlkey',
        'fl': 'original',
        'page': page,
        'limit': 10000
    }
    headers = {
        'User-Agent': 'Mozilla'
    }
    # Created before the request so the failure path never depends on how
    # far the try body got.
    these_params = set()
    try:
        # NOTE(review): no timeout is passed, so a stalled connection blocks
        # indefinitely; adding one would also require handling
        # requests.exceptions.Timeout -- confirm desired behaviour.
        # NOTE(review): verify=False only matters if the request ends up on
        # HTTPS (e.g. via a redirect) -- confirm it is still wanted.
        response = requests.get(
            'http://web.archive.org/cdx/search?filter=mimetype:text/html&filter=statuscode:200',
            params=payload,
            headers=headers,
            verify=False
        ).text
    except requests.exceptions.ConnectionError:
        return (these_params, False, 'wayback')

    if not response:
        return (these_params, False, 'wayback')

    # Blank lines (e.g. the trailing newline) are dropped by filter(None, ...).
    for url in filter(None, response.split('\n')):
        for param in urlparse(url).query.split('&'):
            name = param.split('=')[0]
            # ''.split('&') yields [''], and '=v' has an empty name; neither
            # is a real parameter, so keep them out of the result.
            if name:
                these_params.add(name)
    return (these_params, True, 'wayback')
|