release preparation
This commit is contained in:
32
arjun/plugins/wayback.py
Normal file
32
arjun/plugins/wayback.py
Normal file
@@ -0,0 +1,32 @@
|
||||
import requests
|
||||
|
||||
from urllib.parse import urlparse
|
||||
|
||||
def wayback(host, page):
    """Collect query-parameter names from Wayback Machine captures of a host.

    Requests one page of results from the web.archive.org CDX search
    endpoint (filtered to ``text/html`` captures with status code 200) and
    gathers the name of every query-string parameter found in the
    returned URLs.

    Args:
        host: Value sent as the CDX ``url`` parameter (``matchType`` is
            ``host``).
        page: Value sent as the CDX ``page`` parameter.

    Returns:
        A 3-tuple ``(params, ok, 'wayback')`` where ``params`` is a set of
        parameter-name strings and ``ok`` is ``True`` when a non-empty
        response body was received, ``False`` when the body was empty or
        the request failed.
    """
    these_params = set()
    payload = {
        'url': host,
        'matchType': 'host',
        'collapse': 'urlkey',
        'fl': 'original',
        'page': page,
        'limit': 10000
    }
    headers = {
        'User-Agent': 'Mozilla'
    }
    try:
        response = requests.get(
            'http://web.archive.org/cdx/search?filter=mimetype:text/html&filter=statuscode:200',
            params=payload,
            headers=headers,
            # requests never times out by default; bound the wait so a
            # stalled connection cannot hang the caller forever.
            timeout=60
        ).text
    except requests.exceptions.RequestException:
        # Covers ConnectionError as before, plus timeouts and other
        # transport-level failures, all reported as "no result".
        return (these_params, False, 'wayback')

    if not response:
        return (these_params, False, 'wayback')

    for url in filter(None, response.split('\n')):
        try:
            query = urlparse(url).query
        except ValueError:
            # urlparse rejects some malformed URLs (e.g. unbalanced IPv6
            # brackets); skip that line rather than lose the whole page.
            continue
        for param in query.split('&'):
            name = param.split('=')[0]
            # ''.split('&') yields [''], so URLs without a query string
            # (or with '&&') would otherwise add an empty parameter name.
            if name:
                these_params.add(name)
    return (these_params, True, 'wayback')
|
||||
Reference in New Issue
Block a user