Files
Bolt/core/zetanize.py

52 lines
1.9 KiB
Python
Raw Normal View History

2018-12-30 03:07:15 +05:30
import re
from urllib.parse import urlparse
2019-04-25 01:29:54 +05:30
2018-12-30 03:07:15 +05:30
def zetanize(url, response):
    """Extract every HTML form found in *response* into plain dictionaries.

    HTML comments are stripped first, so commented-out markup is ignored.
    Forms, inputs and their attributes are located with regular expressions
    rather than an HTML parser, so only quoted attribute values are seen.

    Args:
        url: Address the page was fetched from. Only its scheme and host
            are used, to turn relative form actions into absolute URLs.
        response: Page body as text.

    Returns:
        A dict keyed by the zero-based position of each form in the page.
        Every value is a dict with the keys:
            'action': target URL of the form; relative values are prefixed
                with the scheme and host of *url*. '' when the form has no
                action attribute.
            'method': value of the method attribute, lower-cased; 'get'
                when the attribute is absent.
            'inputs': list of {'name', 'type', 'value'} dicts, one for each
                <input> tag inside the form that carries a name attribute.
    """
    parsedUrl = urlparse(url)
    mainUrl = parsedUrl.scheme + '://' + parsedUrl.netloc

    # Drop comments up front so commented-out forms are not reported.
    response = re.sub(r'(?s)<!--.*?-->', '', response)

    forms = {}
    matches = re.findall(r'(?i)(?s)<form.*?</form.*?>', response)
    for num, match in enumerate(matches):
        page = re.search(r'(?i)action=[\'"](.*?)[\'"]', match)
        method = re.search(r'(?i)method=[\'"](.*?)[\'"]', match)

        # A form may legitimately omit the action attribute; check the match
        # object before dereferencing it instead of crashing on None.
        if page:
            action = page.group(1)
            if not action.startswith('http'):
                # Relative target: anchor it at the site root.
                separator = '' if action.startswith('/') else '/'
                action = mainUrl + separator + action
            action = action.replace('&amp;', '&')
        else:
            action = ''

        inputs = []
        # Only look inside this form's own markup; scanning the whole page
        # would attach every input of the document to every form.
        for inp in re.findall(r'(?i)(?s)<input.*?>', match):
            inpName = re.search(r'(?i)name=[\'"](.*?)[\'"]', inp)
            if not inpName:
                # Inputs without a name are ignored.
                continue
            inpType = re.search(r'(?i)type=[\'"](.*?)[\'"]', inp)
            inpValue = re.search(r'(?i)value=[\'"](.*?)[\'"]', inp)
            typeText = inpType.group(1) if inpType else ''
            valueText = inpValue.group(1) if inpValue else ''
            if typeText.lower() == 'submit' and valueText == '':
                # Placeholder value for submit buttons that declare none.
                valueText = 'Submit Query'
            inputs.append({
                'name': inpName.group(1),
                'type': typeText,
                'value': valueText
            })

        forms[num] = {
            'action': action,
            'method': method.group(1).lower() if method else 'get',
            'inputs': inputs
        }
    return forms