2018-10-27 18:58:52 +05:30
|
|
|
import re
|
|
|
|
|
|
2018-11-16 21:13:45 +05:30
|
|
|
|
2018-10-27 18:58:52 +05:30
|
|
|
def zetanize(response):
    """Extract the HTML forms, and their named inputs, from a page.

    HTML comments are stripped first, so forms that are commented out are
    ignored. Parsing is regex based: only quoted attribute values
    (single or double quotes) are recognised.

    Args:
        response: The HTML document as text (``str``).

    Returns:
        A dict keyed by the form's zero-based position in the document.
        Each value is a dict with:

        * ``'action'`` - the form's ``action`` attribute, ``''`` if absent.
        * ``'method'`` - the ``method`` attribute lowercased, ``'get'`` if
          absent.
        * ``'inputs'`` - a list of ``{'name', 'type', 'value'}`` dicts, one
          per ``<input>`` tag inside that form that has a ``name``
          attribute. Missing ``type``/``value`` become ``''``, except that
          a submit input without a value gets ``'Submit Query'``.
    """
    def e(string):
        return string.encode('utf-8')

    def d(string):
        return string.decode('utf-8')

    def attr(name, markup):
        """Return the quoted value of attribute *name*, or None if absent."""
        # The backreference makes the closing quote match the opening one,
        # so a value such as "it's" is not cut short at the apostrophe.
        found = re.search(r'(?i)' + name + r'=([\'"])(.*?)\1', markup)
        return found.group(2) if found else None

    # remove the content between html comments
    response = re.sub(r'(?s)<!--.*?-->', '', response)

    forms = {}
    # extract all the forms
    matches = re.findall(r'(?i)(?s)<form.*?</form.*?>', response)
    for num, match in enumerate(matches):
        page = attr('action', match)
        method = attr('method', match)

        fields = []
        # Only look inside the current form; scanning the whole page would
        # attach every input of the document to every form.
        for inp in re.findall(r'(?i)(?s)<input.*?>', match):
            inp_name = attr('name', inp)
            if inp_name is None:
                # Inputs without a name are not recorded.
                continue
            inp_type = attr('type', inp)
            inp_value = attr('value', inp)

            inp_name = d(e(inp_name))
            inp_type = d(e(inp_type)) if inp_type is not None else ''
            inp_value = d(e(inp_value)) if inp_value is not None else ''
            if inp_type.lower() == 'submit' and inp_value == '':
                # Label used when a submit button declares no value.
                inp_value = 'Submit Query'

            fields.append({
                'name': inp_name,
                'type': inp_type,
                'value': inp_value,
            })

        forms[num] = {
            'action': d(e(page)) if page is not None else '',
            # Lowercasing happens on the encoded bytes (ASCII letters only).
            'method': d(e(method).lower()) if method is not None else 'get',
            'inputs': fields,
        }
    return forms
|