52 lines
1.9 KiB
Python
52 lines
1.9 KiB
Python
import re
|
|
from urllib.parse import urlparse
|
|
|
|
|
|
def zetanize(url, response):
    """Extract the HTML forms in ``response`` together with their inputs.

    Parsing is regex-based (not a real HTML parser): only attributes written
    with single or double quotes are recognised.

    Args:
        url: URL the page was fetched from. Its scheme and network location
            are used to turn relative form actions into absolute URLs.
        response: HTML source of the page, as text.

    Returns:
        A dict keyed by the zero-based position of each form in the page.
        Every value is a dict with the keys:

        * ``'action'`` - absolute submission URL, or ``''`` when no quoted
          ``action`` attribute is found in the form.
        * ``'method'`` - lower-cased ``method`` attribute, ``'get'`` when
          absent.
        * ``'inputs'`` - list of ``{'name', 'type', 'value'}`` dicts, one for
          each ``<input>`` tag inside the form that carries a ``name``.
    """
    # Local import: keeps the block self-contained without touching the
    # file-level import lines.
    from html import unescape

    parsed_url = urlparse(url)
    main_url = parsed_url.scheme + '://' + parsed_url.netloc

    # Strip HTML comments so commented-out markup is not reported as a form.
    response = re.sub(r'(?s)<!--.*?-->', '', response)

    forms = {}
    matches = re.findall(r'(?i)(?s)<form.*?</form.*?>', response)
    for num, match in enumerate(matches):
        # NOTE: both searches scan the whole form text, not only the opening
        # <form ...> tag.
        page = re.search(r'(?i)action=[\'"](.*?)[\'"]', match)
        method = re.search(r'(?i)method=[\'"](.*?)[\'"]', match)

        if page:
            # Attribute values are HTML-escaped in the source (e.g. '&amp;'),
            # decode them to obtain the real URL.
            action = unescape(page.group(1))
            if not action.startswith('http'):
                # Relative actions are resolved against the site root.
                if action.startswith('/'):
                    action = main_url + action
                else:
                    action = main_url + '/' + action
        else:
            # No action attribute: previously this crashed on page.group().
            action = ''

        form_inputs = []
        # Only look at inputs that belong to this form, not the whole page.
        for inp in re.findall(r'(?i)(?s)<input.*?>', match):
            inp_name = re.search(r'(?i)name=[\'"](.*?)[\'"]', inp)
            if not inp_name:
                # Inputs without a name are skipped.
                continue
            inp_type = re.search(r'(?i)type=[\'"](.*?)[\'"]', inp)
            inp_value = re.search(r'(?i)value=[\'"](.*?)[\'"]', inp)

            type_text = inp_type.group(1) if inp_type else ''
            value_text = inp_value.group(1) if inp_value else ''
            if type_text.lower() == 'submit' and value_text == '':
                # Fallback value used for a submit button without a value.
                value_text = 'Submit Query'

            form_inputs.append({
                'name': inp_name.group(1),
                'type': type_text,
                'value': value_text,
            })

        forms[num] = {
            'action': action,
            'method': method.group(1).lower() if method else 'get',
            'inputs': form_inputs,
        }
    return forms
|