39 lines
2.0 KiB
Python
39 lines
2.0 KiB
Python
# -*- coding: UTF-8 -*-
|
|
import re
|
|
|
|
def generate(odir,wdir,label):
|
|
f_input=open(wdir, 'w')
|
|
with open(odir, 'rb') as f:
|
|
data = [x.decode('utf-8').strip() for x in f.readlines()]
|
|
#print(data)
|
|
line_number=0
|
|
|
|
for line in data:
|
|
global feature
|
|
num_len=0
|
|
capital_len=0
|
|
key_num=0
|
|
feature3=0
|
|
line_number=line_number+1
|
|
num_len=len(re.compile(r'\d').findall(line))
|
|
if len(line)!=0:
|
|
num_f=num_len/len(line)#数字字符频率
|
|
capital_len=len(re.compile(r'[A-Z]').findall(line))
|
|
if len(line)!=0:
|
|
capital_f=capital_len/len(line)#大写字母频率
|
|
line=line.lower()
|
|
|
|
key_num=line.count('and%20')+line.count('or%20')+line.count('xor%20')+line.count('sysobjects%20')+line.count('version%20')+line.count('substr%20')+line.count('len%20')+line.count('substring%20')+line.count('exists%20')
|
|
key_num=key_num+line.count('mid%20')+line.count('asc%20')+line.count('inner join%20')+line.count('xp_cmdshell%20')+line.count('version%20')+line.count('exec%20')+line.count('having%20')+line.count('unnion%20')+line.count('order%20')+line.count('information schema')
|
|
key_num=key_num+line.count('load_file%20')+line.count('load data infile%20')+line.count('into outfile%20')+line.count('into dumpfile%20')
|
|
if len(line)!=0:
|
|
space_f=(line.count(" ")+line.count("%20"))/len(line)#空格百分比
|
|
special_f=(line.count("{")*2+line.count('28%')*2+line.count('NULL')+line.count('[')+line.count('=')+line.count('?'))/len(line)
|
|
prefix_f=(line.count('\\x')+line.count('&')+line.count('\\u')+line.count('%'))/len(line)
|
|
#print('%f,%f,%f,%f,%f,%f,%f,%f' % (len(line),key_num,capital_f,num_f,space_f,special_f,prefix_f,label))
|
|
|
|
f_input.write('%f,%f,%f,%f,%f,%f,%f,%f' % (len(line),key_num,capital_f,num_f,space_f,special_f,prefix_f,label)+'\n')
|
|
|
|
f_input.close()
|
|
return wdir
|