Files
wooyun_search/app.py
2016-09-15 19:21:41 +08:00

62 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#coding=utf-8
import os
import re
import sys
import MySQLdb
from bs4 import BeautifulSoup
# Directory that holds the saved bug pages, one file per report.
path = 'bugs'

# NOTE(review): the hosting page reported that this file contains ambiguous
# Unicode characters. The ':' in the patterns below and the second of the two
# visually identical replace(' ', '') calls in _extract_title may be
# look-alike characters (for example a full-width colon or a non-ASCII space)
# in the real file -- confirm against the repository before merging.

# Precompiled patterns.
# pattern0: title text between the title label and the credit icon (raw HTML).
pattern0 = re.compile(r'<h3.*?class=\'wybug_title\'>.*?漏洞标题:(.*)<img.*?src="/images/credit.png"')
# pattern1: submission time inside the wybug_date heading (raw HTML).
pattern1 = re.compile(r'<h3.*?class=\'wybug_date\'>提交时间:(.*)</h3>')
# pattern2 / pattern3: inner text of a serialized <a> tag (vendor / author).
pattern2 = re.compile(r'>(.*)</a>')
pattern3 = re.compile(r'>(.*)</a>')
# pattern4: everything before the closing </h3> of the serialized type heading.
pattern4 = re.compile(r'(.*)</h3>')
# pattern5: fallback title extraction from the heading's plain text.
pattern5 = re.compile(r'漏洞标题:(.*)')


def _first(items, what):
    """Return ``items[0]``; raise ValueError naming *what* when empty.

    Lets the caller report which piece of the page was missing instead of
    failing with a bare IndexError.
    """
    if not items:
        raise ValueError('missing %s' % what)
    return items[0]


def _extract_title(html, soup):
    """Return the bug title with spaces removed.

    Tries ``pattern0`` on the raw HTML first. When that finds nothing, falls
    back to the text of the ``wybug_title`` heading and ``pattern5``.

    Raises:
        ValueError: if neither approach yields a title.
    """
    found = pattern0.findall(html)
    if found:
        return found[0].replace(' ', '').replace(' ', '')
    heading = _first(soup.find_all('h3', class_='wybug_title'),
                     'wybug_title heading')
    # pattern5 is applied to the UTF-8 encoded text, as in the original code.
    text = heading.text.encode('utf-8')
    raw = _first(pattern5.findall(text), 'title text')
    return raw.replace('\n', '').replace(' ', '').replace(' ', '')


def _parse_bug(html):
    """Extract ``(title, date, author, type, corp)`` from one saved page.

    Args:
        html: full contents of a bug page as read from disk.

    Returns:
        A 5-tuple of strings in the order title, date, author, type, corp.

    Raises:
        ValueError: if an expected heading, link or pattern match is absent.
    """
    soup = BeautifulSoup(html, "html.parser")

    corp_heading = _first(soup.find_all('h3', class_='wybug_corp'),
                          'wybug_corp heading')
    corp_link = _first(corp_heading.find_all('a'), 'vendor link')
    corp_html = str(corp_link).replace(' ', '').replace('\n', '')

    author_heading = _first(soup.find_all('h3', class_='wybug_author'),
                            'wybug_author heading')
    author_link = _first(author_heading.find_all('a'), 'author link')
    author_html = str(author_link).replace(' ', '')

    type_heading = _first(soup.find_all('h3', class_='wybug_type'),
                          'wybug_type heading')
    type_html = str(type_heading).replace(' ', '')

    title = _extract_title(html, soup)
    date = _first(pattern1.findall(html), 'submit date').replace(' ', '')
    corp = _first(pattern2.findall(corp_html), 'vendor name')
    author = _first(pattern3.findall(author_html), 'author name')
    bug_type = _first(pattern4.findall(type_html), 'bug type')
    return title, date, author, bug_type, corp


def _insert_bug(conn, row):
    """Insert one ``(title, dates, author, type, corp, doc)`` row and commit.

    A MySQLdb error is printed and swallowed so the caller can carry on with
    the next file; the cursor is closed in every case.
    """
    cur = conn.cursor()
    try:
        cur.execute("INSERT INTO `bugs`(`title`,`dates`,`author`,`type`,`corp`,`doc`) VALUES(%s,%s,%s,%s,%s,%s)", row)
        conn.commit()
    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
    finally:
        cur.close()


def main():
    """Parse every file in ``path`` and store its fields in the ``bugs`` table.

    Sub-directories are skipped. For each file the extracted fields are
    printed and then inserted together with the file name. The database
    connection is opened once for the whole run and always closed at the end.
    If the connection cannot be opened the error is printed and nothing is
    processed. A file that lacks an expected element is reported and skipped
    instead of aborting the run.
    """
    # Python 2 only: switch the implicit str/unicode codec to UTF-8.
    # The original did this on every loop iteration; once is enough.
    reload(sys)
    sys.setdefaultencoding('utf-8')

    # Connect to the database.
    try:
        conn = MySQLdb.connect(host='localhost', port=3306, user='root',
                               passwd='', db='wooyun', charset='utf8')
    except MySQLdb.Error as e:
        print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        return

    try:
        for docs in os.listdir(path):
            full_path = os.path.join(path, docs)
            if os.path.isdir(full_path):
                print("目录跳过")
                continue

            # Open the file and read its contents.
            with open(full_path, 'r') as doc:
                html = doc.read()

            # Extract the fields.
            try:
                record = _parse_bug(html)
            except ValueError as e:
                print("Skipping %s: %s" % (docs, e))
                continue

            # Single pre-formatted string keeps the output identical to the
            # old space-separated print statement.
            print("%s %s %s %s %s" % record)
            _insert_bug(conn, record + (docs,))
    finally:
        conn.close()


if __name__ == '__main__':
    main()