62 lines
2.1 KiB
Python
62 lines
2.1 KiB
Python
|
|
#coding=utf-8
|
|||
|
|
import os
|
|||
|
|
import re
|
|||
|
|
import sys
|
|||
|
|
import MySQLdb
|
|||
|
|
from bs4 import BeautifulSoup
|
|||
|
|
# Directory that holds the saved bug-report HTML pages.
path = 'bugs'

# Regular expressions, compiled once and reused for every page.

# Title: the text between the title label inside the `wybug_title` <h3>
# and the credit icon <img> that follows it on the same line.
pattern0 = re.compile(
    r'<h3.*?class=\'wybug_title\'>.*?漏洞标题:(.*)<img.*?src="/images/credit.png"'
)

# Submission date: everything after the date label in the `wybug_date` <h3>.
pattern1 = re.compile(
    r'<h3.*?class=\'wybug_date\'>提交时间:(.*)</h3>'
)

# Link text of an <a> element; the same expression is used for both the
# vendor link (pattern2) and the author link (pattern3).
pattern2 = pattern3 = re.compile(r'>(.*)</a>')

# Value part of a "label:value</h3>" heading.
pattern4 = re.compile(r':(.*)</h3>')

# Fallback title extraction from plain text: whatever follows the title label.
pattern5 = re.compile(r'漏洞标题:(.*)')
|
|||
|
|
def _first_match(pattern, text):
    """Return the first capture of *pattern* in *text*, or None if it never matches."""
    found = pattern.findall(text)
    return found[0] if found else None


def _require(value, field):
    """Return *value* unchanged, raising ValueError naming *field* when it is None."""
    if value is None:
        raise ValueError("missing field '%s'" % field)
    return value


def _first_link_markup(soup, css_class, field):
    """Return the markup of the first <a> inside the first <h3 class=css_class>.

    Raises ValueError (naming *field*) when either element is absent.
    """
    heading = _require(soup.find('h3', class_=css_class), field)
    link = _require(heading.find('a'), field)
    return str(link)


def _parse_bug(html):
    """Extract (title, date, author, type, corp) from one report page.

    *html* is the raw page content as read from disk.  Raises ValueError
    when a field cannot be located, so the caller can skip that page
    instead of aborting the whole run.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Vendor and author are taken from the link markup with spaces removed
    # (and newlines too, for the vendor) before the link text is matched.
    corp_markup = _first_link_markup(soup, 'wybug_corp', 'corp')
    corp_markup = corp_markup.replace(' ', '').replace('\n', '')
    author_markup = _first_link_markup(soup, 'wybug_author', 'author')
    author_markup = author_markup.replace(' ', '')

    type_heading = _require(soup.find('h3', class_='wybug_type'), 'type')
    type_markup = str(type_heading).replace(' ', '')

    # Title: try the raw HTML first, then fall back to the heading's text.
    # NOTE(review): the two chained replace(' ', '') calls look identical in
    # this copy of the file; the second may originally have targeted another
    # whitespace character -- confirm against the upstream source.
    title = _first_match(pattern0, html)
    if title is not None:
        title = title.replace(' ', '').replace(' ', '')
    else:
        title_heading = _require(
            soup.find('h3', class_='wybug_title'), 'title')
        title_text = title_heading.text.encode('utf-8')
        title = _require(_first_match(pattern5, title_text), 'title')
        title = title.replace('\n', '').replace(' ', '').replace(' ', '')

    date = _require(_first_match(pattern1, html), 'date').replace(' ', '')
    corp = _require(_first_match(pattern2, corp_markup), 'corp')
    author = _require(_first_match(pattern3, author_markup), 'author')
    bug_type = _require(_first_match(pattern4, type_markup), 'type')

    return (title, date, author, bug_type, corp)


def _store_bug(row):
    """Insert one (title, dates, author, type, corp, doc) row into `bugs`.

    A connection is opened per call, so a database failure only affects the
    current file.  Cursor and connection are always closed, even when the
    insert or commit raises MySQLdb.Error (which propagates to the caller).
    """
    # NOTE(review): credentials are hard-coded (root, empty password).
    conn = MySQLdb.connect(host='localhost', port=3306, user='root',
                           passwd='', db='wooyun', charset='utf8')
    try:
        cur = conn.cursor()
        try:
            cur.execute("INSERT INTO `bugs`(`title`,`dates`,`author`,`type`,`corp`,`doc`) VALUES(%s,%s,%s,%s,%s,%s)", row)
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()


# Python 2 only: switch the interpreter's default encoding to UTF-8.
# Done once up front instead of once per file.
reload(sys)
sys.setdefaultencoding('utf-8')

for docs in os.listdir(path):
    doc_path = os.path.join(path, docs)

    # Only regular entries are parsed; sub-directories are skipped.
    if os.path.isdir(doc_path):
        print("目录跳过")
        continue

    # Open the file and read its content; `with` closes it on any error.
    with open(doc_path, 'r') as doc:
        html = doc.read()

    # Extract the fields; a malformed page is reported and skipped.
    try:
        fields = _parse_bug(html)
    except ValueError as err:
        print("skipping %s: %s" % (docs, err))
        continue

    # Same output as the original space-separated print of the five fields.
    print(' '.join(fields))

    # Store the row in the database.
    try:
        _store_bug(fields + (docs,))
    except MySQLdb.Error as e:
        if len(e.args) >= 2:
            print("Mysql Error %d: %s" % (e.args[0], e.args[1]))
        else:
            # Some driver errors carry only a message.
            print("Mysql Error: %s" % (e,))