diff --git a/app.py b/app.py deleted file mode 100644 index ee2e07f..0000000 --- a/app.py +++ /dev/null @@ -1,61 +0,0 @@ -#coding=utf-8 -import os -import re -import sys -import MySQLdb -from bs4 import BeautifulSoup -path='bugs' -#预编译 -pattern0=re.compile(r'.*?漏洞标题:(.*)提交时间:(.*)') -pattern2=re.compile(r'>(.*)') -pattern3=re.compile(r'') -pattern4=re.compile(r':(.*)') -pattern5=re.compile(r'漏洞标题:(.*)') -for docs in os.listdir(path): - #打开文件,提取内容 - if os.path.isdir('bugs/'+docs): - print "目录跳过" - continue - doc=open('bugs/'+docs,'r') - html=doc.read() - doc.close() - #提取信息 - soup=BeautifulSoup(html,"html.parser") - corps=soup.find_all('h3',class_='wybug_corp') - corps=corps[0].find_all('a') - corp=corps[0] - corp=str(corp).replace(' ','').replace('\n','') - authors=soup.find_all('h3',class_='wybug_author') - authors=authors[0].find_all('a') - author=authors[0] - author=str(author).replace(' ','') - types=soup.find_all('h3',class_='wybug_type') - type0=str(types[0]).replace(' ','') - title=re.findall(pattern0,html) - if title: - title1=title[0].replace(' ','').replace(' ','') - else: - title=soup.find_all('h3',class_='wybug_title') - title0=title[0].text.encode('utf-8') - title0=re.findall(pattern5,title0) - title1=title0[0].replace('\n','').replace(' ','').replace(' ','') - date=re.findall(pattern1,html) - date1=date[0].replace(' ','') - corp1=re.findall(pattern2,corp) - author1=re.findall(pattern3,author) - type1=re.findall(pattern4,type0) - print title1,date1,author1[0],type1[0],corp1[0] - #连接数据库 - try: - conn=MySQLdb.connect(host='localhost',port=3306,user='root',passwd='',db='wooyun',charset='utf8') - cur=conn.cursor() - reload(sys) - sys.setdefaultencoding('utf-8') - tmp=(title1,date1,author1[0],type1[0],corp1[0],docs) - cur.execute("INSERT INTO `bugs`(`title`,`dates`,`author`,`type`,`corp`,`doc`) VALUES(%s,%s,%s,%s,%s,%s)",tmp) - conn.commit() - cur.close() - conn.close() - except MySQLdb.Error,e: - print "Mysql Error %d: %s" % (e.args[0], e.args[1])