#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Created on 2019-02-26 15:45:54
# Project: ICS_CNVD_MYSQL
# Author: KEYONE @ https://github.com/hi-KK
|
|||
|
|
|
|||
|
|
from pyspider.libs.base_handler import *
|
|||
|
|
import pymysql
|
|||
|
|
|
|||
|
|
# Browser-like request headers, kept as (name, value) pairs.
# They are sent when following the "next page" link of the listing.
Headers2 = [
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3'),
    ('Connection', 'keep-alive'),
    ('Cookie', '__jsluid=fabe0b42b024cf678a08a53ce857caf8'),
    ('Host', 'ics.cnvd.org.cn'),
    ('Upgrade-Insecure-Requests', '1'),
    (
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
    ),
]

# Listing URL the crawl starts from (the offset value is left empty).
StartUrl = 'http://ics.cnvd.org.cn/?max=20&offset='
|
|||
|
|
|
|||
|
|
|
|||
|
|
class Handler(BaseHandler):
    """pyspider handler that crawls the CNVD ICS vulnerability list into MySQL.

    ``on_start`` seeds the listing page, ``index_page`` follows every entry
    link and the "next page" link, and ``detail_page`` parses one entry and
    stores it in the ``ics_cnvd`` table through ``add_Mysql``.
    """

    crawl_config = {
        "headers": {
            "User-Agent": "BaiDuSpider",  # set the user agent to mimic the Baidu spider
        }
    }

    # Maps the label of a detail-table row to (record field, extraction kind).
    # Extraction kinds:
    #   'text'     - stripped text of the row's last cell
    #   'squashed' - text of the last cell with all whitespace removed
    #   'level'    - like 'squashed', with an empty "()" pair removed as well
    #   'link'     - stripped text of the <a> elements inside the row's cells
    _DETAIL_ROWS = {
        'CNVD-ID': ('cnvd_id', 'text'),
        '公开日期': ('cnvd_date', 'text'),
        '危害级别': ('cnvd_level', 'level'),
        '影响产品': ('cnvd_product', 'squashed'),
        'CVE ID': ('cnvd_cve_id', 'link'),
        'BUGTRAQ ID': ('cnvd_bug_id', 'link'),
        '漏洞描述': ('cnvd_description', 'squashed'),
        '参考链接': ('cnvd_reference', 'link'),
        '漏洞解决方案': ('cnvd_solution', 'squashed'),
        '厂商补丁': ('cnvd_patch', 'link'),
        '更新时间': ('cnvd_update', 'text'),
    }

    def __init__(self):
        """Open the MySQL connection used by ``add_Mysql``."""
        # Keyword arguments are required: PyMySQL 1.0+ no longer accepts the
        # host/user/password/database values positionally.
        # NOTE(review): host and credentials are hard-coded here.
        self.db = pymysql.connect(
            host='192.168.159.1',
            user='root',
            password='root',
            database='ics_sec_info',
            charset='utf8',
        )

    def add_Mysql(self, cnvd_title, cnvd_id, cnvd_date, cnvd_level, cnvd_product,
                  cnvd_cve_id, cnvd_bug_id, cnvd_description, cnvd_reference,
                  cnvd_solution, cnvd_patch, cnvd_update):
        """Insert one vulnerability record into the ``ics_cnvd`` table.

        Each argument is stored in the column of the same name. The insert is
        best-effort: on any error the exception is printed and the transaction
        is rolled back, and nothing is raised to the caller.
        """
        sql = (
            'insert into ics_cnvd '
            '(cnvd_title,cnvd_id,cnvd_date,cnvd_level,cnvd_product,cnvd_cve_id,'
            'cnvd_bug_id,cnvd_description,cnvd_reference,cnvd_solution,'
            'cnvd_patch,cnvd_update) '
            'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
        )
        # Values are sent as strings, as the previous quoted "%s" formatting
        # did, but are now escaped by the driver instead of being pasted into
        # the statement (scraped text may contain quotes or backslashes).
        params = tuple(
            str(value)
            for value in (
                cnvd_title, cnvd_id, cnvd_date, cnvd_level, cnvd_product,
                cnvd_cve_id, cnvd_bug_id, cnvd_description, cnvd_reference,
                cnvd_solution, cnvd_patch, cnvd_update,
            )
        )
        try:
            # The context manager closes the cursor even when execute() fails.
            with self.db.cursor() as cursor:
                print(cursor.mogrify(sql, params))
                cursor.execute(sql, params)
                print(cursor.lastrowid)
            self.db.commit()
        except Exception as e:
            # Deliberately broad: a failed insert must not abort the crawl task.
            print(e)
            self.db.rollback()

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the listing start URL."""
        self.crawl(StartUrl, callback=self.index_page)

    # Response accessors:
    # 1. response.json parses JSON data
    # 2. response.doc returns a PyQuery object
    # 3. response.etree returns an lxml object
    # 4. response.text returns unicode text
    # 5. response.content returns the raw bytes

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every entry on a listing page, then the next listing page."""
        # Use the CSS selector support of response.doc to collect all entry links.
        entry_links = response.doc(
            'html>body>div.con>div.con_left>div.list>table>tbody#tr>tr>td>a'
        )
        for link in entry_links.items():
            self.crawl(link.attr.href, callback=self.detail_page)

        # Use the CSS selector support of response.doc to locate the next-page link.
        next_links = response.doc(
            'html>body>div.con>div.con_left>div.list>div.pages.clearfix>a.nextLink'
        )
        for link in next_links.items():
            self.crawl(link.attr.href, callback=self.index_page, headers=Headers2)

    @config(priority=2)
    def detail_page(self, response):
        """Parse one vulnerability page and store it through ``add_Mysql``.

        Rows whose label is not listed in ``_DETAIL_ROWS`` are ignored. Fields
        whose row is absent keep their default: ``0`` for the CVE and BUGTRAQ
        ids, ``''`` for the others (the same value an empty cell produces).

        Returns None; the record is only written to MySQL.

        Raises:
            ValueError: if the page has no ``CNVD-ID`` row.
        """
        # Use pyspider's response.etree to get XPath selection.
        rows = response.etree.xpath('/html/body//div[@class="tableDiv"]/table/tbody/tr')
        cnvd_title_h1 = response.doc('h1').text()

        record = {
            'cnvd_id': None,  # required; checked after the loop
            'cnvd_date': '',
            'cnvd_level': '',
            'cnvd_product': '',
            'cnvd_cve_id': 0,
            'cnvd_bug_id': 0,
            'cnvd_description': '',
            'cnvd_reference': '',
            'cnvd_solution': '',
            'cnvd_patch': '',
            'cnvd_update': '',
        }

        for row in rows:
            label = ''.join(row.xpath('td[@class="alignRight"]/text()')).strip()
            target = self._DETAIL_ROWS.get(label)
            if target is None:
                continue
            field, kind = target
            if kind == 'link':
                value = ''.join(row.xpath('td/a/text()')).strip()
            else:
                value = ''.join(row.xpath('td[last()]/text()')).strip()
                if kind != 'text':
                    # Drop every whitespace character, including inner ones.
                    value = ''.join(value.split())
                if kind == 'level':
                    value = value.replace('()', '')
            record[field] = value

        if record['cnvd_id'] is None:
            # Without an id the row would be meaningless; fail the task instead.
            raise ValueError('no CNVD-ID row found on %s' % response.url)

        self.add_Mysql(
            cnvd_title_h1,
            record['cnvd_id'],
            record['cnvd_date'],
            record['cnvd_level'],
            record['cnvd_product'],
            record['cnvd_cve_id'],
            record['cnvd_bug_id'],
            record['cnvd_description'],
            record['cnvd_reference'],
            record['cnvd_solution'],
            record['cnvd_patch'],
            record['cnvd_update'],
        )