Files
PySpider-ICS/ICS-CNVD/ICS_CNVD_MYSQL.py
墨玉麒麟 61c727d4da update
2020-03-23 14:28:08 +08:00

136 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Created on 2019-02-26 15:45:54
# Project: ICS_CNVD_MYSQL
# Author: KEYONE @ https://github.com/hi-KK
from pyspider.libs.base_handler import *
import pymysql
# Browser-like request headers, kept as an ordered list of (name, value)
# pairs; they are passed to the crawl of each "next page" link.
Headers2 = [
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Accept-Language', 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3'),
    ('Connection', 'keep-alive'),
    ('Cookie', '__jsluid=fabe0b42b024cf678a08a53ce857caf8'),
    ('Host', 'ics.cnvd.org.cn'),
    ('Upgrade-Insecure-Requests', '1'),
    (
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
    ),
]

# Entry URL of the paginated vulnerability list (20 entries per page; the
# offset value is left empty here).
StartUrl = 'http://ics.cnvd.org.cn/?max=20&offset='
class Handler(BaseHandler):
    """Crawl the vulnerability list at ``StartUrl`` and store each entry in MySQL.

    ``on_start`` seeds the list page, ``index_page`` follows the detail links
    and the "next page" link, and ``detail_page`` parses one vulnerability
    page and inserts it into the ``ics_cnvd`` table through ``add_Mysql``.
    """

    crawl_config = {
        "headers": {
            "User-Agent": "BaiDuSpider",  # user agent that imitates the Baidu spider
        }
    }

    # Parameterized INSERT: the driver escapes every value, so quotes inside
    # scraped text can neither break the statement nor inject SQL.
    _INSERT_SQL = (
        'insert into ics_cnvd '
        '(cnvd_title,cnvd_id,cnvd_date,cnvd_level,cnvd_product,cnvd_cve_id,'
        'cnvd_bug_id,cnvd_description,cnvd_reference,cnvd_solution,cnvd_patch,'
        'cnvd_update) '
        'values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    )

    # Detail-page row label -> record field, grouped by how the value is read.
    # Value is the stripped text of the row's last cell.
    _PLAIN_ROWS = {
        'CNVD-ID': 'cnvd_id',
        '公开日期': 'cnvd_date',
        '更新时间': 'cnvd_update',
    }
    # Value is the last cell's text with all whitespace removed.
    _COMPACT_ROWS = {
        '影响产品': 'cnvd_product',
        '漏洞描述': 'cnvd_description',
        '漏洞解决方案': 'cnvd_solution',
    }
    # Value is the text of the <a> elements inside the row's cells.
    _LINK_ROWS = {
        'CVE ID': 'cnvd_cve_id',
        'BUGTRAQ ID': 'cnvd_bug_id',
        '参考链接': 'cnvd_reference',
        '厂商补丁': 'cnvd_patch',
    }

    def __init__(self):
        """Open the MySQL connection used by ``add_Mysql``."""
        super().__init__()
        # Keyword arguments: newer PyMySQL releases no longer accept the
        # connection parameters positionally.
        self.db = pymysql.connect(
            host='192.168.159.1',
            user='root',
            password='root',
            database='ics_sec_info',
            charset='utf8',
        )

    def add_Mysql(self, cnvd_title, cnvd_id, cnvd_date, cnvd_level, cnvd_product,
                  cnvd_cve_id, cnvd_bug_id, cnvd_description, cnvd_reference,
                  cnvd_solution, cnvd_patch, cnvd_update):
        """Insert one vulnerability record into the ``ics_cnvd`` table.

        Each argument is written to the column of the same name. The insert is
        best-effort: any error is printed and the transaction is rolled back
        instead of propagating to the caller.
        """
        params = (
            cnvd_title, cnvd_id, cnvd_date, cnvd_level, cnvd_product,
            cnvd_cve_id, cnvd_bug_id, cnvd_description, cnvd_reference,
            cnvd_solution, cnvd_patch, cnvd_update,
        )
        try:
            # The connection is opened once in __init__; re-establish it if
            # the server has dropped it in the meantime.
            self.db.ping(reconnect=True)
            with self.db.cursor() as cursor:
                # Show the statement exactly as it will be sent (debug aid).
                print(cursor.mogrify(self._INSERT_SQL, params))
                cursor.execute(self._INSERT_SQL, params)
                print(cursor.lastrowid)
            self.db.commit()
        except Exception as e:
            print(e)
            self.db.rollback()

    @every(minutes=24 * 60)
    def on_start(self):
        """Seed the crawl with the first list page; scheduled every 24 hours."""
        self.crawl(StartUrl, callback=self.index_page)

    # Ways to read a response:
    # 1. response.json parses JSON data
    # 2. response.doc returns a PyQuery object
    # 3. response.etree returns an lxml object
    # 4. response.text returns unicode text
    # 5. response.content returns the raw bytes
    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue every detail link on a list page, then the next list page."""
        # CSS selector (via response.doc) for the detail-page links.
        for each in response.doc('html>body>div.con>div.con_left>div.list>table>tbody#tr>tr>td>a').items():
            self.crawl(each.attr.href, callback=self.detail_page)
        # CSS selector (via response.doc) for the "next page" link.
        for each in response.doc('html>body>div.con>div.con_left>div.list>div.pages.clearfix>a.nextLink').items():
            self.crawl(each.attr.href, callback=self.index_page, headers=Headers2)

    @config(priority=2)
    def detail_page(self, response):
        """Parse one vulnerability detail page and insert it via ``add_Mysql``.

        Rows that are absent from the page keep their default value (``0`` for
        the CVE and BUGTRAQ ids, an empty string otherwise) rather than leaving
        a name unbound.

        Raises:
            ValueError: if the page has no CNVD-ID row, i.e. it was not a
                parseable detail page; the task then fails instead of
                inserting an empty record.
        """
        # XPath (via response.etree) for the rows of the detail table.
        rows = response.etree.xpath('/html/body//div[@class="tableDiv"]/table/tbody/tr')
        cnvd_title_h1 = response.doc('h1').text()
        record = {
            'cnvd_id': '',
            'cnvd_date': '',
            'cnvd_level': '',
            'cnvd_product': '',
            'cnvd_cve_id': 0,
            'cnvd_bug_id': 0,
            'cnvd_description': '',
            'cnvd_reference': '',
            'cnvd_solution': '',
            'cnvd_patch': '',
            'cnvd_update': '',
        }
        for row in rows:
            label = ''.join(row.xpath('td[@class="alignRight"]/text()')).strip()
            text = ''.join(row.xpath('td[last()]/text()')).strip()
            if label in self._PLAIN_ROWS:
                record[self._PLAIN_ROWS[label]] = text
            elif label in self._COMPACT_ROWS:
                record[self._COMPACT_ROWS[label]] = ''.join(text.split())
            elif label in self._LINK_ROWS:
                record[self._LINK_ROWS[label]] = ''.join(row.xpath('td/a/text()')).strip()
            elif label == '危害级别':
                # Drop whitespace, then the empty "()" left behind in the cell text.
                record['cnvd_level'] = ''.join(text.split()).replace('()', '')
            # Rows with any other label are ignored.
        if not record['cnvd_id']:
            raise ValueError('no CNVD-ID row found on %s' % response.url)
        self.add_Mysql(cnvd_title=cnvd_title_h1, **record)
#return {
# #"url": response.url,
# "cnvd_title":cnvd_title_h1,
# "cnvd_id":cnvd_id,
# "cnvd_date":cnvd_date,
# "cnvd_level":cnvd_level,
# "cnvd_product":cnvd_product,
# "cnvd_cve_id":cnvd_cve_id,
# "cnvd_bug_id":cnvd_bug_id,
# "cnvd_description":cnvd_description,
# "cnvd_reference":cnvd_reference,
# "cnvd_solution":cnvd_solution,
# "cnvd_patch":cnvd_patch,
# "cnvd_update":cnvd_update
#}