87 lines
3.8 KiB
Python
87 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
|
# -*- encoding: utf-8 -*-
|
|
# Created on 2019-02-26 14:51:26
|
|
# Project: ICS_CERT_MYSQL
|
|
# Author: KEYONE @ https://github.com/hi-KK
|
|
|
|
from pyspider.libs.base_handler import *
|
|
import pymysql
|
|
|
|
# Browser-like request headers, kept as (name, value) pairs. Within this file
# they are passed as ``headers=`` when following the "next page" pager link
# in Handler.index_page.
# NOTE(review): the Cookie value is a captured Google-Analytics session from
# 2018/2019 and is presumably stale; confirm whether the site needs it at all.
Headers2 = [
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
    ('Accept-Encoding', 'gzip, deflate'),
    ('Accept-Language', 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2'),
    ('Connection', 'keep-alive'),
    ('Cookie', '_ga=GA1.2.1524773403.1542244241; _ga=GA1.3.1524773403.1542244241; has_js=1; _gid=GA1.2.1787487130.1550814632; _gid=GA1.3.1787487130.1550814632'),
    ('Host', 'ics-cert.us-cert.gov'),
    ('Referer', 'https://ics-cert.us-cert.gov/advisories'),
    ('Upgrade-Insecure-Requests', '1'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:60.0) Gecko/20100101 Firefox/60.0'),
]

# Listing page from which the crawl starts (see Handler.on_start).
StartUrl = 'https://ics-cert.us-cert.gov/advisories'
|
|
|
|
|
|
class Handler(BaseHandler):
    """pyspider handler that scrapes ICS-CERT advisories into MySQL.

    Crawl flow: ``on_start`` fetches ``StartUrl``; ``index_page`` queues every
    advisory link plus the pager's "next" link; ``detail_page`` extracts the
    advisory fields and stores them through ``add_Mysql``.
    """

    crawl_config = {}

    def __init__(self):
        """Open the MySQL connection that ``add_Mysql`` writes through."""
        # Keyword arguments: PyMySQL 1.0+ no longer accepts positional
        # arguments to connect().
        # NOTE(review): host and credentials are hard-coded; consider moving
        # them to configuration.
        self.db = pymysql.connect(
            host='192.168.159.1',
            user='root',
            password='root',
            database='ics_sec_info',
            charset='utf8',
        )

    def add_Mysql(self, title, icsa_id, risk, cvss, attention, vendor,
                  equipment, vulnerability, impact_product):
        """Insert one advisory row into the ``ics_cert`` table.

        All nine arguments are stored as-is in the column of the same name.
        On any error the exception is printed and the transaction is rolled
        back; nothing is re-raised, so a failed insert does not abort the
        crawl task.
        """
        # Placeholders are bound by the driver, so quotes or SQL fragments in
        # the scraped text can neither break nor alter the statement.
        sql = (
            'insert into ics_cert('
            'title, icsa_id, risk, cvss, attention, vendor, '
            'equipment, vulnerability, impact_product) '
            'values (%s, %s, %s, %s, %s, %s, %s, %s, %s)'
        )
        params = (
            title, icsa_id, risk, cvss, attention, vendor,
            equipment, vulnerability, impact_product,
        )
        try:
            # The connection is opened once in __init__ and reused for every
            # insert; re-establish it if the server has dropped it meanwhile.
            self.db.ping(reconnect=True)
            with self.db.cursor() as cursor:
                print(sql, params)
                cursor.execute(sql, params)
                print(cursor.lastrowid)
            self.db.commit()
        except Exception as e:
            # Best-effort persistence: report the failure and undo the
            # partial transaction.
            print(e)
            self.db.rollback()

    @every(minutes=24 * 60)
    def on_start(self):
        """Queue the advisories listing page (scheduled every 24 * 60 minutes)."""
        self.crawl(StartUrl, callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Queue each advisory detail page and the next listing page."""
        # Absolute links inside the listing are the individual advisories.
        for each in response.doc('.view-content ul a[href^="http"]').items():
            self.crawl(each.attr.href, callback=self.detail_page)
        # Follow pagination, sending the browser-like headers.
        for each in response.doc('.pager-next > a').items():
            self.crawl(each.attr.href, callback=self.index_page,
                       headers=Headers2)

    @config(priority=2)
    def detail_page(self, response):
        """Extract the advisory fields from a detail page and store them.

        Returns nothing: the record is written to MySQL via ``add_Mysql``
        instead of being returned as a pyspider result.
        """
        content = '//*[@id="ncas-content"]/div/div/div'

        def text_at(suffix):
            # Concatenate every text node matched below the content container
            # and trim surrounding whitespace.
            return ''.join(response.etree.xpath(content + suffix)).strip()

        title = response.doc('#page-sub-title').text()
        icsa_id = response.doc('#page-title').text()
        risk = text_at('/p[1]//text()')
        cvss = text_at('/ul[1]/li[1]//text()')
        # The next four items read only the <li>'s own text nodes ("/text()"),
        # not text nested in child elements.
        attention = text_at('/ul[1]/li[2]/text()')
        vendor = text_at('/ul[1]/li[3]/text()')
        equipment = text_at('/ul[1]/li[4]/text()')
        vulnerability = text_at('/ul[1]/li[5]/text()')
        impact_product = text_at('/ul[2]/li//text()')

        self.add_Mysql(title, icsa_id, risk, cvss, attention, vendor,
                       equipment, vulnerability, impact_product)