From cfa283ceaa64f399427694fec5b7085a44607e57 Mon Sep 17 00:00:00 2001
From: Herman Slatman
Date: Fri, 14 Aug 2015 01:27:29 +0200
Subject: [PATCH] Cleaner version for determining filename to output to

---
Review note: get_filename now uses urlparse().hostname and strips only a
leading 'www.'. Hunk line counts were updated to match the revised added
lines; the blob id on the index line was not regenerated.

 xsscrapy/pipelines.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/xsscrapy/pipelines.py b/xsscrapy/pipelines.py
index b3d5682..df6ccb9 100644
--- a/xsscrapy/pipelines.py
+++ b/xsscrapy/pipelines.py
@@ -11,11 +11,33 @@ import lxml.html
 from lxml.html import soupparser, fromstring
 import itertools
 #from IPython import embed
+from urlparse import urlparse
 
 class XSSCharFinder(object):
     def __init__(self):
         self.url_param_xss_items = []
 
+    def get_filename(self, url):
+        """Return the per-host report filename for *url*.
+
+        Falls back to 'xsscrapy-vulns.txt' when *url* has no parsable
+        host (for example when the scheme is missing).
+        """
+        # .hostname drops any userinfo and port and lowercases the host;
+        # splitting netloc on ':' breaks on 'user:pw@host' and on IPv6.
+        host = urlparse(url).hostname or ''
+        # Strip only a leading 'www.', not every occurrence in the name.
+        if host.startswith('www.'):
+            host = host[len('www.'):]
+        if not host:
+            return 'xsscrapy-vulns.txt'
+        # IPv6 literals contain ':', which is not portable in filenames.
+        return host.replace(':', '_') + '.txt'
+
+    def open_spider(self, spider):
+        """Pick the output filename from spider.url (Scrapy pipeline hook)."""
+        self.filename = self.get_filename(spider.url)
+
     def process_item(self, item, spider):
         response = item['resp']
         meta = response.meta
@@ -957,7 +979,7 @@ class XSSCharFinder(object):
         return event_attributes
 
     def write_to_file(self, item, spider):
-        with open('xsscrapy-vulns.txt', 'a+') as f:
+        with open(self.filename, 'a+') as f:
             f.write('\n')
 
             f.write('URL: '+item['orig_url']+'\n')