Compare commits

...

10 Commits

Author          SHA1         Message                                                                                      Date
Dan McInerney   49662557ca   Merge pull request #72 from Deadpool2000/master (Some bug fixed)                             2022-04-25 12:15:45 -04:00
D3adpool2K      a1fb45f783   Update bloomfilters.py                                                                       2022-04-25 17:55:58 +05:30
D3adpool2K      efa8f13e79   Update requirements.txt                                                                      2022-04-25 17:47:04 +05:30
D3adpool2K      ac457a4e8d   Update middlewares.py                                                                        2022-04-25 17:46:06 +05:30
D3adpool2K      fb7ac11ae7   Update pipelines.py (fix)                                                                    2022-04-25 17:45:02 +05:30
D3adpool2K      9f910d838d   Update bloomfilters.py (bug fixed)                                                           2022-04-25 17:42:59 +05:30
Dan McInerney   2d142844f4   Merge pull request #58 from shelld3v/patch-1 (Bug fix)                                       2022-01-14 10:31:31 -07:00
Dan McInerney   e1d8ff73fa   Merge pull request #61 from fardeen-ahmed/master (Updated a Critical Part in README file)    2022-01-14 10:30:58 -07:00
Fardeen Ahmed   3dbadfc78a   Update README.md                                                                              2021-02-05 10:14:29 +00:00
shelld3v        b8def30f55   Bug fix                                                                                      2020-12-27 10:41:50 +07:00
5 changed files with 12 additions and 12 deletions

README.md

@@ -43,7 +43,7 @@ XSS vulnerabilities are reported in xsscrapy-vulns.txt
 Dependencies
 -------
 ``` shell
-wget -O https://bootstrap.pypa.io/get-pip.py
+wget -O -u https://bootstrap.pypa.io/get-pip.py
 python get-pip.py
 pip install -r requirements.txt
 ```

requirements.txt

@@ -1,7 +1,7 @@
-Scrapy==1.1.0rc3
-pybloom==1.1
+scrapy
+pybloomfilter
 requests
-beautifulsoup
+bs4
 twisted==16.6.0
 w3lib
 lxml
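
Besides dropping the stale pins, two of these requirements change import names: pybloom becomes pybloomfilter and beautifulsoup becomes bs4, which is what forces the source edits in the three files below. A hypothetical post-install sanity check (not part of the repo) that each requirement resolves under its import name:

``` python
import importlib

# Import names for the updated requirements; pybloom -> pybloomfilter and
# beautifulsoup -> bs4 are renames, not just unpinned versions.
for module in ('scrapy', 'pybloomfilter', 'requests', 'bs4',
               'twisted', 'w3lib', 'lxml'):
    try:
        importlib.import_module(module)
        print('ok: ' + module)
    except ImportError as err:
        print('missing: %s (%s)' % (module, err))
```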

xsscrapy/bloomfilters.py

@@ -1,7 +1,7 @@
-from pybloom import BloomFilter
+from pybloomfilter import BloomFilter
 from scrapy.utils.job import job_dir
 from scrapy.dupefilters import BaseDupeFilter
-from settings import bloomfilterSize
+from xsscrapy.settings import bloomfilterSize
 class BloomURLDupeFilter(BaseDupeFilter):
     """Request Fingerprint duplicates filter"""

xsscrapy/middlewares.py

@@ -1,9 +1,9 @@
 from scrapy.exceptions import IgnoreRequest
-from urlparse import unquote
-from pybloom import BloomFilter
+from urllib.parse import unquote
+from pybloomfilter import BloomFilter
 import random
 import re
-from settings import bloomfilterSize
+from xsscrapy.settings import bloomfilterSize
 # Filter out duplicate requests with Bloom filters since they're much easier on memory
 #URLS_FORMS_HEADERS = BloomFilter(3000000, 0.00001)
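
The real middlewares.py is more involved; a minimal sketch of the dedupe pattern these imports support, with an invented class name and the 3000000 / 0.00001 sizing echoed from the commented-out filter above:

``` python
from urllib.parse import unquote

from pybloomfilter import BloomFilter
from scrapy.exceptions import IgnoreRequest

class BloomDedupeMiddleware(object):
    """Illustrative downloader middleware, not xsscrapy's actual one."""

    def __init__(self):
        self.seen = BloomFilter(3000000, 0.00001)

    def process_request(self, request, spider):
        # Percent-decode so /a%20b and "/a b" count as the same URL
        key = unquote(request.url)
        if key in self.seen:
            raise IgnoreRequest('duplicate request: %s' % key)
        self.seen.add(key)
        return None  # None lets Scrapy continue processing the request
```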

xsscrapy/pipelines.py

@@ -3,7 +3,7 @@
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 from scrapy.exceptions import DropItem
-import HTMLParser
+from html.parser import HTMLParser
 from xsscrapy.items import vuln#, inj_resp
 import re
 import lxml.etree
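
Python 3 moved the HTMLParser class into html.parser, so only the import line changes here. Worth knowing: the undocumented HTMLParser().unescape() method this style of code relied on was removed in Python 3.9, after which html.unescape() is the durable spelling. A small example, independent of xsscrapy:

``` python
from html import unescape

# Entity decoding, the kind of thing an XSS pipeline needs when comparing
# reflected payloads against the injected ones.
print(unescape('&lt;svg onload=alert(1)&gt;'))  # <svg onload=alert(1)>
```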
@@ -12,7 +12,7 @@ from lxml.html import soupparser, fromstring
 import itertools
 #from IPython import embed
 from socket import gaierror, gethostbyname
-from urlparse import urlparse
+from urllib.parse import urlparse
 from logging import CRITICAL, ERROR, WARNING, INFO, DEBUG
 class XSSCharFinder(object):
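
urlparse is the same story as unquote in middlewares.py: the module became urllib.parse in Python 3 while the function kept its name, so call sites are untouched. For instance:

``` python
from urllib.parse import urlparse

parts = urlparse('http://example.com/search?q=test')
print(parts.netloc)  # example.com
print(parts.query)   # q=test
```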
@@ -839,7 +839,7 @@ class XSSCharFinder(object):
         # Just make them useless by entering empty tag and putting them at the end of the lxml matches
         # so a split at tag won't find anything
         if not tag_index:
-            print ' '*36+'ERROR: Error: could not find tag index location. Element does not exist in root doc.'
+            print(' '*36+'ERROR: Error: could not find tag index location. Element does not exist in root doc.')
             tag_index = 999999999
             tag = ''
         loc_tag = (tag_index, tag)
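
This last hunk is the one syntax-level (rather than import-level) Python 3 fix in the set: print becomes a function. The parenthesized form also happens to be valid Python 2, where it parses as a print statement applied to one parenthesized expression, which is why it is a safe first step in a 2-to-3 migration:

``` python
# Python 2 only:  print ' '*36 + 'message'
# Python 2 and 3: print(' '*36 + 'message')
print(' ' * 36 + 'ERROR: could not find tag index location.')
```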