Compare commits

...

10 Commits

Author SHA1 Message Date
Dan McInerney
49662557ca Merge pull request #72 from Deadpool2000/master
Some bug fixed
2022-04-25 12:15:45 -04:00
D3adpool2K
a1fb45f783 Update bloomfilters.py 2022-04-25 17:55:58 +05:30
D3adpool2K
efa8f13e79 Update requirements.txt 2022-04-25 17:47:04 +05:30
D3adpool2K
ac457a4e8d Update middlewares.py 2022-04-25 17:46:06 +05:30
D3adpool2K
fb7ac11ae7 Update pipelines.py
fix
2022-04-25 17:45:02 +05:30
D3adpool2K
9f910d838d Update bloomfilters.py
bug fixed
2022-04-25 17:42:59 +05:30
Dan McInerney
2d142844f4 Merge pull request #58 from shelld3v/patch-1
Bug fix
2022-01-14 10:31:31 -07:00
Dan McInerney
e1d8ff73fa Merge pull request #61 from fardeen-ahmed/master
Updated a Critical Part in README file
2022-01-14 10:30:58 -07:00
Fardeen Ahmed
3dbadfc78a Update README.md 2021-02-05 10:14:29 +00:00
shelld3v
b8def30f55 Bug fix 2020-12-27 10:41:50 +07:00
5 changed files with 12 additions and 12 deletions

View File

@@ -43,7 +43,7 @@ XSS vulnerabilities are reported in xsscrapy-vulns.txt
Dependencies
-------
``` shell
-wget -O https://bootstrap.pypa.io/get-pip.py
+wget -O -u https://bootstrap.pypa.io/get-pip.py
python get-pip.py
pip install -r requirements.txt
```

View File

@@ -1,7 +1,7 @@
-Scrapy==1.1.0rc3
-pybloom==1.1
+scrapy
+pybloomfilter
 requests
-beautifulsoup
+bs4
 twisted==16.6.0
 w3lib
 lxml

View File

@@ -1,7 +1,7 @@
-from pybloom import BloomFilter
+from pybloomfilter import BloomFilter
 from scrapy.utils.job import job_dir
 from scrapy.dupefilters import BaseDupeFilter
-from settings import bloomfilterSize
+from xsscrapy.settings import bloomfilterSize
 class BloomURLDupeFilter(BaseDupeFilter):
     """Request Fingerprint duplicates filter"""

View File

@@ -1,9 +1,9 @@
 from scrapy.exceptions import IgnoreRequest
-from urlparse import unquote
-from pybloom import BloomFilter
+from urllib.parse import unquote
+from pybloomfilter import BloomFilter
 import random
 import re
-from settings import bloomfilterSize
+from xsscrapy.settings import bloomfilterSize
 # Filter out duplicate requests with Bloom filters since they're much easier on memory
 #URLS_FORMS_HEADERS = BloomFilter(3000000, 0.00001)

View File

@@ -3,7 +3,7 @@
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 from scrapy.exceptions import DropItem
-import HTMLParser
+from html.parser import HTMLParser
 from xsscrapy.items import vuln#, inj_resp
 import re
 import lxml.etree
@@ -12,7 +12,7 @@ from lxml.html import soupparser, fromstring
 import itertools
 #from IPython import embed
 from socket import gaierror, gethostbyname
-from urlparse import urlparse
+from urllib.parse import urlparse
 from logging import CRITICAL, ERROR, WARNING, INFO, DEBUG
 class XSSCharFinder(object):
@@ -839,7 +839,7 @@ class XSSCharFinder(object):
 # Just make them useless by entering empty tag and putting them at the end of the lxml matches
 # so a split at tag won't find anything
 if not tag_index:
-    print ' '*36+'ERROR: Error: could not find tag index location. Element does not exist in root doc.'
+    print (' '*36+'ERROR: Error: could not find tag index location. Element does not exist in root doc.')
 tag_index = 999999999
 tag = ''
 loc_tag = (tag_index, tag)