Added cookie support
@@ -21,6 +21,11 @@ If you wish to login with HTTP Basic Auth then crawl:
 ./xsscrapy.py -u http://example.com/login_page -l loginname --basic
 ```
 
+If you wish to use cookies:
+```shell
+./xsscrapy.py -u http://example.com/login_page --cookie "SessionID=abcdef1234567890"
+```
+
 If you wish to limit simultaneous connections to 20:
 ```shell
 ./xsscrapy.py -u http://example.com -c 20
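
Note: `--cookie` (short form `-k`, added to `get_args()` below) takes a single key=value pair; everything after the first `=` becomes the cookie's value. The spider sets this cookie on its seed request, and Scrapy's cookie handling carries it through the rest of the crawl.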
@@ -19,6 +19,7 @@ def get_args():
     parser.add_argument('-c', '--connections', default='30', help="Set the max number of simultaneous connections allowed, default=30")
     parser.add_argument('-r', '--ratelimit', default='0', help="Rate in requests per minute, default=0")
     parser.add_argument('--basic', help="Use HTTP Basic Auth to login", action="store_true")
+    parser.add_argument('-k', '--cookie', help="Cookie as a single key=value pair; e.g. --cookie SessionID=afgh3193e9103bca9318031bcdf")
     args = parser.parse_args()
     return args

@@ -28,9 +29,12 @@ def main():
     if rate not in [None, '0']:
         rate = str(60 / float(rate))
     try:
+        cookie_key = args.cookie.split('=',1)[0] if args.cookie else None
+        cookie_value = ''.join(args.cookie.split('=',1)[1:]) if args.cookie else None
         execute(['scrapy', 'crawl', 'xsscrapy',
                  '-a', 'url=%s' % args.url, '-a', 'user=%s' % args.login, '-a',
                  'pw=%s' % args.password, '-a', 'basic=%s' % args.basic,
+                 '-a', 'cookie_key=%s' % cookie_key, '-a', 'cookie_value=%s' % cookie_value,
                  '-s', 'CONCURRENT_REQUESTS=%s' % args.connections,
                  '-s', 'DOWNLOAD_DELAY=%s' % rate])
     except KeyboardInterrupt:
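
The new lines split the raw --cookie string on the first '=' only, so values that themselves contain '=' (common in base64-encoded session tokens) survive intact, and a missing '=' yields an empty value rather than an exception. A minimal standalone sketch of that behavior, not part of the commit:

```python
def split_cookie(cookie):
    # Mirrors main(): split on the first '=' only
    key = cookie.split('=', 1)[0]
    value = ''.join(cookie.split('=', 1)[1:])  # '' when there is no '='
    return key, value

print(split_cookie('SessionID=abcdef1234567890'))  # -> ('SessionID', 'abcdef1234567890')
print(split_cookie('token=dGVzdA=='))              # -> ('token', 'dGVzdA=='); later '=' kept
print(split_cookie('SessionID'))                   # -> ('SessionID', ''); no '=' present
```
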
@@ -45,13 +45,24 @@ class XSSspider(CrawlSpider):
         # gruyere or the second cookie delim
         self.test_str = '\'"(){}<x>:/'
 
-        # Login details
+        # Login details. Either user or cookie
         self.login_user = kwargs.get('user')
+        self.login_cookie_key = kwargs.get('cookie_key')
+        self.login_cookie_value = kwargs.get('cookie_value')
+
+        # Turn 'None' strings into real None values
         if self.login_user == 'None':
             self.login_user = None
-        else:
-            # Don't hit links with 'logout' in them since self.login_user exists
+        if self.login_cookie_key == 'None':
+            self.login_cookie_key = None
+        if self.login_cookie_value == 'None':
+            self.login_cookie_value = None
+
+        if self.login_user or (self.login_cookie_key and self.login_cookie_value):
+            # Don't hit links with 'logout' in them since self.login_user or cookies exists
             self.rules = (Rule(LinkExtractor(deny=('logout')), callback='parse_resp', follow=True), )
+
+        # If password is not set and login user is then get password, otherwise set it
         if kwargs.get('pw') == 'None' and self.login_user is not None:
             self.login_pass = raw_input("Please enter the password: ")
         else:
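
Why the 'None' string checks: main() interpolates the spider arguments with %s, so an unset cookie reaches the spider as the literal string 'None', not a real None. A quick standalone illustration, using only the stdlib:

```python
cookie_key = None
spider_arg = 'cookie_key=%s' % cookie_key    # scrapy -a arguments are plain strings
received = spider_arg.split('=', 1)[1]       # what kwargs.get('cookie_key') sees
print(repr(received))                        # 'None': a truthy string, not None
print(received == 'None', received is None)  # True False
```
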
@@ -83,11 +94,24 @@ class XSSspider(CrawlSpider):
         otherwise pass it to the normal callback function '''
         if self.login_user and self.login_pass:
             if self.basic_auth == 'true':
-                yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
+                # Take out the callback arg so crawler falls back to the rules' callback
+                if self.login_cookie_key and self.login_cookie_value:
+                    yield Request(url=self.start_urls[0], cookies={self.login_cookie_key: self.login_cookie_value})
+                else:
+                    yield Request(url=self.start_urls[0])
             else:
-                yield Request(url=self.start_urls[0], callback=self.login)
+                if self.login_cookie_key and self.login_cookie_value:
+                    yield Request(url=self.start_urls[0],
+                                  cookies={self.login_cookie_key: self.login_cookie_value},
+                                  callback=self.login)
+                else:
+                    yield Request(url=self.start_urls[0], callback=self.login)
         else:
-            yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
+            # Take out the callback arg so crawler falls back to the rules' callback
+            if self.login_cookie_key and self.login_cookie_value:
+                yield Request(url=self.start_urls[0], cookies={self.login_cookie_key: self.login_cookie_value})
+            else:
+                yield Request(url=self.start_urls[0])
 
     def login(self, response):
         ''' Fill out the login form and return the request'''
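
Setting cookies= on the seed Request is enough: Scrapy's built-in CookiesMiddleware stores the cookie and re-sends it on subsequent requests to the same domain, so the follow-up Requests generated by the crawl rules never need it repeated. A minimal standalone sketch of the same seeding pattern (hypothetical spider and names, not from this commit):

```python
import scrapy

class CookieSeedSpider(scrapy.Spider):
    ''' Hypothetical demo: seed a session cookie once, then crawl '''
    name = 'cookie_seed_demo'
    start_urls = ['http://example.com/']

    def start_requests(self):
        # Seed the session cookie; CookiesMiddleware persists it from here on
        yield scrapy.Request(self.start_urls[0],
                             cookies={'SessionID': 'abcdef1234567890'})

    def parse(self, response):
        # Follow-up requests carry the cookie without setting it again
        for href in response.xpath('//a/@href').extract():
            yield scrapy.Request(response.urljoin(href), callback=self.parse)
```

The three near-identical branches above differ only in whether callback=self.login is attached, so a small helper that builds the Request once would remove the duplication.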