Added cookie support

Charlie committed 2016-04-17 02:52:12 -05:00
parent fb628efbf0
commit 4bb032d53e
3 changed files with 42 additions and 9 deletions

View File

@@ -21,6 +21,11 @@ If you wish to login with HTTP Basic Auth then crawl:
 ./xsscrapy.py -u http://example.com/login_page -l loginname --basic
 ```
+If you wish to use cookies:
+```shell
+./xsscrapy.py -u http://example.com/login_page --cookie "SessionID=abcdef1234567890"
+```
 If you wish to limit simultaneous connections to 20:
 ```shell
 ./xsscrapy.py -u http://example.com -c 20
 ```

View File

@@ -19,6 +19,7 @@ def get_args():
     parser.add_argument('-c', '--connections', default='30', help="Set the max number of simultaneous connections allowed, default=30")
     parser.add_argument('-r', '--ratelimit', default='0', help="Rate in requests per minute, default=0")
     parser.add_argument('--basic', help="Use HTTP Basic Auth to login", action="store_true")
+    parser.add_argument('-k', '--cookie', help="Cookie to use; e.g. --cookie SessionID=afgh3193e9103bca9318031bcdf")
     args = parser.parse_args()
     return args
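For reference, the new flag behaves like any other argparse string option. A minimal standalone sketch of how it parses (the demo cookie value is illustrative):

```python
import argparse

# Minimal reproduction of the new -k/--cookie option.
parser = argparse.ArgumentParser()
parser.add_argument('-k', '--cookie',
                    help='Cookie to use; e.g. --cookie "SessionID=abc123"')

args = parser.parse_args(['--cookie', 'SessionID=abc123'])
print(args.cookie)  # -> SessionID=abc123
```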
@@ -28,9 +29,12 @@ def main():
     if rate not in [None, '0']:
         rate = str(60 / float(rate))
     try:
-        execute(['scrapy', 'crawl', 'xsscrapy',
-                 '-a', 'url=%s' % args.url, '-a', 'user=%s' % args.login, '-a',
-                 'pw=%s' % args.password, '-a', 'basic=%s' % args.basic,
+        cookie_key = args.cookie.split('=', 1)[0] if args.cookie else None
+        cookie_value = ''.join(args.cookie.split('=', 1)[1:]) if args.cookie else None
+        execute(['scrapy', 'crawl', 'xsscrapy',
+                 '-a', 'url=%s' % args.url, '-a', 'user=%s' % args.login, '-a',
+                 'pw=%s' % args.password, '-a', 'basic=%s' % args.basic,
+                 '-a', 'cookie_key=%s' % cookie_key, '-a', 'cookie_value=%s' % cookie_value,
                  '-s', 'CONCURRENT_REQUESTS=%s' % args.connections,
                  '-s', 'DOWNLOAD_DELAY=%s' % rate])
     except KeyboardInterrupt:
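The `--cookie` value is split on the first `=` only, so session values that themselves contain `=` (base64 padding, for example) survive intact. A standalone sketch of that parsing, with `parse_cookie` as an illustrative helper rather than part of the commit:

```python
def parse_cookie(cookie):
    # Split on the first '=' only, mirroring split('=', 1) above, so a
    # value like 'a=b=c' keeps its embedded '=' characters.
    if not cookie:
        return None, None
    parts = cookie.split('=', 1)
    key = parts[0]
    value = ''.join(parts[1:])  # '' when no '=' is present
    return key, value

print(parse_cookie('SessionID=abcdef1234567890'))  # ('SessionID', 'abcdef1234567890')
print(parse_cookie('token=a=b=c'))                 # ('token', 'a=b=c')
```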

View File

@@ -45,13 +45,24 @@ class XSSspider(CrawlSpider):
         # gruyere or the second cookie delim
         self.test_str = '\'"(){}<x>:/'
-        # Login details
+        # Login details. Either user or cookie
         self.login_user = kwargs.get('user')
+        self.login_cookie_key = kwargs.get('cookie_key')
+        self.login_cookie_value = kwargs.get('cookie_value')
+        # Turn 'None' strings into real Nones
         if self.login_user == 'None':
             self.login_user = None
-        else:
-            # Don't hit links with 'logout' in them since self.login_user exists
+        if self.login_cookie_key == 'None':
+            self.login_cookie_key = None
+        if self.login_cookie_value == 'None':
+            self.login_cookie_value = None
+        if self.login_user or (self.login_cookie_key and self.login_cookie_value):
+            # Don't hit links with 'logout' in them since self.login_user or cookies exist
             self.rules = (Rule(LinkExtractor(deny=('logout')), callback='parse_resp', follow=True), )
         # If password is not set and login user is then get password, otherwise set it
         if kwargs.get('pw') == 'None' and self.login_user is not None:
             self.login_pass = raw_input("Please enter the password: ")
         else:
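The `'None'` string checks above are needed because `scrapy crawl -a key=value` hands every `-a` argument to the spider as a string, so `'cookie_key=%s' % None` in xsscrapy.py arrives as the literal text `'None'`. A tiny illustration, with `denone` as a hypothetical helper:

```python
def denone(value):
    # 'cookie_key=%s' % None renders as the string 'None'; map it back.
    return None if value == 'None' else value

assert denone('%s' % None) is None
assert denone('SessionID') == 'SessionID'
```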
@@ -83,11 +94,24 @@ class XSSspider(CrawlSpider):
         otherwise pass it to the normal callback function '''
         if self.login_user and self.login_pass:
             if self.basic_auth == 'true':
-                yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
+                # Take out the callback arg so the crawler falls back to the rules' callback
+                if self.login_cookie_key and self.login_cookie_value:
+                    yield Request(url=self.start_urls[0], cookies={self.login_cookie_key: self.login_cookie_value})
+                else:
+                    yield Request(url=self.start_urls[0])
             else:
-                yield Request(url=self.start_urls[0], callback=self.login)
+                if self.login_cookie_key and self.login_cookie_value:
+                    yield Request(url=self.start_urls[0],
+                                  cookies={self.login_cookie_key: self.login_cookie_value},
+                                  callback=self.login)
+                else:
+                    yield Request(url=self.start_urls[0], callback=self.login)
         else:
-            yield Request(url=self.start_urls[0]) # Take out the callback arg so crawler falls back to the rules' callback
+            # Take out the callback arg so the crawler falls back to the rules' callback
+            if self.login_cookie_key and self.login_cookie_value:
+                yield Request(url=self.start_urls[0], cookies={self.login_cookie_key: self.login_cookie_value})
+            else:
+                yield Request(url=self.start_urls[0])

     def login(self, response):
         ''' Fill out the login form and return the request'''
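Passing a dict via `cookies=` seeds Scrapy's built-in CookiesMiddleware, which attaches the cookie to that request and re-sends it on every subsequent request in the crawl; that is why the spider only needs to set it on the very first request. A minimal self-contained sketch (spider name and URL are assumptions, not from the commit):

```python
import scrapy

class CookieDemoSpider(scrapy.Spider):
    name = 'cookie_demo'                  # illustrative name
    start_urls = ['http://example.com/']  # illustrative URL

    def start_requests(self):
        # Cookie set once here; CookiesMiddleware persists it for the crawl.
        yield scrapy.Request(self.start_urls[0],
                             cookies={'SessionID': 'abcdef1234567890'})

    def parse(self, response):
        self.log('Crawled %s' % response.url)
```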