skip non-webpage urls

This commit is contained in:
Somdev Sangwan
2024-04-01 09:03:27 +05:30
parent b9b6237e79
commit 538d5608a7
2 changed files with 5 additions and 2 deletions

View File

@@ -77,6 +77,9 @@ def stable_request(url, headers):
verify=False,
timeout=10,
allow_redirects=redirects_allowed)
if not response.headers.get('Content-Type', '').startswith('text/'):
print('%s URL doesn\'t seem to be a webpage. Skipping.' % info)
return None
return response.url
except Exception as e:
if 'ConnectionError' not in str(e):