Files
XSS_Scanner/xss_scanner/utils/tech_detector.py
2025-03-09 13:49:12 +08:00

384 lines
16 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
技术检测模块,用于识别网页使用的技术、框架和编程语言
"""
import re
import logging
import json
from urllib.parse import urlparse
from bs4 import BeautifulSoup
# Module-level logger obtained under the name 'xss_scanner'.
logger = logging.getLogger('xss_scanner')
class TechDetector:
    """Detect the technology stack of a web site from one HTTP response.

    Signatures are stored in four tables (front-end frameworks, back-end
    technologies, servers and WAFs).  Each table maps a technology name to a
    list of 2-tuples which come in two flavours:

    * HTML signature: ``(tag_name, attrs_dict)`` -- matched against the
      parsed document; ``'*'`` as tag name means "any tag".
    * Header signature: ``(header_name, compiled_regex)`` -- matched against
      the value of that response header.

    A technology is reported as soon as any one of its signatures matches.
    """

    # Patterns used to extract version numbers once a technology was found.
    _REACT_VERSION_RE = re.compile(
        r'React\.version\s*=\s*[\'"]([^\'"]+)[\'"]')
    _ANGULAR_VERSION_RE = re.compile(
        r'angular\.version\s*=\s*\{[^\}]*full:\s*[\'"]([^\'"]+)[\'"]')
    _WORDPRESS_VERSION_RE = re.compile(r'WordPress\s*([0-9\.]+)')
    _SERVER_VERSION_PATTERNS = (
        ('Nginx', re.compile(r'nginx/([0-9\.]+)', re.I)),
        ('Apache', re.compile(r'Apache/([0-9\.]+)', re.I)),
    )

    # WAF name -> bypass hints returned by get_waf_bypass_techniques().
    # WAFs that can be detected but have no entry here yield no hints.
    _WAF_BYPASS_TECHNIQUES = {
        'Cloudflare': (
            '使用不同的编码方式: HTML, URL, Unicode, Base64等',
            '利用换行符分割XSS Payload',
            '使用JavaScript事件处理程序的大小写混合形式',
            '使用不同的HTML标签避免常见的如script, img, iframe',
            '尝试使用较少被检测的事件如onmouseover, onerror, onwheel等',
        ),
        'ModSecurity': (
            '使用JavaScript事件处理程序的不同形式',
            '使用HTML实体编码',
            '分割Payload: < s c r i p t >',
            '使用JavaScript的eval函数和字符串操作函数',
            '使用CSS注入配合XSS',
        ),
        'Imperva': (
            '使用JavaScript原型链污染技术',
            '避免使用关键词(alert, document.cookie等)',
            '使用JavaScript的间接调用方法',
            '使用多层编码: URL编码 + HTML编码 + Unicode编码',
            '利用长字符串和重复字符迷惑WAF规则',
        ),
        'F5 BIG-IP': (
            '使用非标准事件处理程序',
            'DOM XSS手法通常更能绕过F5的防护',
            '使用JavaScript模板字符串',
            '使用JavaScript的Function构造函数',
            '利用特定浏览器的解析差异',
        ),
        'Akamai': (
            '利用JavaScript的变量和函数名混淆',
            '使用CDATA和注释规避特征检测',
            '避免直接使用javascript:伪协议',
            '使用data:text/html;base64,...编码',
            '利用JavaScript中的字符串拼接和动态执行',
        ),
    }

    @staticmethod
    def _asset_pattern(library, extension):
        """Build a regex matching a library asset file name.

        Matches ``<library>.<ext>`` as well as any suffix introduced by ``-``
        or ``.`` before the extension, e.g. ``jquery.min.js``,
        ``jquery-3.6.0.min.js`` or ``bootstrap.bundle.min.js``.  The previous
        hand-written patterns (``jquery(-|\\.min\\.)?\\.js``) required a
        doubled dot and therefore missed every minified/versioned file.
        """
        return re.compile(
            r'%s(?:[-.][\w.-]*)?\.%s'
            % (re.escape(library), re.escape(extension))
        )

    def __init__(self):
        """Build the signature tables used by :meth:`detect`."""
        asset = self._asset_pattern
        any_value = re.compile(r'.*')  # "attribute/header merely exists"

        # Front-end framework signatures (HTML only).
        self.frontend_frameworks = {
            'React': [
                ('script', {'src': asset('react', 'js')}),
                ('script', {'src': asset('react-dom', 'js')}),
                ('meta', {'name': 'generator',
                          'content': re.compile(r'react', re.I)}),
                ('div', {'id': 'root'}),
                ('div', {'id': 'app'}),
                ('meta', {'name': 'next-head-count'}),
                ('code', {'id': '__NEXT_DATA__'}),
            ],
            'Vue.js': [
                ('script', {'src': asset('vue', 'js')}),
                ('div', {'id': 'app'}),
                ('div', {'id': 'vue-app'}),
                ('div', {'class': 'v-application'}),
                ('meta', {'name': 'generator',
                          'content': re.compile(r'vue', re.I)}),
            ],
            'Angular': [
                ('script', {'src': asset('angular', 'js')}),
                ('*', {'ng-app': any_value}),
                ('*', {'ng-controller': any_value}),
                ('*', {'ng-repeat': any_value}),
                ('*', {'ng-bind': any_value}),
                ('*', {'ng-model': any_value}),
            ],
            'jQuery': [
                ('script', {'src': asset('jquery', 'js')}),
            ],
            'Bootstrap': [
                ('link', {'href': asset('bootstrap', 'css')}),
                ('script', {'src': asset('bootstrap', 'js')}),
                ('div', {'class': re.compile(r'container(-fluid)?')}),
                ('div', {'class': re.compile(r'row')}),
                ('div', {'class': re.compile(r'col(-[a-z]+-[0-9]+)?')}),
            ],
        }

        # Back-end framework / language signatures (headers and HTML mixed).
        self.backend_technologies = {
            'PHP': [
                ('X-Powered-By', re.compile(r'PHP/?', re.I)),
                ('Set-Cookie', re.compile(r'PHPSESSID', re.I)),
                ('link', {'href': re.compile(r'\.php')}),
                ('a', {'href': re.compile(r'\.php')}),
                ('form', {'action': re.compile(r'\.php')}),
            ],
            'WordPress': [
                ('meta', {'name': 'generator',
                          'content': re.compile(r'WordPress', re.I)}),
                ('link', {'href': re.compile(r'wp-content')}),
                ('script', {'src': re.compile(r'wp-includes')}),
                ('link', {'rel': 'https://api.w.org/'}),
                ('meta', {'property': 'og:site_name'}),
                ('body', {'class': re.compile(r'wordpress')}),
            ],
            'Laravel': [
                ('input', {'name': '_token'}),
                ('meta', {'name': 'csrf-token'}),
                ('script', {'src': re.compile(r'vendor/laravel')}),
                ('Set-Cookie', re.compile(r'laravel_session', re.I)),
            ],
            'Django': [
                ('input', {'name': 'csrfmiddlewaretoken'}),
                ('meta', {'name': 'csrf-token'}),
                # NOTE: plain-string header values are not evaluated by
                # detect() (only compiled regexes are), so this entry is
                # currently inert.
                ('X-Frame-Options', 'SAMEORIGIN'),
            ],
            'Flask': [
                ('form', {'action': re.compile(r'\/[a-z0-9_]+\/?')}),
                ('Set-Cookie', re.compile(r'session=', re.I)),
            ],
            'Python': [
                ('Server', re.compile(
                    r'(Python|Werkzeug|Django|Tornado|Flask|CherryPy)', re.I)),
                ('X-Powered-By', re.compile(
                    r'(Python|Werkzeug|Django|Tornado|Flask|CherryPy)', re.I)),
            ],
            'ASP.NET': [
                ('X-Powered-By', re.compile(r'ASP\.NET', re.I)),
                ('X-AspNet-Version', any_value),
                ('Set-Cookie', re.compile(r'ASP\.NET_SessionId', re.I)),
                ('form', {'action': re.compile(r'\.aspx')}),
                ('input', {'name': '__VIEWSTATE'}),
            ],
            'Node.js': [
                ('X-Powered-By', re.compile(r'Express', re.I)),
                ('Set-Cookie', re.compile(r'connect\.sid', re.I)),
            ],
            'Ruby on Rails': [
                ('X-Powered-By', re.compile(
                    r'Phusion Passenger|Ruby|Rails', re.I)),
                ('Set-Cookie', re.compile(r'_session_id', re.I)),
                ('meta', {'name': 'csrf-param',
                          'content': 'authenticity_token'}),
            ],
            'Java': [
                ('X-Powered-By', re.compile(
                    r'(JSP|Servlet|Tomcat|JBoss|GlassFish|WebLogic|WebSphere|Jetty)',
                    re.I)),
                ('Server', re.compile(
                    r'(Tomcat|JBoss|GlassFish|WebLogic|WebSphere|Jetty)',
                    re.I)),
                ('Set-Cookie', re.compile(r'JSESSIONID', re.I)),
            ],
            'Go': [
                ('Server', re.compile(r'(go httpserver)', re.I)),
                ('X-Powered-By', re.compile(r'(go|gin|echo)', re.I)),
            ],
        }

        # Web server / CDN signatures (headers only).
        self.server_technologies = {
            'Nginx': [
                ('Server', re.compile(r'nginx', re.I)),
            ],
            'Apache': [
                ('Server', re.compile(r'apache', re.I)),
            ],
            'IIS': [
                ('Server', re.compile(r'IIS', re.I)),
            ],
            'LiteSpeed': [
                ('Server', re.compile(r'LiteSpeed', re.I)),
            ],
            'Cloudflare': [
                ('Server', re.compile(r'cloudflare', re.I)),
                ('CF-RAY', any_value),
                ('CF-Cache-Status', any_value),
            ],
            'Varnish': [
                ('X-Varnish', any_value),
                ('X-Varnish-Cache', any_value),
            ],
        }

        # WAF signatures (headers only).
        self.waf_technologies = {
            'Cloudflare': [
                ('Server', re.compile(r'cloudflare', re.I)),
                ('CF-RAY', any_value),
            ],
            'ModSecurity': [
                ('Server', re.compile(r'mod_security', re.I)),
                ('X-Mod-Security', any_value),
            ],
            'Sucuri': [
                ('X-Sucuri-ID', any_value),
                ('X-Sucuri-Cache', any_value),
            ],
            'Imperva': [
                ('X-Iinfo', any_value),
                ('Set-Cookie', re.compile(r'incap_ses', re.I)),
            ],
            'Akamai': [
                ('X-Akamai-Transformed', any_value),
                ('Set-Cookie', re.compile(r'ak_bmsc', re.I)),
            ],
            'F5 BIG-IP': [
                ('Set-Cookie', re.compile(r'BIGipServer', re.I)),
                ('Server', re.compile(r'BigIP', re.I)),
            ],
            'Barracuda': [
                ('Set-Cookie', re.compile(r'barra_counter_session', re.I)),
            ],
        }

    @staticmethod
    def _headers_match(headers, signatures):
        """Return True if any header signature in *signatures* matches."""
        for header_name, pattern in signatures:
            # HTML signatures (dict) and plain-string values are skipped.
            if not isinstance(pattern, re.Pattern):
                continue
            if header_name not in headers:
                continue
            value = headers[header_name]
            if isinstance(value, str) and pattern.search(value):
                return True
        return False

    @staticmethod
    def _html_matches(soup, signatures):
        """Return True if any HTML signature in *signatures* matches *soup*."""
        for tag_name, attrs in signatures:
            # Header signatures carry a regex/str instead of an attrs dict.
            if not isinstance(attrs, dict):
                continue
            # BeautifulSoup treats the string '*' as a literal tag name;
            # ``True`` is its "match any tag" filter.
            name_filter = True if tag_name == '*' else tag_name
            if soup.find(name_filter, attrs=attrs) is not None:
                return True
        return False

    def _detect_from_headers(self, signature_table, headers):
        """List the technologies of *signature_table* found in *headers*."""
        return [
            name
            for name, signatures in signature_table.items()
            if self._headers_match(headers, signatures)
        ]

    def _detect_from_html(self, signature_table, soup, skip=()):
        """List the technologies of *signature_table* found in *soup*.

        Names contained in *skip* are not evaluated.
        """
        return [
            name
            for name, signatures in signature_table.items()
            if name not in skip and self._html_matches(soup, signatures)
        ]

    def detect(self, response, content=None):
        """
        Detect the technologies used by a web page.

        Args:
            response: HTTP response object exposing ``headers`` and ``text``.
            content: HTML content (optional); falls back to ``response.text``
                when empty or omitted.

        Returns:
            dict: ``{'frontend': [...], 'backend': [...], 'server': [...],
            'waf': [...]}``, or an empty dict when *response* is ``None``.
        """
        # Compare against None explicitly: a requests.Response is falsy for
        # 4xx/5xx statuses, and error pages (e.g. a WAF 403 block page) are
        # precisely where header fingerprints are most useful.
        if response is None:
            return {}

        results = {
            'frontend': [],
            'backend': [],
            'server': [],
            'waf': []
        }

        html_content = content or response.text
        try:
            soup = BeautifulSoup(html_content, 'html.parser')
        except Exception as e:
            # Best effort: header-based detection still runs without HTML.
            logger.error(f"解析HTML时发生错误: {str(e)}")
            soup = None

        headers = response.headers
        if headers is None:
            headers = {}

        if soup is not None:
            results['frontend'] = self._detect_from_html(
                self.frontend_frameworks, soup)

        # Back-end: header evidence first, then HTML evidence for whatever
        # the headers did not already reveal.
        results['backend'] = self._detect_from_headers(
            self.backend_technologies, headers)
        if soup is not None:
            results['backend'].extend(
                self._detect_from_html(
                    self.backend_technologies, soup,
                    skip=tuple(results['backend']))
            )

        results['server'] = self._detect_from_headers(
            self.server_technologies, headers)
        results['waf'] = self._detect_from_headers(
            self.waf_technologies, headers)

        # Attach version numbers where they can be extracted.
        self._enhance_detection(results, soup, headers)
        return results

    @staticmethod
    def _script_version(soup, marker, version_re):
        """Return the first version captured by *version_re* in an inline
        ``<script>`` whose text contains *marker*, or ``None``."""
        for script in soup.find_all('script'):
            text = script.string
            if text and marker in text:
                match = version_re.search(text)
                if match:
                    return match.group(1)
        return None

    @staticmethod
    def _add_version(entries, name, version):
        """Replace *name* in *entries* by ``"<name> <version>"`` (appended)."""
        entries.remove(name)
        entries.append(f"{name} {version}")

    def _enhance_detection(self, results, soup, headers):
        """
        Enrich already detected entries with version information, in place.

        Args:
            results: detection result dict produced by :meth:`detect`.
            soup: BeautifulSoup object, or ``None`` if the HTML was not parsed.
            headers: HTTP response headers.
        """
        if soup is not None:
            # JavaScript library versions exposed in inline scripts.
            for name, marker, version_re in (
                ('React', 'React.version', self._REACT_VERSION_RE),
                ('Angular', 'angular.version', self._ANGULAR_VERSION_RE),
            ):
                if name in results['frontend']:
                    version = self._script_version(soup, marker, version_re)
                    if version is not None:
                        self._add_version(results['frontend'], name, version)

            # WordPress advertises its version in the generator meta tag.
            if 'WordPress' in results['backend']:
                for meta in soup.find_all('meta', {'name': 'generator'}):
                    content = meta.get('content', '')
                    if 'WordPress' in content:
                        match = self._WORDPRESS_VERSION_RE.search(content)
                        if match:
                            self._add_version(
                                results['backend'], 'WordPress',
                                match.group(1))
                            break

        # Server versions taken from the Server header ("nginx/1.18.0").
        if headers is not None and 'Server' in headers:
            server_header = headers['Server']
            for name, version_re in self._SERVER_VERSION_PATTERNS:
                if name in results['server']:
                    match = version_re.search(server_header)
                    if match:
                        self._add_version(
                            results['server'], name, match.group(1))

    def get_waf_bypass_techniques(self, detected_waf):
        """
        Return possible bypass techniques for the detected WAFs.

        Args:
            detected_waf: iterable of detected WAF names (``None`` is treated
                as empty).

        Returns:
            dict: WAF name -> list of technique descriptions; WAFs without
            known techniques are omitted.
        """
        bypass_techniques = {}
        for waf in detected_waf or ():
            techniques = self._WAF_BYPASS_TECHNIQUES.get(waf)
            if techniques is not None:
                # Hand out a fresh list so callers cannot mutate the table.
                bypass_techniques[waf] = list(techniques)
        return bypass_techniques