Files
VulnDataExtr/main.py
2025-01-13 18:03:57 +08:00

45 lines
1.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding:utf-8 -*-
import json
import re
from pathlib import Path
def extract_json_from_html(input_file, output_file) -> bool:
    """Extract the JSON assigned to ``window.data`` in an HTML file and save it.

    The first ``<script>window.data = ...;</script>`` element found in the
    input is parsed as JSON and written, pretty-printed, to ``output_file``.

    Args:
        input_file: Path of the HTML file to read (decoded as UTF-8).
        output_file: Path of the JSON file to write (encoded as UTF-8).
            It is not created or modified when no payload is found.

    Returns:
        True if a payload was found and written, False if the HTML contains
        no matching ``window.data`` script element.

    Raises:
        Exception: If reading the input, parsing the payload, or writing the
            output fails. The original error is chained as ``__cause__``.
    """
    # re.DOTALL lets the payload span several lines (embedded JSON is often
    # pretty-printed); the dots in "window.data" are escaped so they match
    # literally, and \s* tolerates whitespace differences around "=" and
    # before the closing tag.
    pattern = re.compile(
        r'<script>\s*window\.data\s*=\s*(.*?);\s*</script>',
        re.DOTALL,
    )
    try:
        with open(input_file, encoding='utf8') as f:
            file_content = f.read()

        # Only the first occurrence is used, so search() is enough.
        match = pattern.search(file_content)
        if match is None:
            return False

        data_json = json.loads(match.group(1))

        with open(output_file, 'w', encoding='utf8') as f:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            json.dump(data_json, f, ensure_ascii=False, indent=4)
        return True
    except Exception as e:
        # Keep the original exception type and message for existing callers,
        # but chain the cause explicitly so the real traceback is preserved.
        raise Exception(f"处理文件时出错: {str(e)}") from e
if __name__ == "__main__":
    # Script entry point: read the default HTML file from the current
    # directory and report whether a JSON payload was extracted.
    input_file, output_file = "index.html", "data.json"
    extracted = extract_json_from_html(input_file, output_file)
    message = (
        f"JSON数据已保存到: {output_file}"
        if extracted
        else "未找到匹配的JSON数据"
    )
    print(message)