delete main.py
This commit is contained in:
44
main.py
44
main.py
@@ -1,44 +0,0 @@
|
|||||||
# -*- coding:utf-8 -*-
|
|
||||||
import json
|
|
||||||
import re
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
def extract_json_from_html(input_file, output_file):
    """Extract the JSON assigned to ``window.data`` in an HTML file and save it.

    The HTML text is searched for a ``<script>window.data = ...;</script>``
    tag. The payload of the first such tag is parsed as JSON and written to
    ``output_file`` with a 4-space indent, keeping non-ASCII characters
    unescaped.

    Args:
        input_file (str): Path of the HTML file to read (decoded as UTF-8).
        output_file (str): Path of the JSON file to write (encoded as UTF-8).

    Returns:
        bool: True when a payload was found and saved, False when the HTML
        contains no matching script tag (nothing is written in that case).

    Raises:
        Exception: If reading, JSON parsing or writing fails. The message
            wraps the original error text and the original exception is
            chained as ``__cause__``.
    """
    try:
        with open(input_file, encoding='utf8') as f:
            file_content = f.read()

        # re.DOTALL lets the payload span several lines (pretty-printed JSON);
        # the dot in "window.data" is escaped so it only matches a literal dot.
        # Only the first occurrence is used, so search() is sufficient.
        match = re.search(
            r'<script>window\.data = (.*?);</script>',
            file_content,
            re.DOTALL,
        )
        if match is None:
            return False

        data_json = json.loads(match.group(1))

        with open(output_file, 'w', encoding='utf8') as f:
            json.dump(data_json, f, ensure_ascii=False, indent=4)

        return True

    except Exception as e:
        # Keep the generic Exception type and message callers may rely on,
        # but chain the cause explicitly so the real error stays inspectable.
        raise Exception(f"处理文件时出错: {str(e)}") from e
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: fixed input/output file names in the working directory.
    input_file, output_file = "index.html", "data.json"

    extracted = extract_json_from_html(input_file, output_file)
    if not extracted:
        print("未找到匹配的JSON数据!")
    else:
        print(f"JSON数据已保存到: {output_file}")
|
|
||||||
Reference in New Issue
Block a user