23 lines
732 B
Python
23 lines
732 B
Python
# -*- coding:utf-8 -*-
|
||
import json
|
||
import re
|
||
|
||
|
||
def htmlToJson(path: str = 'index.html'):
    """Extract the JSON payload assigned to ``window.data`` in an HTML file.

    The file is searched for the first occurrence of
    ``<script>window.data = ...;</script><title></title>`` and the captured
    text is decoded with ``json.loads``.

    Args:
        path: Path of the HTML file to read (decoded as UTF-8). Defaults to
            ``'index.html'`` in the current working directory.

    Returns:
        The decoded JSON value.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If the ``window.data`` script block is not found.
        json.JSONDecodeError: If the captured text is not valid JSON.
    """
    with open(path, encoding='utf8') as f:
        fileContent = f.read()

    # Only the first match is used, so search() is enough. No DOTALL flag is
    # set, so the payload must sit on a single line for the pattern to match.
    found = re.search(
        r'<script>window.data = (.*?);</script><title></title>', fileContent
    )
    if found is None:
        # IndexError is what indexing an empty findall() result used to raise;
        # keep the type so existing handlers still work, but say what failed.
        raise IndexError(
            'no "<script>window.data = ...;</script>" block found in %r' % (path,)
        )

    data_Json = json.loads(found.group(1))
    # Diagnostic output: the decoded value's type, then the value itself.
    print(type(data_Json))
    print(data_Json)
    return data_Json
|
||
|
||
if __name__ == "__main__":
    # Script entry point: extract the embedded data and save it as data.json.
    data = htmlToJson()
    with open('data.json', 'w', encoding='utf8') as file:
        # json.dump() serializes the extracted data and writes it to the file.
        # ensure_ascii=False keeps non-ASCII characters in their original form.
        # indent=4 indents the output by four spaces to make it easier to read.
        json.dump(data, file, ensure_ascii=False, indent=4)