first commit
This commit is contained in:
15
.gitignore
vendored
Normal file
15
.gitignore
vendored
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# 忽略构建产物目录
|
||||||
|
/build/
|
||||||
|
/dist/
|
||||||
|
/venv/
|
||||||
|
# 忽略日志文件
|
||||||
|
*.log
|
||||||
|
|
||||||
|
# 忽略一些说明文档
|
||||||
|
*.txt
|
||||||
|
|
||||||
|
# 忽略源文件
|
||||||
|
*.html
|
||||||
|
|
||||||
|
# 忽略测试输出的文件
|
||||||
|
*.json
|
||||||
22
main.py
Normal file
22
main.py
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
# -*- coding:utf-8 -*-
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def htmlToJson(path='index.html'):
    """Extract the ``window.data`` JSON payload embedded in an HTML file.

    The file is searched for a script tag of the form
    ``<script>window.data = ...;</script><title></title>`` and the text
    between ``window.data = `` and ``;</script>`` is parsed as JSON.

    Args:
        path: HTML file to read, decoded as UTF-8. Defaults to
            ``'index.html'``, the file the zero-argument call has always
            read.

    Returns:
        The decoded JSON value of the first payload found in the file.

    Raises:
        IndexError: If the file contains no matching ``window.data`` script.
        json.JSONDecodeError: If the captured text is not valid JSON.
        OSError: If the file cannot be opened.
    """
    with open(path, encoding='utf8') as f:
        fileContent = f.read()

    # Only the first payload is used, so stop at the first match instead of
    # collecting every match in the document.
    match = re.search(r'<script>window.data = (.*?);</script><title></title>', fileContent)
    if match is None:
        # Same exception type the former ``patList[0]`` lookup produced, but
        # with a message that says what is actually missing and where.
        raise IndexError(
            f"no '<script>window.data = ...;</script><title></title>' payload found in {path!r}"
        )

    data_Json = json.loads(match.group(1))
    print(type(data_Json))
    print(data_Json)
    return data_Json
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: pull the embedded payload out of index.html and
    # save it next to the script as pretty-printed JSON.
    parsed = htmlToJson()

    dump_options = {
        # Keep non-ASCII characters as-is instead of \uXXXX escapes.
        "ensure_ascii": False,
        # Indent nested structures by four spaces for readability.
        "indent": 4,
    }
    with open('data.json', mode='w', encoding='utf8') as out_file:
        json.dump(parsed, out_file, **dump_options)
|
||||||
Reference in New Issue
Block a user