fix covid_stats_via_xpath.py (#12975)

* fix covid_stats_via_xpath.py

Improve error handling.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix typo in covid_stats_via_xpath.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix ruff lint errors

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* upgrade covid_stats_via_xpath.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Update and fix covid_stats_via_xpath.py

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
lighting9999
2025-09-18 21:55:59 +08:00
committed by GitHub
parent 0ee534edde
commit 4ec71a303b

View File

@@ -1,7 +1,8 @@
""" """
This is to show simple COVID19 info fetching from worldometers archive site using lxml This script demonstrates fetching simple COVID-19 statistics from the
* The main motivation to use lxml in place of bs4 is that it is faster and therefore Worldometers archive site using lxml. lxml is chosen over BeautifulSoup
more convenient to use in Python web projects (e.g. Django or Flask-based) for its speed and convenience in Python web projects (such as Django or
Flask).
""" """
# /// script # /// script
@@ -25,15 +26,34 @@ class CovidData(NamedTuple):
def covid_stats( def covid_stats(
url: str = "https://web.archive.org/web/20250825095350/https://www.worldometers.info/coronavirus/", url: str = (
"https://web.archive.org/web/20250825095350/"
"https://www.worldometers.info/coronavirus/"
),
) -> CovidData: ) -> CovidData:
xpath_str = '//div[@class = "maincounter-number"]/span/text()' xpath_str = '//div[@class = "maincounter-number"]/span/text()'
return CovidData( try:
*html.fromstring(httpx.get(url, timeout=10).content).xpath(xpath_str) response = httpx.get(url, timeout=10).raise_for_status()
except httpx.TimeoutException:
print(
"Request timed out. Please check your network connection "
"or try again later."
) )
return CovidData("N/A", "N/A", "N/A")
except httpx.HTTPStatusError as e:
print(f"HTTP error occurred: {e}")
return CovidData("N/A", "N/A", "N/A")
data = html.fromstring(response.content).xpath(xpath_str)
if len(data) != 3:
print("Unexpected data format. The page structure may have changed.")
data = "N/A", "N/A", "N/A"
return CovidData(*data)
fmt = """Total COVID-19 cases in the world: {} if __name__ == "__main__":
Total deaths due to COVID-19 in the world: {} fmt = (
Total COVID-19 patients recovered in the world: {}""" "Total COVID-19 cases in the world: {}\n"
"Total deaths due to COVID-19 in the world: {}\n"
"Total COVID-19 patients recovered in the world: {}"
)
print(fmt.format(*covid_stats())) print(fmt.format(*covid_stats()))