feat: 完成模型训练/评估/Web大屏/LaTeX论文框架

- LSTM-Attention模型(983K参数) + XGBoost基线
- Flask API后端(4端点) + ECharts可视化大屏(6面板)
- LaTeX学位论文完整框架(7章+参考文献)
- ERA5下载脚本(CDS逐月并行下载)
- README项目文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-26 21:01:42 +08:00
parent eeab4d1330
commit 07468266b4
19 changed files with 2730 additions and 69 deletions
+51 -69
View File
@@ -1,32 +1,19 @@
"""从 Copernicus CDS 下载 ERA5-Land 再分析数据"""
"""从 Copernicus CDS 下载 ERA5-Land 再分析数据(逐月,支持并行)"""
import logging
import time
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import cdsapi
from src.utils.config import (
CITIES,
DATA_RAW,
ERA5_END_YEAR,
ERA5_START_YEAR,
ERA5_VARIABLES,
CITIES, DATA_RAW, ERA5_START_YEAR, ERA5_END_YEAR, ERA5_VARIABLES,
)
logger = logging.getLogger(__name__)
def build_request(city: str, year: int, month: int) -> dict:
"""构建 CDS API 请求参数,提取城市周围 0.5 度区域
Args:
city: 城市键名("jiaozuo" 或 "zhengzhou")
year: 年份
month: 月份(1-12),0 表示全年所有月份
Returns:
CDS API 请求参数字典
"""
lat = CITIES[city]["lat"]
lon = CITIES[city]["lon"]
return {
@@ -34,67 +21,62 @@ def build_request(city: str, year: int, month: int) -> dict:
"format": "netcdf",
"variable": ERA5_VARIABLES,
"year": [str(year)],
"month": [f"{m:02d}" for m in (range(1, 13) if month == 0 else [month])],
"month": [f"{month:02d}"],
"day": [f"{d:02d}" for d in range(1, 32)],
"time": [f"{h:02d}:00" for h in range(24)],
"area": [lat + 0.5, lon - 0.5, lat - 0.5, lon + 0.5], # [N, W, S, E]
"time": [f"{h:02d}:00" for h in [0, 6, 12, 18]],
"area": [lat + 0.5, lon - 0.5, lat - 0.5, lon + 0.5],
}
def download_era5_city(
    city: str,
    start_year: int = ERA5_START_YEAR,
    end_year: int = ERA5_END_YEAR,
    max_retries: int = 3,
    retry_delay: int = 30,
) -> None:
    """Download ERA5-Land data for one city serially, one month per request.

    Kept as the sequential entry point; each month is delegated to
    ``download_one_month`` so retry and partial-file handling live in one place.

    Args:
        city: Key into ``CITIES``.
        start_year: First year to download (inclusive).
        end_year: Last year to download (inclusive).
        max_retries: Maximum number of attempts per month.
        retry_delay: Seconds to wait between attempts.
    """
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            out_path = _era5_month_path(city, year, month)
            if out_path.exists():
                logger.info("跳过已存在: %s", out_path)
                continue
            if download_one_month(city, year, month, max_retries, retry_delay):
                logger.info("下载完成: %s", out_path)
            else:
                logger.error(
                    "下载彻底失败 %s %d-%02d,已达最大重试次数",
                    city, year, month,
                )


def download_one_month(
    city: str,
    year: int,
    month: int,
    max_retries: int = 3,
    retry_delay: int = 30,
) -> bool:
    """Download a single month of ERA5-Land data for one city.

    The data is first written to a ``.part`` file next to the target and only
    renamed to the final name after ``retrieve`` returns, so an interrupted
    download is never mistaken for a finished file by the existence check.

    Args:
        city: Key into ``CITIES``.
        year: Year to download.
        month: Month to download (1-12).
        max_retries: Maximum number of attempts before giving up.
        retry_delay: Seconds to wait between attempts.

    Returns:
        True if the file already existed or was downloaded successfully,
        False if every attempt failed.
    """
    out_path = _era5_month_path(city, year, month)
    if out_path.exists():
        return True  # already downloaded; no client or request needed

    out_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = out_path.with_name(out_path.name + ".part")
    client = cdsapi.Client()
    request = build_request(city, year, month)

    for attempt in range(1, max_retries + 1):
        try:
            logger.info(
                "正在下载 %s %d-%02d (第 %d/%d 次尝试)...",
                city, year, month, attempt, max_retries,
            )
            client.retrieve("reanalysis-era5-land", request, str(tmp_path))
            tmp_path.replace(out_path)
            return True
        except Exception:
            # Retry boundary around a remote service call: any failure is
            # logged with its traceback and retried rather than propagated.
            logger.exception(
                "下载失败 %s %d-%02d (第 %d/%d 次)",
                city, year, month, attempt, max_retries,
            )
            # Drop whatever was partially written before the next attempt.
            if tmp_path.exists():
                tmp_path.unlink()
            if attempt < max_retries:
                logger.info("等待 %d 秒后重试...", retry_delay)
                time.sleep(retry_delay)
    return False


def _era5_month_path(city: str, year: int, month: int) -> Path:
    """Return the output path of the NetCDF file for one city-month."""
    return Path(DATA_RAW) / "era5" / city / f"era5_{city}_{year}_{month:02d}.nc"
def download_city(city: str, start_year: int = ERA5_START_YEAR,
                  end_year: int = ERA5_END_YEAR, max_workers: int = 3) -> None:
    """Download all months for one city using a thread pool.

    Months whose output file already exists are counted once up front and are
    not submitted to the pool. Progress is logged every 10 finished tasks and
    once more when the last submitted task finishes.

    Args:
        city: Key into ``CITIES``.
        start_year: First year to download (inclusive).
        end_year: Last year to download (inclusive).
        max_workers: Number of worker threads.
    """
    name = CITIES[city]["name"]
    out_dir = Path(DATA_RAW) / "era5" / city
    months = [(y, m) for y in range(start_year, end_year + 1) for m in range(1, 13)]
    total = len(months)

    # Probe the file system once so the "existed" count and the submitted
    # task list cannot disagree.
    pending = [
        (y, m) for y, m in months
        if not (out_dir / f"era5_{city}_{y}_{m:02d}.nc").exists()
    ]
    existed = total - len(pending)
    if existed > 0:
        logger.info("%s: %d/%d 已存在,跳过", name, existed, total)
    if not pending:
        return

    done = 0  # newly downloaded this run; pre-existing files are in `existed`
    fail = 0
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {
            pool.submit(download_one_month, city, y, m): (y, m)
            for y, m in pending
        }
        for future in as_completed(futures):
            y, m = futures[future]
            try:
                ok = future.result()
            except Exception:
                # result() re-raises whatever the worker raised; count it as
                # a failed month instead of aborting the remaining bookkeeping.
                logger.exception("%s: %d-%02d 下载异常", name, y, m)
                ok = False
            if ok:
                done += 1
            else:
                fail += 1
                logger.warning("%s: %d-%02d 下载失败", name, y, m)

            finished = done + fail
            if finished % 10 == 0 or finished == len(pending):
                logger.info("%s: %d/%d 完成 (%d 失败)", name, done + existed, total, fail)
if __name__ == "__main__":
@@ -103,4 +85,4 @@ if __name__ == "__main__":
format="%(asctime)s [%(levelname)s] %(message)s",
)
for city_name in CITIES:
download_era5_city(city_name)
download_city(city_name)