feat: 完成模型训练/评估/Web大屏/LaTeX论文框架
- LSTM-Attention模型(983K参数) + XGBoost基线
- Flask API后端(4端点) + ECharts可视化大屏(6面板)
- LaTeX学位论文完整框架(7章+参考文献)
- ERA5下载脚本(CDS逐月并行下载)
- README项目文档

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
+51
-69
@@ -1,32 +1,19 @@
|
||||
"""从 Copernicus CDS 下载 ERA5-Land 再分析数据"""
|
||||
"""从 Copernicus CDS 下载 ERA5-Land 再分析数据(逐月,支持并行)"""
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
import cdsapi
|
||||
|
||||
from src.utils.config import (
|
||||
CITIES,
|
||||
DATA_RAW,
|
||||
ERA5_END_YEAR,
|
||||
ERA5_START_YEAR,
|
||||
ERA5_VARIABLES,
|
||||
CITIES, DATA_RAW, ERA5_START_YEAR, ERA5_END_YEAR, ERA5_VARIABLES,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def build_request(city: str, year: int, month: int) -> dict:
|
||||
"""构建 CDS API 请求参数,提取城市周围 0.5 度区域
|
||||
|
||||
Args:
|
||||
city: 城市键名("jiaozuo" 或 "zhengzhou")
|
||||
year: 年份
|
||||
month: 月份(1-12),0 表示全年所有月份
|
||||
|
||||
Returns:
|
||||
CDS API 请求参数字典
|
||||
"""
|
||||
lat = CITIES[city]["lat"]
|
||||
lon = CITIES[city]["lon"]
|
||||
return {
|
||||
@@ -34,67 +21,62 @@ def build_request(city: str, year: int, month: int) -> dict:
|
||||
"format": "netcdf",
|
||||
"variable": ERA5_VARIABLES,
|
||||
"year": [str(year)],
|
||||
"month": [f"{m:02d}" for m in (range(1, 13) if month == 0 else [month])],
|
||||
"month": [f"{month:02d}"],
|
||||
"day": [f"{d:02d}" for d in range(1, 32)],
|
||||
"time": [f"{h:02d}:00" for h in range(24)],
|
||||
"area": [lat + 0.5, lon - 0.5, lat - 0.5, lon + 0.5], # [N, W, S, E]
|
||||
"time": [f"{h:02d}:00" for h in [0, 6, 12, 18]],
|
||||
"area": [lat + 0.5, lon - 0.5, lat - 0.5, lon + 0.5],
|
||||
}
|
||||
|
||||
|
||||
def _retrieve_with_retry(
    client,
    city: str,
    year: int,
    month: int,
    out_path: Path,
    max_retries: int,
    retry_delay: int,
) -> bool:
    """Fetch one city-month from CDS into ``out_path``, retrying on failure.

    Args:
        client: A ``cdsapi.Client`` instance used to issue the request.
        city: City key passed to ``build_request``.
        year: Year to download.
        month: Month to download (1-12).
        out_path: Final location of the NetCDF file.
        max_retries: Maximum number of attempts.
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        True if the file was downloaded, False once all attempts failed.
    """
    request = build_request(city, year, month)
    # Download next to the target and rename on success, so an interrupted
    # transfer never leaves a truncated file that the "already exists" checks
    # would later mistake for a complete download.
    part_path = out_path.with_name(out_path.name + ".part")

    for attempt in range(1, max_retries + 1):
        logger.info(
            "正在下载 %s %d-%02d (第 %d/%d 次尝试)...",
            city, year, month, attempt, max_retries,
        )
        try:
            client.retrieve("reanalysis-era5-land", request, str(part_path))
            part_path.replace(out_path)
        except Exception:
            # Best-effort download: log the traceback and retry instead of
            # aborting the whole batch.
            logger.exception(
                "下载失败 %s %d-%02d (第 %d/%d 次)",
                city, year, month, attempt, max_retries,
            )
            if attempt < max_retries:
                logger.info("等待 %d 秒后重试...", retry_delay)
                time.sleep(retry_delay)
        else:
            logger.info("下载完成: %s", out_path)
            return True

    logger.error(
        "下载彻底失败 %s %d-%02d,已达最大重试次数",
        city, year, month,
    )
    return False


def download_era5_city(
    city: str,
    start_year: int = ERA5_START_YEAR,
    end_year: int = ERA5_END_YEAR,
    max_retries: int = 3,
    retry_delay: int = 30,
) -> None:
    """Download ERA5-Land data for one city month by month, sequentially.

    Requests are issued per month so that no single request is large enough
    to time out. Months whose output file already exists are skipped.

    Args:
        city: City key in ``CITIES``.
        start_year: First year to download (inclusive).
        end_year: Last year to download (inclusive).
        max_retries: Maximum number of attempts per month.
        retry_delay: Seconds to wait between failed attempts.
    """
    client = cdsapi.Client()
    out_dir = Path(DATA_RAW) / "era5" / city
    out_dir.mkdir(parents=True, exist_ok=True)

    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            out_path = out_dir / f"era5_{city}_{year}_{month:02d}.nc"
            if out_path.exists():
                logger.info("跳过已存在: %s", out_path)
                continue
            # A month that fails every attempt is logged and skipped so the
            # remaining months are still downloaded.
            _retrieve_with_retry(
                client, city, year, month, out_path, max_retries, retry_delay,
            )


def download_one_month(
    city: str,
    year: int,
    month: int,
    max_retries: int = 3,
    retry_delay: int = 30,
) -> bool:
    """Download a single city-month file.

    Args:
        city: City key in ``CITIES``.
        year: Year to download.
        month: Month to download (1-12).
        max_retries: Maximum number of attempts (default 3).
        retry_delay: Seconds to wait between failed attempts (default 30).

    Returns:
        True if the file already exists or was downloaded successfully,
        False if every attempt failed.

    Raises:
        Exception: Whatever ``cdsapi.Client()`` raises when the client
            cannot be constructed; this is not retried.
    """
    out_dir = Path(DATA_RAW) / "era5" / city
    out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / f"era5_{city}_{year}_{month:02d}.nc"

    if out_path.exists():
        return True  # already downloaded, nothing to do

    # Build the client only when there is something to fetch; each call gets
    # its own client, so worker threads do not share one.
    client = cdsapi.Client()
    return _retrieve_with_retry(
        client, city, year, month, out_path, max_retries, retry_delay,
    )
|
||||
|
||||
|
||||
def download_city(city: str, start_year: int = ERA5_START_YEAR,
                  end_year: int = ERA5_END_YEAR, max_workers: int = 3):
    """Download all months for one city using a small thread pool.

    Months whose output file already exists are not submitted. Progress is
    logged every 10 processed months and once more after the last one.

    Args:
        city: City key in ``CITIES``.
        start_year: First year to download (inclusive).
        end_year: Last year to download (inclusive).
        max_workers: Number of worker threads (default 3).
    """
    name = CITIES[city]["name"]
    out_dir = Path(DATA_RAW) / "era5" / city
    months = [(y, m) for y in range(start_year, end_year + 1) for m in range(1, 13)]
    total = len(months)

    # Check the filesystem once, so the "existed" count and the submitted
    # task list are always derived from the same snapshot.
    pending = [
        (y, m) for y, m in months
        if not (out_dir / f"era5_{city}_{y}_{m:02d}.nc").exists()
    ]
    existed = total - len(pending)
    if existed > 0:
        logger.info("%s: %d/%d 已存在,跳过", name, existed, total)

    # Count only the months handled in this run; `existed` is added when
    # reporting so it is never counted twice.
    done = 0
    fail = 0

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {
            pool.submit(download_one_month, city, y, m): (y, m)
            for y, m in pending
        }
        for future in as_completed(futures):
            y, m = futures[future]
            try:
                succeeded = future.result()
            except Exception:
                # result() re-raises whatever the worker raised; treat it as
                # a failed month rather than aborting the remaining downloads.
                logger.exception("下载失败 %s %d-%02d", city, y, m)
                succeeded = False

            if succeeded:
                done += 1
            else:
                fail += 1

            processed = done + fail
            if processed % 10 == 0 or processed == len(pending):
                logger.info("%s: %d/%d 完成 (%d 失败)", name, done + existed, total, fail)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
@@ -103,4 +85,4 @@ if __name__ == "__main__":
|
||||
format="%(asctime)s [%(levelname)s] %(message)s",
|
||||
)
|
||||
for city_name in CITIES:
|
||||
download_era5_city(city_name)
|
||||
download_city(city_name)
|
||||
|
||||
Reference in New Issue
Block a user