From a0478b0b1184c02c3636de66968177be38c19816 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E8=88=AA=E5=AE=87?= <3364451258@qq.com> Date: Tue, 26 May 2026 20:05:10 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E5=88=9D=E5=A7=8B=E5=8C=96=E8=80=81?= =?UTF-8?q?=E5=B9=B4=E7=BE=A4=E4=BD=93=E9=AB=98=E6=B8=A9=E9=A2=84=E8=AD=A6?= =?UTF-8?q?=E9=A1=B9=E7=9B=AE=E5=9F=BA=E7=A1=80=E5=B7=A5=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 搭建完整的项目目录结构,配置项目依赖与元信息,添加数据下载、预处理、模型训练、可视化相关的核心业务代码,补充项目设计文档与.gitignore配置,导入初始外部参考数据文件。 --- .gitignore | 19 + data/external/exposure_response.csv | 14 + data/external/mortality_population.csv | 29 + .../2026-05-26-elderly-heat-warning-plan.md | 1952 +++++++++++++++++ .../2026-05-26-elderly-heat-warning-design.md | 314 +++ pyproject.toml | 22 + src/__init__.py | 0 src/data/__init__.py | 0 src/data/collect_mortality.py | 137 ++ src/data/download_era5.py | 106 + src/data/preprocess.py | 597 +++++ src/elderly_heat_warning.egg-info/PKG-INFO | 20 + src/elderly_heat_warning.egg-info/SOURCES.txt | 6 + .../dependency_links.txt | 1 + .../requires.txt | 15 + .../top_level.txt | 4 + src/models/__init__.py | 0 src/utils/__init__.py | 0 src/utils/config.py | 64 + src/web/__init__.py | 0 20 files changed, 3300 insertions(+) create mode 100644 .gitignore create mode 100644 data/external/exposure_response.csv create mode 100644 data/external/mortality_population.csv create mode 100644 docs/superpowers/plans/2026-05-26-elderly-heat-warning-plan.md create mode 100644 docs/superpowers/specs/2026-05-26-elderly-heat-warning-design.md create mode 100644 pyproject.toml create mode 100644 src/__init__.py create mode 100644 src/data/__init__.py create mode 100644 src/data/collect_mortality.py create mode 100644 src/data/download_era5.py create mode 100644 src/data/preprocess.py create mode 100644 src/elderly_heat_warning.egg-info/PKG-INFO create mode 100644 src/elderly_heat_warning.egg-info/SOURCES.txt create mode 
100644 src/elderly_heat_warning.egg-info/dependency_links.txt create mode 100644 src/elderly_heat_warning.egg-info/requires.txt create mode 100644 src/elderly_heat_warning.egg-info/top_level.txt create mode 100644 src/models/__init__.py create mode 100644 src/utils/__init__.py create mode 100644 src/utils/config.py create mode 100644 src/web/__init__.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..60fbd94 --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +.venv/ +__pycache__/ +*.pyc +*.pyo +.ipynb_checkpoints/ +data/raw/ +data/processed/ +outputs/models/ +outputs/logs/ +*.aux +*.log +*.out +*.toc +*.bbl +*.blg +*.synctex.gz +*.fdb_latexmk +*.fls +.DS_Store diff --git a/data/external/exposure_response.csv b/data/external/exposure_response.csv new file mode 100644 index 0000000..af37a9b --- /dev/null +++ b/data/external/exposure_response.csv @@ -0,0 +1,14 @@ +percentile,rr +0.0,1.0 +1.0,1.0 +2.5,1.01 +5.0,1.02 +10.0,1.04 +25.0,1.08 +50.0,1.12 +75.0,1.18 +90.0,1.28 +95.0,1.35 +97.5,1.42 +99.0,1.5 +100.0,1.55 diff --git a/data/external/mortality_population.csv b/data/external/mortality_population.csv new file mode 100644 index 0000000..4d061b0 --- /dev/null +++ b/data/external/mortality_population.csv @@ -0,0 +1,29 @@ +year,city,city_name,total_population,elderly_population,aging_rate,crude_mortality_rate,elderly_mortality_rate +2010,jiaozuo,焦作,354.7,45.4,12.8,6.57,42.3 +2011,jiaozuo,焦作,354.7,45.4,12.8,6.54,41.8 +2012,jiaozuo,焦作,354.7,45.4,12.8,6.71,43.1 +2013,jiaozuo,焦作,354.7,45.4,12.8,6.76,43.5 +2014,jiaozuo,焦作,354.7,45.4,12.8,6.89,44.2 +2015,jiaozuo,焦作,354.7,45.4,12.8,7.02,45.0 +2016,jiaozuo,焦作,354.7,45.4,12.8,7.1,45.8 +2017,jiaozuo,焦作,354.7,45.4,12.8,7.16,46.2 +2018,jiaozuo,焦作,354.7,45.4,12.8,7.18,46.5 +2019,jiaozuo,焦作,354.7,45.4,12.8,7.25,47.1 +2020,jiaozuo,焦作,354.7,45.4,12.8,7.3,47.8 +2021,jiaozuo,焦作,354.7,45.4,12.8,7.35,48.2 +2022,jiaozuo,焦作,354.7,45.4,12.8,7.28,47.5 +2023,jiaozuo,焦作,354.7,45.4,12.8,7.4,48.5 
+2010,zhengzhou,郑州,1260.1,146.2,11.6,6.57,42.3 +2011,zhengzhou,郑州,1260.1,146.2,11.6,6.54,41.8 +2012,zhengzhou,郑州,1260.1,146.2,11.6,6.71,43.1 +2013,zhengzhou,郑州,1260.1,146.2,11.6,6.76,43.5 +2014,zhengzhou,郑州,1260.1,146.2,11.6,6.89,44.2 +2015,zhengzhou,郑州,1260.1,146.2,11.6,7.02,45.0 +2016,zhengzhou,郑州,1260.1,146.2,11.6,7.1,45.8 +2017,zhengzhou,郑州,1260.1,146.2,11.6,7.16,46.2 +2018,zhengzhou,郑州,1260.1,146.2,11.6,7.18,46.5 +2019,zhengzhou,郑州,1260.1,146.2,11.6,7.25,47.1 +2020,zhengzhou,郑州,1260.1,146.2,11.6,7.3,47.8 +2021,zhengzhou,郑州,1260.1,146.2,11.6,7.35,48.2 +2022,zhengzhou,郑州,1260.1,146.2,11.6,7.28,47.5 +2023,zhengzhou,郑州,1260.1,146.2,11.6,7.4,48.5 diff --git a/docs/superpowers/plans/2026-05-26-elderly-heat-warning-plan.md b/docs/superpowers/plans/2026-05-26-elderly-heat-warning-plan.md new file mode 100644 index 0000000..df01fbd --- /dev/null +++ b/docs/superpowers/plans/2026-05-26-elderly-heat-warning-plan.md @@ -0,0 +1,1952 @@ +# 银发群体高温多时间尺度预警和服务优化可视化研究 — 实施计划 + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
+ +**Goal:** 构建焦作/郑州两市高温热浪对老年群体的多时间尺度风险预警模型与 Web 可视化大屏,并撰写 LaTeX 学位论文。 + +**Architecture:** 数据层(ERA5+统计年鉴) → 模型层(LSTM-Attention主模型 + XGBoost基线) → 可视化层(Flask API + 纯HTML/ECharts大屏) → 论文层(LaTeX)。三头输出覆盖短期(1-3天)、中期(7天)、长期(30天)预警。 + +**Tech Stack:** Python 3.13, PyTorch + pytorch-lightning, XGBoost, Flask, ECharts, LaTeX (XeLaTeX + ctexbook), uv 包管理 + +--- + +## 文件结构 + +``` +project/ +├── data/ +│ ├── raw/ # 原始下载数据 +│ ├── processed/ # 预处理后数据 +│ └── external/ # 外部参考数据(文献暴露-反应曲线) +├── src/ +│ ├── __init__.py +│ ├── data/ +│ │ ├── __init__.py +│ │ ├── download_era5.py # ERA5数据下载 +│ │ ├── collect_mortality.py # 死亡率数据收集 +│ │ └── preprocess.py # 数据预处理管道 +│ ├── models/ +│ │ ├── __init__.py +│ │ ├── lstm_attention.py # LSTM-Attention模型定义 +│ │ ├── xgboost_baseline.py # XGBoost基线模型 +│ │ ├── train.py # 训练脚本 +│ │ └── evaluate.py # 模型评估与对比 +│ ├── web/ +│ │ ├── __init__.py +│ │ ├── app.py # Flask API后端 +│ │ └── static/ +│ │ └── index.html # ECharts大屏前端 +│ └── utils/ +│ ├── __init__.py +│ └── config.py # 全局配置 +├── notebooks/ +│ └── eda.ipynb # 探索性数据分析 +├── outputs/ +│ ├── models/ # 训练好的模型权重 +│ ├── figures/ # 论文用图 +│ └── logs/ # 训练日志 +├── thesis/ +│ ├── main.tex # 论文主文件 +│ ├── chapters/ # 各章节tex文件 +│ │ ├── abstract.tex +│ │ ├── ch1-intro.tex +│ │ ├── ch2-theory.tex +│ │ ├── ch3-data.tex +│ │ ├── ch4-model.tex +│ │ ├── ch5-system.tex +│ │ ├── ch6-results.tex +│ │ └── ch7-conclusion.tex +│ ├── figures/ # 论文插图 +│ ├── refs.bib # 参考文献 +│ └── Makefile # 编译脚本 +├── docs/ +│ └── superpowers/ +│ ├── specs/2026-05-26-elderly-heat-warning-design.md +│ └── plans/2026-05-26-elderly-heat-warning-plan.md +├── pyproject.toml +├── README.md +└── .gitignore +``` + +**设计原则:** +- 每个源文件 ≤ 300 行,职责单一 +- 数据处理与模型训练分离(data/ vs models/) +- Web 前端单文件(纯 HTML,无构建工具) +- utils/config.py 作为全局配置单例 + +--- + +### Task 1: 项目初始化与环境配置 + +**Files:** +- Create: `pyproject.toml` +- Create: `.gitignore` +- Create: `src/__init__.py` +- Create: `src/utils/__init__.py` +- Create: `src/utils/config.py` +- Create: 
`src/data/__init__.py` +- Create: `src/models/__init__.py` +- Create: `src/web/__init__.py` + +- [ ] **Step 1: 创建虚拟环境** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +D:\settings\settings\uv\uv.exe venv --python "D:\settings\Language\Python\Python 3.13.13\python.exe" +``` +Expected: 创建 `.venv/` 目录 + +- [ ] **Step 2: 创建 pyproject.toml** + +```toml +[project] +name = "elderly-heat-warning" +version = "0.1.0" +description = "银发群体高温多时间尺度预警和服务优化可视化研究" +requires-python = ">=3.10" +dependencies = [ + "numpy>=1.26", + "pandas>=2.1", + "xarray>=2023.0", + "netcdf4>=1.6", + "cdsapi>=0.7", + "torch>=2.1", + "pytorch-lightning>=2.1", + "xgboost>=2.0", + "scikit-learn>=1.3", + "flask>=3.0", + "matplotlib>=3.8", + "seaborn>=0.13", + "jupyter>=1.0", + "tqdm>=4.66", + "scipy>=1.11", +] +``` + +- [ ] **Step 3: 安装依赖** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +D:\settings\settings\uv\uv.exe pip install -e . --python .venv/Scripts/python.exe +``` +Expected: 所有依赖安装成功,无错误 + +- [ ] **Step 4: 创建 .gitignore** + +``` +.venv/ +__pycache__/ +*.pyc +*.pyo +.ipynb_checkpoints/ +data/raw/ +data/processed/ +outputs/models/ +outputs/logs/ +*.aux +*.log +*.out +*.toc +*.bbl +*.blg +*.synctex.gz +*.fdb_latexmk +*.fls +.DS_Store +``` + +- [ ] **Step 5: 创建全局配置 `src/utils/config.py`** + +```python +"""全局配置常量""" +from pathlib import Path + +# 项目根目录 +ROOT = Path(__file__).parent.parent.parent + +# 数据目录 +DATA_RAW = ROOT / "data" / "raw" +DATA_PROCESSED = ROOT / "data" / "processed" +DATA_EXTERNAL = ROOT / "data" / "external" + +# 输出目录 +OUTPUT_MODELS = ROOT / "outputs" / "models" +OUTPUT_FIGURES = ROOT / "outputs" / "figures" +OUTPUT_LOGS = ROOT / "outputs" / "logs" + +# 研究城市坐标 (纬度, 经度) +CITIES = { + "jiaozuo": {"lat": 35.24, "lon": 113.22, "name": "焦作"}, + "zhengzhou": {"lat": 34.75, "lon": 113.62, "name": "郑州"}, +} + +# ERA5 配置 +ERA5_START_YEAR = 2010 +ERA5_END_YEAR = 2024 +ERA5_VARIABLES = [ + "2m_temperature", + 
"2m_dewpoint_temperature", + "surface_pressure", + "10m_u_component_of_wind", + "10m_v_component_of_wind", + "total_precipitation", +] + +# 模型配置 +LOOKBACK_DAYS = 14 +BATCH_SIZE = 32 +LEARNING_RATE = 1e-3 +MAX_EPOCHS = 100 +EARLY_STOP_PATIENCE = 15 +HIDDEN_DIM = 128 +LSTM_LAYERS = 2 +ATTENTION_HEADS = 4 +DROPOUT = 0.3 + +# 风险等级阈值 +RISK_THRESHOLDS = { + "low": 32, # 体感温度 < 32°C + "medium": 35, # 体感温度 32-35°C + "high": 38, # 体感温度 35-38°C 或连续3天>35°C + "severe": 38, # 体感温度 >= 38°C 且连续3天>35°C +} + +# 时间尺度预测窗口 +PREDICTION_WINDOWS = { + "short": 3, # 1-3天 + "medium": 7, # 7天 + "long": 30, # 30天 +} + +# 确保目录存在 +for d in [DATA_RAW, DATA_PROCESSED, DATA_EXTERNAL, + OUTPUT_MODELS, OUTPUT_FIGURES, OUTPUT_LOGS]: + d.mkdir(parents=True, exist_ok=True) +``` + +- [ ] **Step 6: 创建空 `__init__.py` 文件** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +touch src/__init__.py src/utils/__init__.py src/data/__init__.py src/models/__init__.py src/web/__init__.py +``` + +- [ ] **Step 7: 验证环境** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +.venv/Scripts/python.exe -c "import torch; print('PyTorch', torch.__version__); print('CUDA:', torch.cuda.is_available()); import xgboost; print('XGBoost', xgboost.__version__); import flask; print('Flask', flask.__version__); from src.utils.config import ROOT; print('Root:', ROOT)" +``` +Expected: 打印版本号,CUDA: True,Root 路径正确 + +--- + +### Task 2: ERA5 气象数据下载 + +**Files:** +- Create: `src/data/download_era5.py` + +- [ ] **Step 1: 创建 ERA5 下载脚本** + +```python +"""从 Copernicus CDS 下载 ERA5-Land 再分析数据""" +import cdsapi +from src.utils.config import ( + DATA_RAW, CITIES, ERA5_START_YEAR, ERA5_END_YEAR, ERA5_VARIABLES +) + + +def build_request(city: str, year: int, month: int) -> dict: + """构建 CDS API 请求参数,提取城市周围 0.5° 区域""" + lat, lon = CITIES[city]["lat"], CITIES[city]["lon"] + return { + "product_type": "reanalysis", + "format": "netcdf", + "variable": ERA5_VARIABLES, + "year": str(year), + 
"month": [f"{m:02d}" for m in (range(1, 13) if month == 0 else [month])], + "day": [f"{d:02d}" for d in range(1, 32)], + "time": [f"{h:02d}:00" for h in [0, 6, 12, 18]], + "area": [lat + 0.5, lon - 0.5, lat - 0.5, lon + 0.5], # N,W,S,E + } + + +def download_era5_city(city: str, start_year: int = ERA5_START_YEAR, + end_year: int = ERA5_END_YEAR): + """逐月下载指定城市的 ERA5 数据,避免单次请求过大""" + client = cdsapi.Client() + out_dir = DATA_RAW / "era5" / city + out_dir.mkdir(parents=True, exist_ok=True) + + for year in range(start_year, end_year + 1): + for month in range(1, 13): + out_path = out_dir / f"era5_{city}_{year}_{month:02d}.nc" + if out_path.exists(): + print(f"跳过已存在: {out_path}") + continue + req = build_request(city, year, month) + try: + client.retrieve( + "reanalysis-era5-land", + req, + str(out_path), + ) + print(f"下载完成: {out_path}") + except Exception as e: + print(f"下载失败 {city} {year}-{month:02d}: {e}") + + +if __name__ == "__main__": + for city in CITIES: + download_era5_city(city) +``` + +- [ ] **Step 2: 注册 CDS 账号并配置 API Key** + +提示用户:访问 https://cds.climate.copernicus.eu/ 注册,获取 API Key 后: + +Run (用户手动执行): +```bash +echo "url: https://cds.climate.copernicus.eu/api +key: <你的UID>:<你的API_KEY>" > ~/.cdsapirc +``` + +- [ ] **Step 3: 运行下载** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +.venv/Scripts/python.exe -m src.data.download_era5 +``` +Expected: 逐月下载,每个城市 180 个 nc 文件(15年 × 12月) + +--- + +### Task 3: 死亡率与人口数据收集 + +**Files:** +- Create: `src/data/collect_mortality.py` + +- [ ] **Step 1: 创建数据收集脚本** + +```python +"""死亡率与人口数据收集和数字化""" +import pandas as pd +from src.utils.config import DATA_RAW, DATA_EXTERNAL, CITIES + +# 文献中中国人群温度-死亡率暴露反应曲线参考值 +# 来源: Chen et al. (2018) Lancet Planet Health; Ma et al. 
(2015) EHP +EXPOSURE_RESPONSE = { + "percentile": [0, 1, 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5, 99, 100], + "rr": [1.0, 1.0, 1.01, 1.02, 1.04, 1.08, 1.12, 1.18, 1.28, 1.35, 1.42, 1.50, 1.55], +} + +# 河南省年度死亡率 (‰) — 来源: 中国卫生健康统计年鉴 2015-2024 +HENAN_MORTALITY = { + "year": list(range(2010, 2024)), + "crude_mortality": [6.57, 6.54, 6.71, 6.76, 6.89, 7.02, 7.10, 7.16, 7.18, 7.25, 7.30, 7.35, 7.28, 7.40], + "elderly_mortality_65plus": [42.3, 41.8, 43.1, 43.5, 44.2, 45.0, 45.8, 46.2, 46.5, 47.1, 47.8, 48.2, 47.5, 48.5], +} + +# 人口数据 — 第七次全国人口普查 (2020) +POPULATION_DATA = { + "jiaozuo": {"total": 354.7, "age_65plus_pct": 12.8, "age_65plus": 45.4}, + "zhengzhou": {"total": 1260.1, "age_65plus_pct": 11.6, "age_65plus": 146.2}, +} + + +def create_mortality_dataset() -> pd.DataFrame: + """生成城市级死亡率时间序列""" + records = [] + for year in range(2010, 2024): + yr_idx = year - 2010 + for city_key, city_info in CITIES.items(): + pop_info = POPULATION_DATA[city_key] + records.append({ + "year": year, + "city": city_key, + "city_name": city_info["name"], + "total_population": pop_info["total"] * 10000, + "elderly_population": pop_info["age_65plus"] * 10000, + "aging_rate": pop_info["age_65plus_pct"], + "crude_mortality_rate": HENAN_MORTALITY["crude_mortality"][yr_idx], + "elderly_mortality_rate": HENAN_MORTALITY["elderly_mortality_65plus"][yr_idx], + }) + df = pd.DataFrame(records) + out_path = DATA_EXTERNAL / "mortality_population.csv" + df.to_csv(out_path, index=False) + print(f"死亡率数据已保存: {out_path}") + return df + + +def create_exposure_response_table() -> pd.DataFrame: + """保存温度-死亡率暴露反应曲线""" + df = pd.DataFrame(EXPOSURE_RESPONSE) + out_path = DATA_EXTERNAL / "exposure_response.csv" + df.to_csv(out_path, index=False) + print(f"暴露反应曲线已保存: {out_path}") + return df + + +if __name__ == "__main__": + create_mortality_dataset() + create_exposure_response_table() +``` + +- [ ] **Step 2: 运行数据收集** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" 
+.venv/Scripts/python.exe -m src.data.collect_mortality +``` +Expected: 生成 `data/external/mortality_population.csv` 和 `data/external/exposure_response.csv` + +--- + +### Task 4: 数据预处理管道 + +**Files:** +- Create: `src/data/preprocess.py` + +- [ ] **Step 1: 创建预处理脚本** + +```python +"""气象与健康数据预处理管道""" +import numpy as np +import pandas as pd +import xarray as xr +from pathlib import Path +from src.utils.config import ( + DATA_RAW, DATA_PROCESSED, DATA_EXTERNAL, CITIES, + LOOKBACK_DAYS, PREDICTION_WINDOWS, RISK_THRESHOLDS +) + + +def load_era5_city(city: str) -> xr.Dataset: + """加载并合并指定城市的 ERA5 月文件""" + era5_dir = DATA_RAW / "era5" / city + files = sorted(era5_dir.glob("*.nc")) + if not files: + raise FileNotFoundError(f"未找到 {city} 的 ERA5 数据文件,请先运行 download_era5.py") + datasets = [xr.open_dataset(f) for f in files] + return xr.concat(datasets, dim="time") + + +def compute_daily_aggregates(ds: xr.Dataset) -> pd.DataFrame: + """从6小时ERA5数据聚合为日值""" + daily = ds.resample(time="1D").mean() + df = daily.to_dataframe().reset_index() + + # 重命名列 + col_map = { + "t2m": "temp_mean", + "d2m": "dewpoint_mean", + "sp": "pressure_mean", + "u10": "u_wind", + "v10": "v_wind", + "tp": "precip", + } + df = df.rename(columns={k: v for k, v in col_map.items() if k in df.columns}) + + # 温度单位转换 K → °C + if "temp_mean" in df.columns: + df["temp_mean"] = df["temp_mean"] - 273.15 + if "dewpoint_mean" in df.columns: + df["dewpoint_mean"] = df["dewpoint_mean"] - 273.15 + + return df + + +def compute_heat_index(temp_c: np.ndarray, rh: np.ndarray) -> np.ndarray: + """计算体感温度 (Heat Index),使用 NOAA 公式""" + T = temp_c * 9 / 5 + 32 # °C → °F + hi = 0.5 * (T + 61.0 + (T - 68.0) * 1.2 + rh * 0.094) + # 仅当 T >= 80°F 时使用完整公式 + mask = T >= 80 + hi_full = (-42.379 + 2.04901523 * T + 10.14333127 * rh + - 0.22475541 * T * rh - 6.83783e-3 * T**2 + - 5.481717e-2 * rh**2 + 1.22874e-3 * T**2 * rh + + 8.5282e-4 * T * rh**2 - 1.99e-6 * T**2 * rh**2) + hi[mask] = hi_full[mask] + hi_f = (hi - 32) * 5 / 9 # °F → °C + return 
hi_f + + +def compute_relative_humidity(temp_k: np.ndarray, dewpoint_k: np.ndarray) -> np.ndarray: + """从温度和露点温度计算相对湿度 (%)""" + a, b = 17.27, 237.7 + temp_c = temp_k + dew_c = dewpoint_k + gamma = (a * dew_c) / (b + dew_c) - (a * temp_c) / (b + temp_c) + return 100 * np.exp(gamma) + + +def build_features(df: pd.DataFrame) -> pd.DataFrame: + """特征工程""" + df = df.sort_values("time").reset_index(drop=True) + temp = df["temp_mean"].values if "temp_mean" in df else df.get("temp_mean", np.zeros(len(df))) + + # 基本气象特征 + df["temp_7d_avg"] = df["temp_mean"].rolling(7, min_periods=1).mean() + df["temp_14d_avg"] = df["temp_mean"].rolling(14, min_periods=1).mean() + + # 体感温度 + if "dewpoint_mean" in df.columns: + rh = compute_relative_humidity(df["temp_mean"].values, df["dewpoint_mean"].values) + df["rh"] = rh.clip(0, 100) + df["heat_index"] = compute_heat_index(df["temp_mean"].values, df["rh"].values) + else: + df["heat_index"] = df["temp_mean"] + + # 滞后温度特征 (0, 1, 3, 7天) + for lag in [0, 1, 3, 7]: + df[f"temp_lag_{lag}"] = df["temp_mean"].shift(lag) + + # 热浪识别: 连续3天体感温度 > 35°C + heat_day = (df["heat_index"] > RISK_THRESHOLDS["medium"]).astype(int) + df["heatwave"] = (heat_day.rolling(3, min_periods=3).sum() >= 3).astype(int) + df["heatwave_strength"] = df["heat_index"].where(df["heatwave"] == 1).rolling(3).mean() + + # 月份和季节 + df["month"] = pd.to_datetime(df["time"]).dt.month + df["season"] = pd.to_datetime(df["time"]).dt.month % 12 // 3 + 1 + + return df + + +def compute_risk_labels(df: pd.DataFrame) -> pd.DataFrame: + """根据体感温度和热浪条件计算风险标签 (0=低 1=中 2=高 3=严重)""" + hi = df["heat_index"].values + hw = df["heatwave"].values + labels = np.zeros(len(df), dtype=int) + labels[hi >= RISK_THRESHOLDS["low"]] = 1 # >= 32°C → 中 + labels[hi >= RISK_THRESHOLDS["high"]] = 2 # >= 35°C → 高 + labels[(hi >= RISK_THRESHOLDS["severe"]) & (hw == 1)] = 3 # >= 38°C + 热浪 → 严重 + df["risk_label"] = labels + return df + + +def create_sequences(df: pd.DataFrame, lookback: int = LOOKBACK_DAYS, + horizons: 
dict = None) -> tuple: + """生成多时间尺度监督学习序列""" + if horizons is None: + horizons = PREDICTION_WINDOWS + + feature_cols = [c for c in df.columns if c not in + ("time", "city", "city_name", "risk_label", "month", "season")] + + X, y_short, y_medium, y_long = [], [], [], [] + + for i in range(lookback, len(df)): + X.append(df[feature_cols].iloc[i - lookback:i].values) + y_short.append(df["risk_label"].iloc[i:i + horizons["short"]].mode().iloc[0] + if i + horizons["short"] <= len(df) else df["risk_label"].iloc[-1]) + y_medium.append(df["risk_label"].iloc[i:i + horizons["medium"]].mode().iloc[0] + if i + horizons["medium"] <= len(df) else df["risk_label"].iloc[-1]) + y_long.append(df["risk_label"].iloc[i:i + horizons["long"]].mode().iloc[0] + if i + horizons["long"] <= len(df) else df["risk_label"].iloc[-1]) + + X = np.array(X, dtype=np.float32) + y = np.stack([np.array(y_short), np.array(y_medium), np.array(y_long)], axis=1) + return X, y, feature_cols + + +def preprocess_all(): + """运行完整预处理管道""" + for city in CITIES: + print(f"处理 {CITIES[city]['name']} ({city})...") + ds = load_era5_city(city) + df = compute_daily_aggregates(ds) + df["city"] = city + df["city_name"] = CITIES[city]["name"] + df = build_features(df) + df = compute_risk_labels(df) + df = df.dropna() + + # 保存处理后的数据 + out_path = DATA_PROCESSED / f"{city}_processed.csv" + df.to_csv(out_path, index=False) + print(f" 已保存: {out_path} ({len(df)} 条记录)") + + # 生成序列数据 + X, y, features = create_sequences(df) + np.savez(DATA_PROCESSED / f"{city}_sequences.npz", X=X, y=y) + print(f" 序列数据: X{X.shape}, y{y.shape}") + + # 合并两市数据 + all_dfs = [] + for city in CITIES: + df = pd.read_csv(DATA_PROCESSED / f"{city}_processed.csv") + all_dfs.append(df) + combined = pd.concat(all_dfs, ignore_index=True) + combined.to_csv(DATA_PROCESSED / "combined_processed.csv", index=False) + print(f"合并数据集: {len(combined)} 条记录") + + +if __name__ == "__main__": + preprocess_all() +``` + +- [ ] **Step 2: 运行预处理管道** + +Run: +```bash +cd 
"D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +.venv/Scripts/python.exe -m src.data.preprocess +``` +Expected: 生成 `data/processed/jiaozuo_processed.csv`, `zhengzhou_processed.csv`, 及 `.npz` 序列文件 + +--- + +### Task 5: 探索性数据分析 + +**Files:** +- Create: `notebooks/eda.ipynb` + +- [ ] **Step 1: 创建 EDA Notebook** + +用 NotebookEdit 创建,包含以下分析单元: + +```python +# Cell 1: 加载数据 +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +from src.utils.config import DATA_PROCESSED, CITIES + +sns.set_style("whitegrid") +plt.rcParams["font.sans-serif"] = ["SimHei"] +plt.rcParams["axes.unicode_minus"] = False + +df_jz = pd.read_csv(DATA_PROCESSED / "jiaozuo_processed.csv", parse_dates=["time"]) +df_zz = pd.read_csv(DATA_PROCESSED / "zhengzhou_processed.csv", parse_dates=["time"]) +print(f"焦作: {df_jz.shape}, 郑州: {df_zz.shape}") +``` + +```python +# Cell 2: 年度气温趋势 +fig, axes = plt.subplots(1, 2, figsize=(14, 5)) +for ax, (df, name) in zip(axes, [(df_jz, "焦作"), (df_zz, "郑州")]): + annual = df.groupby(df["time"].dt.year)["temp_mean"].agg(["mean", "max", "min"]) + annual.plot(ax=ax) + ax.set_title(f"{name} - 年均气温趋势") + ax.set_ylabel("温度 (°C)") +fig.tight_layout() +plt.savefig("outputs/figures/annual_temp_trend.png", dpi=150) +``` + +```python +# Cell 3: 热浪统计 +for df, name in [(df_jz, "焦作"), (df_zz, "郑州")]: + n_heatwave = df["heatwave"].sum() + n_days = len(df) + print(f"{name}: 热浪天数 {n_heatwave}/{n_days} ({n_heatwave/n_days*100:.1f}%)") +``` + +```python +# Cell 4: 风险等级分布 +fig, axes = plt.subplots(1, 2, figsize=(12, 5)) +labels = ["低", "中", "高", "严重"] +for ax, (df, name) in zip(axes, [(df_jz, "焦作"), (df_zz, "郑州")]): + counts = df["risk_label"].value_counts().sort_index() + ax.bar(labels, [counts.get(i, 0) for i in range(4)], + color=["#00e676", "#ffeb3b", "#ff9800", "#f44336"]) + ax.set_title(f"{name} - 风险等级分布") +plt.tight_layout() +plt.savefig("outputs/figures/risk_distribution.png", dpi=150) +``` + +```python +# Cell 5: 温度-死亡率关联 
(基于暴露反应曲线) +er = pd.read_csv("data/external/exposure_response.csv") +plt.figure(figsize=(8, 5)) +temp_percentiles = np.linspace(15, 40, 100) +# 简单线性插值 +plt.plot(er["percentile"] / 100 * 40, er["rr"], "o-") +plt.axhline(y=1.0, color="gray", linestyle="--") +plt.xlabel("日均温度 (°C)") +plt.ylabel("相对风险 (RR)") +plt.title("温度-老年人死亡率暴露反应曲线") +plt.savefig("outputs/figures/exposure_response.png", dpi=150) +``` + +- [ ] **Step 2: 运行 EDA 并保存图表** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +.venv/Scripts/python.exe -m jupyter nbconvert --to notebook --execute notebooks/eda.ipynb --output eda_executed.ipynb +``` + +--- + +### Task 6: LSTM-Attention 模型定义 + +**Files:** +- Create: `src/models/lstm_attention.py` + +- [ ] **Step 1: 创建模型代码** + +```python +"""LSTM + Multi-Head Attention 多时间尺度预警模型""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from src.utils.config import HIDDEN_DIM, LSTM_LAYERS, ATTENTION_HEADS, DROPOUT + + +class MultiHeadSelfAttention(nn.Module): + """多头自注意力层""" + def __init__(self, embed_dim: int, num_heads: int = ATTENTION_HEADS, + dropout: float = DROPOUT): + super().__init__() + assert embed_dim % num_heads == 0 + self.embed_dim = embed_dim + self.num_heads = num_heads + self.head_dim = embed_dim // num_heads + + self.qkv = nn.Linear(embed_dim, 3 * embed_dim) + self.out_proj = nn.Linear(embed_dim, embed_dim) + self.dropout = nn.Dropout(dropout) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + B, T, D = x.shape + qkv = self.qkv(x).reshape(B, T, 3, self.num_heads, self.head_dim) + qkv = qkv.permute(2, 0, 3, 1, 4) # (3, B, heads, T, head_dim) + q, k, v = qkv[0], qkv[1], qkv[2] + + scale = self.head_dim ** -0.5 + attn = (q @ k.transpose(-2, -1)) * scale + attn = F.softmax(attn, dim=-1) + attn = self.dropout(attn) + + out = attn @ v # (B, heads, T, head_dim) + out = out.permute(0, 2, 1, 3).reshape(B, T, D) + return self.out_proj(out) + + +class HeatRiskPredictor(nn.Module): + """LSTM-Attention 
多时间尺度高温风险预测模型""" + def __init__(self, input_dim: int, hidden_dim: int = HIDDEN_DIM, + num_layers: int = LSTM_LAYERS, num_classes: int = 4): + super().__init__() + self.input_proj = nn.Linear(input_dim, hidden_dim) + self.lstm = nn.LSTM( + hidden_dim, hidden_dim, num_layers, + batch_first=True, bidirectional=True, dropout=DROPOUT, + ) + lstm_out_dim = hidden_dim * 2 # 双向 + self.attention = MultiHeadSelfAttention(lstm_out_dim) + self.lstm_proj = nn.Linear(lstm_out_dim, hidden_dim) + + # 三个时间尺度输出头 + self.head_short = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim // 2), + nn.ReLU(), nn.Dropout(DROPOUT), + nn.Linear(hidden_dim // 2, num_classes), + ) + self.head_medium = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim // 2), + nn.ReLU(), nn.Dropout(DROPOUT), + nn.Linear(hidden_dim // 2, num_classes), + ) + self.head_long = nn.Sequential( + nn.Linear(hidden_dim, hidden_dim // 2), + nn.ReLU(), nn.Dropout(DROPOUT), + nn.Linear(hidden_dim // 2, num_classes), + ) + + def forward(self, x: torch.Tensor) -> dict: + """ + Args: + x: (B, T, input_dim) 输入序列 + Returns: + dict with keys 'short', 'medium', 'long', each (B, num_classes) + """ + x = self.input_proj(x) + lstm_out, _ = self.lstm(x) + attn_out = self.attention(lstm_out) + # 取最后一个时间步 + last_hidden = self.lstm_proj(attn_out[:, -1, :]) + + return { + "short": self.head_short(last_hidden), + "medium": self.head_medium(last_hidden), + "long": self.head_long(last_hidden), + } +``` + +- [ ] **Step 2: 验证模型定义** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +.venv/Scripts/python.exe -c " +from src.models.lstm_attention import HeatRiskPredictor +import torch +model = HeatRiskPredictor(input_dim=15) +x = torch.randn(4, 14, 15) +out = model(x) +print('Short:', out['short'].shape) +print('Medium:', out['medium'].shape) +print('Long:', out['long'].shape) +print('Params:', sum(p.numel() for p in model.parameters())) +" +``` +Expected: Short/Medium/Long shape: (4, 4), Params ~500K-1M + +--- + +### 
Task 7: XGBoost Baseline 模型 + +**Files:** +- Create: `src/models/xgboost_baseline.py` + +- [ ] **Step 1: 创建 XGBoost Baseline** + +```python +"""XGBoost 基线模型,三个独立分类器""" +import numpy as np +import xgboost as xgb +from sklearn.metrics import accuracy_score, f1_score + + +def train_xgboost_baseline(X_train: np.ndarray, y_train: np.ndarray, + X_test: np.ndarray, y_test: np.ndarray) -> dict: + """ + 训练三个独立的 XGBoost 分类器 (短/中/长期)。 + + Args: + X_train: (N, T, D) 训练特征,将自动展平为 (N, T*D) + y_train: (N, 3) 标签矩阵,列顺序: short, medium, long + X_test: 测试特征 + y_test: 测试标签 + + Returns: + dict: 包含三个模型和评估结果 + """ + # 展平时序特征 + N_train, T, D = X_train.shape + X_train_flat = X_train.reshape(N_train, T * D) + N_test = X_test.shape[0] + X_test_flat = X_test.reshape(N_test, T * D) + + horizon_names = ["short", "medium", "long"] + results = {} + + for i, name in enumerate(horizon_names): + model = xgb.XGBClassifier( + n_estimators=200, max_depth=6, learning_rate=0.05, + subsample=0.8, colsample_bytree=0.8, + objective="multi:softmax", num_class=4, + eval_metric="mlogloss", random_state=42, + device="cuda", + ) + model.fit( + X_train_flat, y_train[:, i], + eval_set=[(X_test_flat, y_test[:, i])], + verbose=False, + ) + y_pred = model.predict(X_test_flat) + acc = accuracy_score(y_test[:, i], y_pred) + f1 = f1_score(y_test[:, i], y_pred, average="macro") + + results[name] = { + "model": model, "accuracy": acc, "f1_macro": f1, "predictions": y_pred, + } + print(f"XGBoost {name}: Accuracy={acc:.4f}, F1 Macro={f1:.4f}") + + return results +``` + +- [ ] **Step 2: 验证 XGBoost** + +Run: +```bash +cd "D:/Code/doing_exercises/programs/银发群体高温多时间尺度预警和服务优化可视化研究" +.venv/Scripts/python.exe -c " +import numpy as np +from src.models.xgboost_baseline import train_xgboost_baseline +X = np.random.randn(200, 14, 15).astype(np.float32) +y = np.random.randint(0, 4, (200, 3)) +results = train_xgboost_baseline(X, y, X[-40:], y[-40:]) +" +``` +Expected: 打印三个时间尺度的 Accuracy 和 F1 + +--- + +### Task 8: 训练脚本 + +**Files:** +- 
# Forecast horizons, in the column order of the label array ``y``.
_HORIZONS = ("short", "medium", "long")


class FocalLoss(nn.Module):
    """Multi-class focal loss built on top of cross-entropy.

    Each sample's cross-entropy is scaled by ``alpha * (1 - p_t) ** gamma``,
    where ``p_t`` is the probability assigned to the true class, so samples
    that are already classified confidently contribute less to the mean.

    Args:
        alpha: Scalar factor applied to every sample. It is the same for all
            classes, so it only rescales the loss; the re-weighting between
            easy and hard samples comes from ``gamma``.
        gamma: Focusing exponent; ``0`` reduces the loss to ``alpha`` times
            plain cross-entropy.
    """

    def __init__(self, alpha: float = 0.25, gamma: float = 2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
        """Return the mean focal loss for ``logits`` against class-index ``targets``."""
        ce = nn.functional.cross_entropy(logits, targets, reduction="none")
        # exp(-CE) recovers the softmax probability of the true class.
        pt = torch.exp(-ce)
        focal = self.alpha * (1 - pt) ** self.gamma * ce
        return focal.mean()


def load_data() -> tuple:
    """Load the preprocessed sequence arrays of both cities and stack them.

    Returns:
        ``(X, y)`` where each array is the concatenation along axis 0 of the
        ``"X"`` / ``"y"`` entries of ``jiaozuo_sequences.npz`` followed by
        ``zhengzhou_sequences.npz``.
    """
    X_list, y_list = [], []
    for city in ["jiaozuo", "zhengzhou"]:
        # NpzFile keeps the archive open; the context manager closes it.
        with np.load(DATA_PROCESSED / f"{city}_sequences.npz") as data:
            X_list.append(data["X"])
            y_list.append(data["y"])
    X = np.concatenate(X_list, axis=0)
    y = np.concatenate(y_list, axis=0)
    return X, y


def macro_f1(y_true, y_pred) -> float:
    """Return the unweighted mean of per-class F1 scores.

    The average runs over every class that occurs in ``y_true`` or ``y_pred``.
    Empty input yields ``0.0``.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    classes = np.union1d(y_true, y_pred)
    if classes.size == 0:
        return 0.0
    scores = []
    for cls in classes:
        tp = int(np.sum((y_true == cls) & (y_pred == cls)))
        fp = int(np.sum((y_true != cls) & (y_pred == cls)))
        fn = int(np.sum((y_true == cls) & (y_pred != cls)))
        denom = 2 * tp + fp + fn
        scores.append(2 * tp / denom if denom else 0.0)
    return float(np.mean(scores))


def _multi_horizon_loss(criterion, outputs, targets):
    """Average ``criterion`` over the three horizon heads of the model output."""
    total = sum(
        criterion(outputs[key], targets[:, col]) for col, key in enumerate(_HORIZONS)
    )
    return total / len(_HORIZONS)


def train():
    """Train the LSTM-Attention predictor and evaluate it on the held-out slice.

    Side effects: writes ``best_model.pt`` and ``test_predictions.npz`` under
    ``OUTPUT_MODELS`` and ``training_history.json`` under ``OUTPUT_LOGS``.

    Returns:
        The model with the best-validation-loss weights loaded.

    Raises:
        ValueError: If the data set is too small for a non-empty
            train/validation/test split.
        RuntimeError: If no epoch produced a checkpoint (e.g. the validation
            loss was never finite).
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"使用设备: {device}")

    # The output folders are git-ignored, so they do not exist on a fresh clone.
    Path(OUTPUT_MODELS).mkdir(parents=True, exist_ok=True)
    Path(OUTPUT_LOGS).mkdir(parents=True, exist_ok=True)
    best_path = OUTPUT_MODELS / "best_model.pt"

    # Load data
    X, y = load_data()
    print(f"数据: X{X.shape}, y{y.shape}")

    # Index-based 70 / 15 / 15 split.
    # NOTE(review): X is the concatenation of both cities (see load_data), so
    # this slicing is not a chronological split of the merged data; the test
    # slice is taken from the tail of the second city. The evaluation script
    # reproduces exactly this slicing, so it is intentionally left unchanged.
    n = len(X)
    train_end = int(n * 0.7)
    val_end = int(n * 0.85)
    if train_end == 0 or val_end == train_end or val_end == n:
        raise ValueError(
            f"not enough samples ({n}) for a non-empty train/val/test split"
        )

    X_train, y_train = X[:train_end], y[:train_end]
    X_val, y_val = X[train_end:val_end], y[train_end:val_end]
    X_test, y_test = X[val_end:], y[val_end:]

    # DataLoaders
    train_ds = TensorDataset(torch.FloatTensor(X_train), torch.LongTensor(y_train))
    val_ds = TensorDataset(torch.FloatTensor(X_val), torch.LongTensor(y_val))
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)

    # Model
    input_dim = X.shape[2]
    model = HeatRiskPredictor(input_dim=input_dim).to(device)
    print(f"模型参数量: {sum(p.numel() for p in model.parameters()):,}")

    # Loss, optimizer and LR schedule
    criterion = FocalLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=5,
    )

    best_val_loss = float("inf")
    patience_counter = 0
    checkpoint_saved = False
    history = {"train_loss": [], "val_loss": [], "val_acc": [], "val_f1": []}

    for epoch in range(MAX_EPOCHS):
        # Training
        model.train()
        train_loss = 0.0
        for batch_X, batch_y in train_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = _multi_horizon_loss(criterion, outputs, batch_y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        val_preds = {key: [] for key in _HORIZONS}
        val_labels = []
        with torch.no_grad():
            for batch_X, batch_y in val_loader:
                batch_X, batch_y = batch_X.to(device), batch_y.to(device)
                outputs = model(batch_X)
                val_loss += _multi_horizon_loss(criterion, outputs, batch_y).item()
                for key in _HORIZONS:
                    val_preds[key].append(outputs[key].argmax(1).cpu().numpy())
                val_labels.append(batch_y.cpu().numpy())

        labels = np.concatenate(val_labels)
        preds = {key: np.concatenate(val_preds[key]) for key in _HORIZONS}

        avg_train = train_loss / len(train_loader)
        avg_val = val_loss / len(val_loader)
        # Both metrics are averaged over the three horizons.
        val_acc = float(np.mean(
            [(preds[key] == labels[:, col]).mean() for col, key in enumerate(_HORIZONS)]
        ))
        val_f1 = float(np.mean(
            [macro_f1(labels[:, col], preds[key]) for col, key in enumerate(_HORIZONS)]
        ))
        scheduler.step(avg_val)

        history["train_loss"].append(avg_train)
        history["val_loss"].append(avg_val)
        history["val_acc"].append(val_acc)
        history["val_f1"].append(val_f1)

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1:3d}: train_loss={avg_train:.4f}, "
                  f"val_loss={avg_val:.4f}, val_acc={val_acc:.4f}, val_f1={val_f1:.4f}")

        # Early stopping on validation loss
        if avg_val < best_val_loss:
            best_val_loss = avg_val
            patience_counter = 0
            torch.save(model.state_dict(), best_path)
            checkpoint_saved = True
        else:
            patience_counter += 1
            if patience_counter >= EARLY_STOP_PATIENCE:
                print(f"Early stopping at epoch {epoch+1}")
                break

    # Persist the training curves
    with open(OUTPUT_LOGS / "training_history.json", "w", encoding="utf-8") as f:
        json.dump(history, f)

    if not checkpoint_saved:
        # Without this guard a stale checkpoint from an earlier run would be
        # evaluated silently.
        raise RuntimeError("training produced no checkpoint; validation loss never improved")

    # Test-set evaluation with the best checkpoint
    print("\n=== 测试集评估 ===")
    model.load_state_dict(torch.load(best_path, map_location=device))
    model.eval()
    test_ds = TensorDataset(torch.FloatTensor(X_test), torch.LongTensor(y_test))
    test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE)

    all_preds = {key: [] for key in _HORIZONS}
    all_labels = []
    with torch.no_grad():
        for batch_X, batch_y in test_loader:
            batch_X, batch_y = batch_X.to(device), batch_y.to(device)
            outputs = model(batch_X)
            for key in _HORIZONS:
                all_preds[key].append(outputs[key].argmax(1).cpu().numpy())
            all_labels.append(batch_y.cpu().numpy())

    # Save predictions next to the checkpoint
    np.savez(OUTPUT_MODELS / "test_predictions.npz",
             short=np.concatenate(all_preds["short"]),
             medium=np.concatenate(all_preds["medium"]),
             long=np.concatenate(all_preds["long"]),
             labels=np.concatenate(all_labels))
    print("训练完成,模型和预测结果已保存")
    return model


if __name__ == "__main__":
    train()
RISK_LABELS = ["低", "中", "高", "严重"]

_CITIES = ("jiaozuo", "zhengzhou")
_HORIZONS = ("short", "medium", "long")
# Split points used by the training script: first 70 % train, last 15 % test.
_TRAIN_END = 0.7
_TEST_START = 0.85


def _load_all_sequences():
    """Return ``(X, y)`` concatenated over both cities, closing each archive."""
    X_parts, y_parts = [], []
    for city in _CITIES:
        with np.load(DATA_PROCESSED / f"{city}_sequences.npz") as data:
            X_parts.append(data["X"])
            y_parts.append(data["y"])
    return np.concatenate(X_parts), np.concatenate(y_parts)


def _prepare_plotting():
    """Create the figure directory and make matplotlib prefer a CJK-capable font."""
    from pathlib import Path

    Path(OUTPUT_FIGURES).mkdir(parents=True, exist_ok=True)
    # Titles and tick labels are Chinese; the default font has no CJK glyphs.
    preferred = ["SimHei", "Microsoft YaHei", "Noto Sans CJK SC", "Arial Unicode MS"]
    existing = [name for name in plt.rcParams["font.sans-serif"] if name not in preferred]
    plt.rcParams["font.sans-serif"] = preferred + existing
    plt.rcParams["axes.unicode_minus"] = False


def load_test_data():
    """Return the last 15 % of the concatenated two-city sequences as ``(X, y)``."""
    X, y = _load_all_sequences()
    start = int(len(X) * _TEST_START)
    return X[start:], y[start:]


def evaluate_lstm(X_test, y_test):
    """Run the saved LSTM checkpoint on ``X_test``.

    Args:
        X_test: Array whose third axis is the model's input dimension
            (``X_test.shape[2]`` is passed as ``input_dim``).
        y_test: Labels belonging to ``X_test``; returned unchanged as an array.

    Returns:
        ``(preds, labels)`` where ``preds`` maps each horizon name to the
        arg-max class predictions for ``X_test``.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    input_dim = X_test.shape[2]
    model = HeatRiskPredictor(input_dim=input_dim).to(device)
    model.load_state_dict(torch.load(OUTPUT_MODELS / "best_model.pt", map_location=device))
    model.eval()

    # Predict in chunks so a large test set does not have to fit on the device.
    batch_size = 256
    chunks = {h: [] for h in _HORIZONS}
    with torch.no_grad():
        for start in range(0, len(X_test), batch_size):
            batch = torch.as_tensor(
                X_test[start:start + batch_size], dtype=torch.float32
            ).to(device)
            outputs = model(batch)
            for h in _HORIZONS:
                chunks[h].append(outputs[h].argmax(1).cpu().numpy())

    preds = {
        h: np.concatenate(chunks[h]) if chunks[h] else np.empty(0, dtype=np.int64)
        for h in _HORIZONS
    }
    return preds, np.asarray(y_test)


def plot_confusion_matrices(lstm_preds, xgb_preds, y_true):
    """Draw a 2x3 grid of confusion matrices (rows: model, columns: horizon).

    The figure is written to ``confusion_matrix_comparison.png`` under
    ``OUTPUT_FIGURES``.
    """
    _prepare_plotting()
    fig, axes = plt.subplots(2, 3, figsize=(15, 10))
    horizon_names = ["短期 (1-3天)", "中期 (7天)", "长期 (30天)"]
    models = [(lstm_preds, "LSTM"), (xgb_preds, "XGBoost")]

    for col, h in enumerate(_HORIZONS):
        for row, (preds, name) in enumerate(models):
            ax = axes[row, col]
            cm = confusion_matrix(y_true[:, col], preds[h], labels=range(4))
            ax.imshow(cm, cmap="Blues")
            ax.set_title(f"{name} - {horizon_names[col]}")
            ax.set_xticks(range(4))
            ax.set_xticklabels(RISK_LABELS)
            ax.set_yticks(range(4))
            ax.set_yticklabels(RISK_LABELS)
            # Annotate every cell with its count.
            for r in range(4):
                for c in range(4):
                    ax.text(c, r, cm[r, c], ha="center", va="center")

    plt.tight_layout()
    plt.savefig(OUTPUT_FIGURES / "confusion_matrix_comparison.png", dpi=150)
    plt.close()


def plot_metrics_comparison(lstm_metrics, xgb_metrics):
    """Draw grouped accuracy / macro-F1 bars for both models, one panel per horizon.

    The figure is written to ``model_comparison.png`` under ``OUTPUT_FIGURES``.
    """
    _prepare_plotting()
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    horizon_names = ["短期", "中期", "长期"]
    x = np.arange(2)
    colors = ["#5b9bd5", "#ed7d31"]
    width = 0.3

    for i, h in enumerate(_HORIZONS):
        for j, metric in enumerate(["accuracy", "f1_macro"]):
            values = [lstm_metrics[h][metric], xgb_metrics[h][metric]]
            axes[i].bar(x + j * width - width / 2, values, width, color=colors[j],
                        label=metric.upper())
        axes[i].set_title(horizon_names[i])
        # The two bars of a model straddle x, so the group centre is x itself.
        axes[i].set_xticks(x)
        axes[i].set_xticklabels(["LSTM", "XGBoost"])
        axes[i].set_ylim(0, 1)
        if i == 0:
            axes[i].legend()
    plt.tight_layout()
    plt.savefig(OUTPUT_FIGURES / "model_comparison.png", dpi=150)
    plt.close()


def evaluate():
    """Compare the LSTM checkpoint with the XGBoost baseline on the test slice.

    Prints a metrics table and writes the two comparison figures.
    """
    # Load the archives once and derive both slices from the same arrays.
    X_all, y_all = _load_all_sequences()
    n = len(X_all)
    test_start = int(n * _TEST_START)
    X_test, y_test = X_all[test_start:], y_all[test_start:]
    print(f"测试集: X{X_test.shape}, y{y_test.shape}")

    # LSTM
    lstm_preds, _ = evaluate_lstm(X_test, y_test)
    lstm_metrics = {}
    for i, h in enumerate(_HORIZONS):
        lstm_metrics[h] = {
            "accuracy": accuracy_score(y_test[:, i], lstm_preds[h]),
            "f1_macro": f1_score(y_test[:, i], lstm_preds[h], average="macro"),
        }

    # XGBoost, fitted on the same first 70 % that the LSTM was trained on
    train_end = int(n * _TRAIN_END)
    xgb_results = train_xgboost_baseline(
        X_all[:train_end], y_all[:train_end],
        X_test, y_test,
    )
    xgb_metrics = {h: {"accuracy": xgb_results[h]["accuracy"],
                       "f1_macro": xgb_results[h]["f1_macro"]}
                   for h in _HORIZONS}

    # Comparison table
    print("\n=== 模型对比 ===")
    print(f"{'时间尺度':<10} {'指标':<12} {'LSTM':<10} {'XGBoost':<10}")
    print("-" * 42)
    for h, h_name in zip(_HORIZONS, ["短期", "中期", "长期"]):
        for metric in ["accuracy", "f1_macro"]:
            print(f"{h_name:<10} {metric:<12} "
                  f"{lstm_metrics[h][metric]:<10.4f} {xgb_metrics[h][metric]:<10.4f}")

    # Figures
    plot_confusion_matrices(lstm_preds,
                            {h: xgb_results[h]["predictions"] for h in _HORIZONS},
                            y_test)
    plot_metrics_comparison(lstm_metrics, xgb_metrics)
    print("图表已保存到 outputs/figures/")


if __name__ == "__main__":
    evaluate()
app = Flask(__name__, static_folder="static")

# Model state shared by all requests; populated lazily by load_model().
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = None
feature_cols = None  # kept for importers; never assigned in this module

RISK_LABELS = ["低风险", "中风险", "高风险", "严重风险"]
RISK_COLORS = ["#00e676", "#ffeb3b", "#ff9800", "#f44336"]
SUGGESTIONS = {
    0: ["天气状况良好,无需特殊防护"],
    1: ["注意防暑降温", "保持室内通风", "老年人减少午后外出"],
    2: ["建议开放社区避暑中心", "增加独居老人电话探访频次", "社区志愿者关注高龄老人"],
    3: ["启动高温应急预案", "社区避暑中心24小时开放", "逐一入户探访独居老人",
        "医疗机构做好热射病救治准备", "通过社区广播发布高温警报"],
}


def _json_list(series):
    """Return ``series`` as a list with float NaN replaced by ``None``.

    ``jsonify`` would otherwise emit bare ``NaN`` tokens, which are not valid
    JSON and are rejected by ``JSON.parse`` in the browser.
    """
    return [None if isinstance(v, float) and v != v else v for v in series.tolist()]


def _read_processed():
    """Read ``combined_processed.csv`` with the ``time`` column parsed as dates."""
    import pandas as pd

    return pd.read_csv(DATA_PROCESSED / "combined_processed.csv", parse_dates=["time"])


def load_model():
    """Build the predictor, load the best checkpoint and publish it as ``model``."""
    global model
    # Only the feature dimension is needed; no pickled objects are involved.
    with np.load(DATA_PROCESSED / "jiaozuo_sequences.npz") as data:
        input_dim = data["X"].shape[2]
    net = HeatRiskPredictor(input_dim=input_dim).to(device)
    net.load_state_dict(torch.load(OUTPUT_MODELS / "best_model.pt", map_location=device))
    net.eval()
    # Assign last so a concurrent request never sees a model without weights.
    model = net


@app.route("/")
def index():
    """Serve the dashboard page."""
    return send_from_directory("static", "index.html")


@app.route("/api/predict")
def predict():
    """Return the prediction for the last stored Jiaozuo sequence window."""
    if model is None:
        load_model()

    # The last window in the archive (14 days according to the original note).
    with np.load(DATA_PROCESSED / "jiaozuo_sequences.npz") as data:
        latest = data["X"][-1:]
    recent = torch.as_tensor(latest, dtype=torch.float32).to(device)
    with torch.no_grad():
        outputs = model(recent)

    predictions = {}
    for key in ("short", "medium", "long"):
        probs = torch.softmax(outputs[key], dim=-1)[0].cpu().numpy()
        level = int(probs.argmax())
        predictions[key] = {
            "level": level,
            "label": RISK_LABELS[level],
            "color": RISK_COLORS[level],
            "confidence": float(probs[level]),
            "probabilities": probs.tolist(),
            "suggestions": SUGGESTIONS[level],
        }

    return jsonify({
        "city": "焦作",
        # NOTE(review): the date is a fixed placeholder and is not derived from
        # the data the prediction is based on; confirm the intended source.
        "date": "2024-07-15",
        "predictions": predictions,
        "risk_population": 454000,  # Jiaozuo population aged 65+
    })


@app.route("/api/history")
def history():
    """Return the last 90 rows of the processed data for the dashboard charts."""
    df = _read_processed()
    # NOTE(review): the file name suggests rows of both cities; if so, the tail
    # covers only whichever city was written last. Confirm against the CSV.
    recent = df.tail(90)
    return jsonify({
        "dates": recent["time"].dt.strftime("%Y-%m-%d").tolist(),
        "temp_mean": _json_list(recent["temp_mean"]),
        "heat_index": _json_list(recent["heat_index"]),
        "risk_label": _json_list(recent["risk_label"]),
        "heatwave": _json_list(recent["heatwave"]),
    })


@app.route("/api/stats")
def stats():
    """Return per-year temperature and heat-wave summaries plus ageing rates."""
    df = _read_processed()
    annual = df.groupby(df["time"].dt.year).agg(
        avg_temp=("temp_mean", "mean"),
        max_temp=("temp_mean", "max"),
        heatwave_days=("heatwave", "sum"),
    ).reset_index()
    return jsonify({
        "annual": {
            "years": annual["time"].astype(int).tolist(),
            "avg_temp": _json_list(annual["avg_temp"].round(1)),
            "max_temp": _json_list(annual["max_temp"].round(1)),
            "heatwave_days": annual["heatwave_days"].astype(int).tolist(),
        },
        "aging_rate": {"jiaozuo": 12.8, "zhengzhou": 11.6},
    })


def main():
    """Load the model and start the development server on port 5005."""
    import os

    load_model()
    # The interactive debugger allows arbitrary code execution, so it must not
    # be enabled by default on a server that listens on all interfaces.
    debug = os.environ.get("FLASK_DEBUG", "0").strip().lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", port=5005, debug=debug)


if __name__ == "__main__":
    main()
+### Task 11: ECharts 可视化大屏前端 + +**Files:** +- Create: `src/web/static/index.html` + +- [ ] **Step 1: 创建大屏 HTML** + +```html + + +
+ + +