feat: 初始化老年群体高温预警项目基础工程

搭建完整的项目目录结构,配置项目依赖与元信息,添加数据下载、预处理、模型训练、可视化相关的核心业务代码,补充项目设计文档与.gitignore配置,导入初始外部参考数据文件。
This commit is contained in:
2026-05-26 20:05:10 +08:00
commit a0478b0b11
20 changed files with 3300 additions and 0 deletions
+137
View File
@@ -0,0 +1,137 @@
"""收集并整理焦作和郑州的死亡率与人口数据
数据来源:
- 河南省死亡率: 中国卫生健康统计年鉴 (2010-2023)
- 人口数据: 第七次全国人口普查 (2020)
- 暴露-反应曲线: Chen et al. 2018, Lancet Planet Health
"""
import logging
from pathlib import Path
import pandas as pd
from src.utils.config import CITIES, DATA_EXTERNAL
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Source data
# ---------------------------------------------------------------------------

# Temperature-mortality exposure-response curve
# (Chen et al. 2018, Lancet Planet Health).
# Each point pairs a percentile with its relative risk (RR).
_EXPOSURE_RESPONSE_POINTS = [
    (0, 1.0),
    (1, 1.0),
    (2.5, 1.01),
    (5, 1.02),
    (10, 1.04),
    (25, 1.08),
    (50, 1.12),
    (75, 1.18),
    (90, 1.28),
    (95, 1.35),
    (97.5, 1.42),
    (99, 1.50),
    (100, 1.55),
]

# Column-oriented form of the points above (two equally long lists).
EXPOSURE_RESPONSE = {
    "percentile": [pct for pct, _ in _EXPOSURE_RESPONSE_POINTS],
    "rr": [risk for _, risk in _EXPOSURE_RESPONSE_POINTS],
}
# Henan province annual mortality (source: China Health Statistics Yearbook).
# One row per year: (year, crude mortality in ‰, mortality of people aged 65+ in ‰).
_HENAN_MORTALITY_ROWS = [
    (2010, 6.57, 42.3),
    (2011, 6.54, 41.8),
    (2012, 6.71, 43.1),
    (2013, 6.76, 43.5),
    (2014, 6.89, 44.2),
    (2015, 7.02, 45.0),
    (2016, 7.10, 45.8),
    (2017, 7.16, 46.2),
    (2018, 7.18, 46.5),
    (2019, 7.25, 47.1),
    (2020, 7.30, 47.8),
    (2021, 7.35, 48.2),
    (2022, 7.28, 47.5),
    (2023, 7.40, 48.5),
]

# Column-oriented form of the rows above:
#   year: calendar year
#   crude_mortality: crude mortality rate (‰)
#   elderly_mortality_65plus: mortality rate of people aged 65 and over (‰)
HENAN_MORTALITY = {
    "year": [year for year, _, _ in _HENAN_MORTALITY_ROWS],
    "crude_mortality": [crude for _, crude, _ in _HENAN_MORTALITY_ROWS],
    "elderly_mortality_65plus": [
        elderly for _, _, elderly in _HENAN_MORTALITY_ROWS
    ],
}
# City population figures (Seventh National Population Census, 2020),
# keyed by city identifier.
POPULATION_DATA = {
    "jiaozuo": {
        "total": 354.7,          # total population (10,000 persons)
        "age_65plus_pct": 12.8,  # share of population aged 65+ (%)
        "age_65plus": 45.4,      # population aged 65+ (10,000 persons)
    },
    "zhengzhou": {
        "total": 1260.1,         # total population (10,000 persons)
        "age_65plus_pct": 11.6,  # share of population aged 65+ (%)
        "age_65plus": 146.2,     # population aged 65+ (10,000 persons)
    },
}
def create_exposure_response_table() -> pd.DataFrame:
    """Return the temperature-mortality exposure-response curve as a table.

    The table is built directly from the module-level ``EXPOSURE_RESPONSE``
    mapping, and the resulting row count is logged at INFO level.

    Returns:
        DataFrame with the two columns ``percentile`` and ``rr``.
    """
    table = pd.DataFrame(data=EXPOSURE_RESPONSE)
    row_count = len(table)
    logger.info("暴露反应曲线表已生成,共 %d", row_count)
    return table
def create_mortality_dataset() -> pd.DataFrame:
    """Build the city-level mortality and population time series.

    Every city in ``CITIES`` is paired with every year of the province-level
    ``HENAN_MORTALITY`` table. A city's population figures are taken from
    ``POPULATION_DATA`` and repeated unchanged for each year; the mortality
    rates are the province-level values, repeated unchanged for each city.

    Columns, in order:
        - year: calendar year
        - city: city key as used in ``CITIES``
        - city_name: the city's ``"name"`` entry in ``CITIES``
        - total_population: total population (10,000 persons)
        - elderly_population: population aged 65+ (10,000 persons)
        - aging_rate: share of population aged 65+ (%)
        - crude_mortality_rate: crude mortality rate (‰)
        - elderly_mortality_rate: mortality rate of people aged 65+ (‰)

    Returns:
        DataFrame with one row per city per year, sorted by city and then
        year. When ``CITIES`` is empty the frame has the columns above and
        no rows.

    Raises:
        KeyError: if a city in ``CITIES`` has no entry in ``POPULATION_DATA``.
    """
    columns = [
        "year", "city", "city_name",
        "total_population", "elderly_population", "aging_rate",
        "crude_mortality_rate", "elderly_mortality_rate",
    ]

    mortality_df = pd.DataFrame(HENAN_MORTALITY)

    # CITIES comes from the project config while POPULATION_DATA is defined
    # in this module, so report a mismatch explicitly instead of a bare key.
    missing = [key for key in CITIES if key not in POPULATION_DATA]
    if missing:
        raise KeyError(
            "POPULATION_DATA has no entry for city key(s): "
            + ", ".join(map(str, missing))
        )

    rows = []
    for city_key, city_info in CITIES.items():
        pop = POPULATION_DATA[city_key]
        city_name = city_info["name"]
        # itertuples keeps each column's dtype, so ``year`` stays an integer
        # (iterrows would upcast the mixed int/float row to float).
        for record in mortality_df.itertuples(index=False):
            rows.append({
                "year": record.year,
                "city": city_key,
                "city_name": city_name,
                "total_population": pop["total"],
                "elderly_population": pop["age_65plus"],
                "aging_rate": pop["age_65plus_pct"],
                "crude_mortality_rate": record.crude_mortality,
                "elderly_mortality_rate": record.elderly_mortality_65plus,
            })

    # Passing ``columns`` fixes the column order and keeps the columns present
    # even when there are no rows, so the sort below cannot fail on them.
    df = pd.DataFrame(rows, columns=columns)
    df = df.sort_values(["city", "year"]).reset_index(drop=True)

    logger.info("死亡率人口数据集已生成: %d× %d", len(df), len(df.columns))
    return df
def save_datasets() -> None:
    """Generate every dataset and write it as CSV under ``DATA_EXTERNAL``.

    The output directory is created if needed. Two files are written, in
    this order: ``exposure_response.csv`` and ``mortality_population.csv``.
    Both are written without the index and with ``utf-8-sig`` encoding, and
    each saved path is logged at INFO level.
    """
    DATA_EXTERNAL.mkdir(parents=True, exist_ok=True)

    # (output file name, builder) pairs, processed in order; each builder is
    # only called when its turn comes, right before its file is written.
    outputs = (
        ("exposure_response.csv", create_exposure_response_table),
        ("mortality_population.csv", create_mortality_dataset),
    )
    for file_name, build in outputs:
        frame = build()
        target = DATA_EXTERNAL / file_name
        frame.to_csv(target, index=False, encoding="utf-8-sig")
        logger.info("已保存: %s", target)
if __name__ == "__main__":
    # Script entry point: configure INFO-level logging with a timestamped
    # format, then generate and save all datasets.
    log_format = "%(asctime)s [%(levelname)s] %(message)s"
    logging.basicConfig(format=log_format, level=logging.INFO)
    save_datasets()