feat: 初始化老年群体高温预警项目基础工程
搭建完整的项目目录结构,配置项目依赖与元信息,添加数据下载、预处理、模型训练、可视化相关的核心业务代码,补充项目设计文档与.gitignore配置,导入初始外部参考数据文件。
This commit is contained in:
@@ -0,0 +1,137 @@
|
||||
"""收集并整理焦作和郑州的死亡率与人口数据
|
||||
|
||||
数据来源:
|
||||
- 河南省死亡率: 中国卫生健康统计年鉴 (2010-2023)
|
||||
- 人口数据: 第七次全国人口普查 (2020)
|
||||
- 暴露-反应曲线: Chen et al. 2018, Lancet Planet Health
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from src.utils.config import CITIES, DATA_EXTERNAL
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 源数据
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Temperature-mortality exposure-response curve
# (cited source: Chen et al. 2018, Lancet Planet Health).
# Relative risk (RR) per percentile. The two lists are index-aligned:
# "rr"[i] is the relative risk listed for "percentile"[i].
# NOTE(review): the values are hard-coded here; confirm them against the
# cited paper before relying on them.
EXPOSURE_RESPONSE = {
    "percentile": [0, 1, 2.5, 5, 10, 25, 50, 75, 90, 95, 97.5, 99, 100],
    "rr": [
        1.0, 1.0, 1.01, 1.02, 1.04, 1.08, 1.12,
        1.18, 1.28, 1.35, 1.42, 1.50, 1.55,
    ],
}
|
||||
|
||||
# Annual mortality rates for Henan province
# (cited source: China Health Statistics Yearbook).
#   crude_mortality:          crude death rate (per mille)
#   elderly_mortality_65plus: death rate among people aged 65+ (per mille)
# All three lists are index-aligned with "year" (2010 through 2023).
HENAN_MORTALITY = {
    "year": list(range(2010, 2024)),
    "crude_mortality": [
        6.57, 6.54, 6.71, 6.76, 6.89, 7.02, 7.10, 7.16,
        7.18, 7.25, 7.30, 7.35, 7.28, 7.40,
    ],
    "elderly_mortality_65plus": [
        42.3, 41.8, 43.1, 43.5, 44.2, 45.0, 45.8, 46.2,
        46.5, 47.1, 47.8, 48.2, 47.5, 48.5,
    ],
}
|
||||
|
||||
# City population figures (cited source: Seventh National Population
# Census, 2020), keyed by city key.
#   total:          total population (units of 10,000 people)
#   age_65plus_pct: share of the population aged 65+ (%)
#   age_65plus:     population aged 65+ (units of 10,000 people)
POPULATION_DATA = {
    "jiaozuo": {"total": 354.7, "age_65plus_pct": 12.8, "age_65plus": 45.4},
    "zhengzhou": {"total": 1260.1, "age_65plus_pct": 11.6, "age_65plus": 146.2},
}
|
||||
|
||||
|
||||
def create_exposure_response_table() -> pd.DataFrame:
    """Build the temperature-mortality exposure-response table.

    Returns:
        A DataFrame built directly from ``EXPOSURE_RESPONSE``, with the two
        columns ``percentile`` and ``rr`` and one row per listed percentile.
    """
    df = pd.DataFrame(EXPOSURE_RESPONSE)
    logger.info("暴露反应曲线表已生成,共 %d 行", len(df))
    return df
|
||||
|
||||
|
||||
def create_mortality_dataset() -> pd.DataFrame:
    """Build the per-city, per-year mortality and population dataset.

    Every city in ``CITIES`` is paired with every year in
    ``HENAN_MORTALITY``. Note what the values actually are: the mortality
    rates are the province-level series repeated unchanged for each city,
    and the population figures are the single ``POPULATION_DATA`` entry for
    the city repeated unchanged for every year.

    Columns, in order:
        - year: calendar year
        - city: city key (the key used in ``CITIES``)
        - city_name: value of the city's ``"name"`` entry in ``CITIES``
        - total_population: total population (units of 10,000 people)
        - elderly_population: population aged 65+ (units of 10,000 people)
        - aging_rate: share of the population aged 65+ (%)
        - crude_mortality_rate: crude death rate (per mille)
        - elderly_mortality_rate: death rate among people aged 65+ (per mille)

    Returns:
        A DataFrame with one row per city per year, sorted by ``city`` then
        ``year``, with a fresh 0-based index. If ``CITIES`` is empty the
        result is an empty frame that still carries all columns.

    Raises:
        KeyError: if a key of ``CITIES`` has no entry in ``POPULATION_DATA``,
            or a city entry has no ``"name"``.
    """
    columns = [
        "year", "city", "city_name",
        "total_population", "elderly_population", "aging_rate",
        "crude_mortality_rate", "elderly_mortality_rate",
    ]

    # Building the frame first keeps the length check on the three
    # index-aligned lists (pandas raises if they differ in length).
    mortality_df = pd.DataFrame(HENAN_MORTALITY)

    rows = []
    for city_key, city_info in CITIES.items():
        pop = POPULATION_DATA[city_key]
        # itertuples keeps each column's own dtype; iterrows would upcast
        # the whole row (including the year) to float.
        rows.extend(
            {
                "year": int(record.year),
                "city": city_key,
                "city_name": city_info["name"],
                "total_population": pop["total"],
                "elderly_population": pop["age_65plus"],
                "aging_rate": pop["age_65plus_pct"],
                "crude_mortality_rate": record.crude_mortality,
                "elderly_mortality_rate": record.elderly_mortality_65plus,
            }
            for record in mortality_df.itertuples(index=False)
        )

    # Passing columns= fixes the column order and keeps the sort below valid
    # even when no rows were produced (an empty frame would otherwise have
    # no "city"/"year" columns to sort on).
    df = pd.DataFrame(rows, columns=columns)
    df = df.sort_values(["city", "year"]).reset_index(drop=True)

    logger.info("死亡率人口数据集已生成: %d 行 × %d 列", len(df), len(df.columns))
    return df
|
||||
|
||||
|
||||
def save_datasets() -> None:
    """Generate every dataset and write each one as CSV under DATA_EXTERNAL.

    Creates the ``DATA_EXTERNAL`` directory (including parents) if it does
    not exist, then writes, in this order:
        - ``exposure_response.csv`` from ``create_exposure_response_table()``
        - ``mortality_population.csv`` from ``create_mortality_dataset()``

    Files are written without the index column and are overwritten if they
    already exist.
    """
    DATA_EXTERNAL.mkdir(parents=True, exist_ok=True)

    outputs = (
        ("exposure_response.csv", create_exposure_response_table),
        ("mortality_population.csv", create_mortality_dataset),
    )
    for filename, build in outputs:
        frame = build()
        target = DATA_EXTERNAL / filename
        # "utf-8-sig" prepends a BOM; presumably chosen so spreadsheet tools
        # detect the encoding of the Chinese text — confirm with consumers.
        frame.to_csv(target, index=False, encoding="utf-8-sig")
        logger.info("已保存: %s", target)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: configure INFO-level logging with a timestamped
    # format, then generate and save all datasets.
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )
    save_datasets()
|
||||
Reference in New Issue
Block a user