{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "title",
   "metadata": {},
   "source": [
    "# 高温热浪与银发群体健康风险 —— 探索性数据分析\n",
    "焦作市 · 郑州市 | 2010-2024 年气象数据"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sec-setup",
   "metadata": {},
   "source": [
    "## 1. 环境配置与数据加载\n",
    "\n",
    "运行前请先执行 `preprocess.py` 生成 `jiaozuo_processed.csv`、`zhengzhou_processed.csv` 和 `combined_processed.csv`;若文件缺失,依赖这些数据的单元会被跳过。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "from pathlib import Path\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "\n",
    "# Project-local paths and constants. Importing `src` presumably requires the\n",
    "# repository root on sys.path (e.g. kernel started there) -- TODO confirm.\n",
    "from src.utils.config import DATA_PROCESSED, DATA_EXTERNAL, OUTPUT_FIGURES, CITIES\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Blanket suppression kept from the original notebook. Note that it also hides\n",
    "# pandas deprecation warnings and matplotlib missing-glyph warnings.\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "sns.set_style(\"whitegrid\")\n",
    "# The font settings must come after sns.set_style(), which resets\n",
    "# font.sans-serif to seaborn's defaults.\n",
    "plt.rcParams[\"font.sans-serif\"] = [\"SimHei\", \"Microsoft YaHei\", \"DejaVu Sans\"]\n",
    "plt.rcParams[\"axes.unicode_minus\"] = False\n",
    "\n",
    "# Message printed by every data-dependent cell when the processed CSVs are missing.\n",
    "MISSING_DATA_MSG = \"需要数据文件\"\n",
    "\n",
    "\n",
    "def save_figure(fig, filename, dpi=150):\n",
    "    \"\"\"Save ``fig`` as ``OUTPUT_FIGURES / filename``.\n",
    "\n",
    "    The output directory is created first so that a fresh checkout does not\n",
    "    fail on the first ``savefig`` call.\n",
    "    \"\"\"\n",
    "    out_dir = Path(OUTPUT_FIGURES)\n",
    "    out_dir.mkdir(parents=True, exist_ok=True)\n",
    "    fig.savefig(out_dir / filename, dpi=dpi, bbox_inches=\"tight\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "load-data",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Try to load the processed data. Later cells check `data_loaded` and iterate\n",
    "# over `city_frames`, so both names are defined on either path.\n",
    "try:\n",
    "    df_jz = pd.read_csv(DATA_PROCESSED / \"jiaozuo_processed.csv\", parse_dates=[\"time\"])\n",
    "    df_zz = pd.read_csv(DATA_PROCESSED / \"zhengzhou_processed.csv\", parse_dates=[\"time\"])\n",
    "    # Not used by any cell below; kept so a missing combined file is still reported.\n",
    "    df_combined = pd.read_csv(DATA_PROCESSED / \"combined_processed.csv\", parse_dates=[\"time\"])\n",
    "except FileNotFoundError:\n",
    "    print(\"处理后的数据不存在,请先运行 preprocess.py\")\n",
    "    # No simulated data is generated anywhere in this notebook, so say what\n",
    "    # actually happens instead of promising a demo dataset.\n",
    "    print(\"依赖处理后数据的分析单元将被跳过\")\n",
    "    data_loaded = False\n",
    "    city_frames = []\n",
    "else:\n",
    "    print(f\"焦作: {df_jz.shape[0]} 天, 郑州: {df_zz.shape[0]} 天\")\n",
    "    data_loaded = True\n",
    "    city_frames = [(df_jz, \"焦作\"), (df_zz, \"郑州\")]\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sec-annual",
   "metadata": {},
   "source": [
    "## 2. 年度气温趋势"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "annual-temp",
   "metadata": {},
   "outputs": [],
   "source": [
    "if data_loaded:\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
    "    for ax, (df, name) in zip(axes, city_frames):\n",
    "        # Yearly mean / max / min of the `temp_mean` column; the max and min are\n",
    "        # therefore extremes of that column, not of separate Tmax/Tmin series.\n",
    "        annual = df.groupby(df[\"time\"].dt.year)[\"temp_mean\"].agg([\"mean\", \"max\", \"min\"])\n",
    "        annual.plot(ax=ax, color=[\"#ff9800\", \"#f44336\", \"#5b9bd5\"])\n",
    "        ax.set_title(f\"{name} - 年均气温趋势\", fontsize=14)\n",
    "        ax.set_ylabel(\"温度 (C)\")\n",
    "        ax.set_xlabel(\"年份\")\n",
    "        # Legend order follows the agg() column order above.\n",
    "        ax.legend([\"平均\", \"最高\", \"最低\"])\n",
    "    fig.tight_layout()\n",
    "    save_figure(fig, \"annual_temp_trend.png\")\n",
    "    plt.show()\n",
    "else:\n",
    "    print(MISSING_DATA_MSG)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sec-risk",
   "metadata": {},
   "source": [
    "## 3. 风险等级分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "risk-distribution",
   "metadata": {},
   "outputs": [],
   "source": [
    "if data_loaded:\n",
    "    risk_names = [\"低\", \"中\", \"高\", \"严重\"]\n",
    "    risk_colors = [\"#00e676\", \"#ffeb3b\", \"#ff9800\", \"#f44336\"]\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n",
    "    for ax, (df, name) in zip(axes, city_frames):\n",
    "        # Assumes `risk_label` is coded 0-3 in the order of `risk_names` -- TODO confirm.\n",
    "        counts = df[\"risk_label\"].value_counts()\n",
    "        values = [counts.get(level, 0) for level in range(len(risk_names))]\n",
    "        ax.bar(risk_names, values, color=risk_colors)\n",
    "        ax.set_title(f\"{name} - 风险等级分布\", fontsize=14)\n",
    "        ax.set_xlabel(\"风险等级\")\n",
    "        ax.set_ylabel(\"天数\")\n",
    "        # Write each count just above its bar (offset = 1% of the tallest bar).\n",
    "        offset = max(values) * 0.01\n",
    "        for i, v in enumerate(values):\n",
    "            ax.text(i, v + offset, str(v), ha='center')\n",
    "    fig.tight_layout()\n",
    "    save_figure(fig, \"risk_distribution.png\")\n",
    "    plt.show()\n",
    "else:\n",
    "    print(MISSING_DATA_MSG)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sec-heatwave",
   "metadata": {},
   "source": [
    "## 4. 年度热浪天数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "heatwave-days",
   "metadata": {},
   "outputs": [],
   "source": [
    "if data_loaded:\n",
    "    for df, name in city_frames:\n",
    "        # Assumes `heatwave` is a 0/1 (or boolean) per-day flag, so the yearly\n",
    "        # sum is a count of heatwave days -- TODO confirm against preprocess.py.\n",
    "        annual_hw = df.groupby(df[\"time\"].dt.year)[\"heatwave\"].sum()\n",
    "        print(f\"\\n{name} 热浪天数统计:\")\n",
    "        print(annual_hw.describe())\n",
    "        print(f\" 年均热浪天数: {annual_hw.mean():.1f} 天\")\n",
    "else:\n",
    "    print(MISSING_DATA_MSG)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sec-exposure",
   "metadata": {},
   "source": [
    "## 5. 温度-死亡率暴露反应曲线"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "exposure-response",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only loading is best-effort: a missing or unreadable CSV is reported and the\n",
    "# plot is skipped. Plotting runs outside the try block so that real bugs (for\n",
    "# example a missing column) are not misreported as a loading failure.\n",
    "try:\n",
    "    er = pd.read_csv(DATA_EXTERNAL / \"exposure_response.csv\")\n",
    "except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:\n",
    "    er = None\n",
    "    print(f\"无法加载暴露反应数据: {e}\")\n",
    "\n",
    "if er is not None:\n",
    "    fig, ax = plt.subplots(figsize=(8, 5))\n",
    "    ax.plot(er[\"percentile\"], er[\"rr\"], \"o-\", color=\"#f44336\", linewidth=2, markersize=8)\n",
    "    # RR = 1 reference line; the shaded band shows the gap between it and the curve.\n",
    "    ax.axhline(y=1.0, color=\"gray\", linestyle=\"--\", alpha=0.7)\n",
    "    ax.fill_between(er[\"percentile\"], 1.0, er[\"rr\"], alpha=0.2, color=\"#f44336\")\n",
    "    ax.set_xlabel(\"温度百分位数 (%)\", fontsize=12)\n",
    "    ax.set_ylabel(\"相对风险 (RR)\", fontsize=12)\n",
    "    # The line break in the title must be the Python escape \\n; a raw newline\n",
    "    # inside the string literal makes the whole cell a SyntaxError.\n",
    "    ax.set_title(\"温度-老年人死亡率暴露反应曲线\\n(来源: Chen et al. 2018, Lancet Planet Health)\", fontsize=13)\n",
    "    fig.tight_layout()\n",
    "    save_figure(fig, \"exposure_response.png\")\n",
    "    plt.show()\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "sec-monthly",
   "metadata": {},
   "source": [
    "## 6. 月度气温模式"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "monthly-temp",
   "metadata": {},
   "outputs": [],
   "source": [
    "if data_loaded:\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
    "    for ax, (df, name) in zip(axes, city_frames):\n",
    "        monthly = df.groupby(df[\"time\"].dt.month)[\"temp_mean\"].agg([\"mean\", \"std\"])\n",
    "        lower = monthly[\"mean\"] - monthly[\"std\"]\n",
    "        upper = monthly[\"mean\"] + monthly[\"std\"]\n",
    "        # Shaded band: mean +/- one standard deviation for each calendar month.\n",
    "        ax.fill_between(monthly.index, lower, upper, alpha=0.3, color=\"#ff9800\")\n",
    "        ax.plot(monthly.index, monthly[\"mean\"], \"o-\", color=\"#f44336\", linewidth=2)\n",
    "        ax.set_title(f\"{name} - 月均气温模式\", fontsize=14)\n",
    "        ax.set_xlabel(\"月份\")\n",
    "        ax.set_ylabel(\"温度 (C)\")\n",
    "        ax.set_xticks(range(1, 13))\n",
    "    fig.tight_layout()\n",
    "    save_figure(fig, \"monthly_temp_pattern.png\")\n",
    "    plt.show()\n",
    "else:\n",
    "    print(MISSING_DATA_MSG)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "summary",
   "metadata": {},
   "source": [
    "## EDA 小结\n",
    "\n",
    "1. 郑州和焦作两市气温趋势高度一致,均呈缓慢上升趋势\n",
    "2. 夏季(6-8月)是高温热浪高发期,7月风险最高\n",
    "3. 风险等级分布呈长尾特征:低风险占多数,严重风险为稀有事件\n",
    "4. 温度-死亡率暴露反应曲线呈 J 型,高温端风险显著上升\n",
    "5. 两市老龄化率均在 11-13%,郑州老年人口绝对数量更大\n",
    "\n",
    "> 注:以上结论需结合实际运行输出核对;第 5 点(老龄化率与老年人口规模)未在本 notebook 中计算,数据来源需另行确认。\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.13.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}