feat: add reinforcement learning project report and restructure coursework report code

- Add the personal reinforcement learning project report, including a PPO algorithm implemented from scratch in PyTorch (see the objective sketch below)
- Restructure the coursework report code, extracting runtime path management and notebook execution logic into standalone modules
- Update requirements.txt with the reinforcement learning dependencies
- Simplify the model comparison results table, keeping only the baseline logistic regression row
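
The PPO report code itself is not part of this diff; purely as a point of reference, below is a minimal, hypothetical sketch of the clipped surrogate objective that a from-scratch PyTorch PPO implementation typically centres on. All names here are illustrative and are not taken from the committed report.

import torch

def ppo_clip_loss(new_log_probs, old_log_probs, advantages, clip_eps=0.2):
    # Probability ratio between the updated policy and the policy that collected the data
    ratio = torch.exp(new_log_probs - old_log_probs)
    # Clipped surrogate objective: take the pessimistic minimum of the two terms
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    return -torch.min(unclipped, clipped).mean()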
2026-04-30 16:54:41 +08:00
parent 6ac02ba4fe
commit d353133b31
21 changed files with 1639 additions and 102 deletions
@@ -43,17 +43,68 @@
"execution_count": null,
"id": "a12f069a",
"metadata": {},
"outputs": [],
"source": "import xgboost as xgb\nimport optuna\noptuna.logging.set_verbosity(optuna.logging.WARNING)\n\n# GPU Fallback: 自动检测CUDA可用性,无GPU时自动切换到CPU\ntry:\n import subprocess\n result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n USE_GPU = result.returncode == 0\nexcept:\n USE_GPU = False\n\nXGB_TREE_METHOD = 'gpu_hist' if USE_GPU else 'hist'\nXGB_DEVICE = 'cuda' if USE_GPU else 'cpu'\nprint(f'XGBoost compute method: {\"GPU (CUDA)\" if USE_GPU else \"CPU\"}')\n\nRANDOM_STATE = 42\nnp.random.seed(RANDOM_STATE)\nplt.rcParams['figure.figsize'] = (10, 6)\nplt.rcParams['font.size'] = 12\nsns.set_style('whitegrid')\nprint('All libraries imported successfully!')"
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mRunning cells with 'my_env (Python 3.10.18)' requires the ipykernel package.\n",
"\u001b[1;31m<a href='command:jupyter.createPythonEnvAndSelectController'>Create a Python Environment</a> with the required packages.\n",
"\u001b[1;31mOr install 'ipykernel' using the command: 'conda install -n my_env ipykernel --update-deps --force-reinstall'"
]
}
],
"source": [
"import xgboost as xgb\n",
"import optuna\n",
"optuna.logging.set_verbosity(optuna.logging.WARNING)\n",
"\n",
"# GPU Fallback: 自动检测CUDA可用性,无GPU时自动切换到CPU\n",
"try:\n",
" import subprocess\n",
" result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n",
" USE_GPU = result.returncode == 0\n",
"except:\n",
" USE_GPU = False\n",
"\n",
"XGB_TREE_METHOD = 'gpu_hist' if USE_GPU else 'hist'\n",
"XGB_DEVICE = 'cuda' if USE_GPU else 'cpu'\n",
"print(f'XGBoost compute method: {\"GPU (CUDA)\" if USE_GPU else \"CPU\"}')\n",
"\n",
"RANDOM_STATE = 42\n",
"np.random.seed(RANDOM_STATE)\n",
"plt.rcParams['figure.figsize'] = (10, 6)\n",
"plt.rcParams['font.size'] = 12\n",
"sns.set_style('whitegrid')\n",
"print('All libraries imported successfully!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c4b453a",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mRunning cells with 'my_env (Python 3.10.18)' requires the ipykernel package.\n",
"\u001b[1;31m<a href='command:jupyter.createPythonEnvAndSelectController'>Create a Python Environment</a> with the required packages.\n",
"\u001b[1;31mOr install 'ipykernel' using the command: 'conda install -n my_env ipykernel --update-deps --force-reinstall'"
]
}
],
"source": [
"DATA_DIR = r'd:\\Code\\doing_exercises\\programs\\外教作业外快\\强化学习个人课程作业报告\\dataset_final'\nOUTPUT_DIR = r'd:\\Code\\doing_exercises\\programs\\外教作业外快\\强化学习个人课程作业报告\\outputs'\n\ntrain_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))\nval_df = pd.read_csv(os.path.join(DATA_DIR, 'val.csv'))\ntest_df = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))\n\nprint(f'Train shape: {train_df.shape}')\nprint(f'Val shape: {val_df.shape}')\nprint(f'Test shape: {test_df.shape}')"
"train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))\n",
"val_df = pd.read_csv(os.path.join(DATA_DIR, 'val.csv'))\n",
"test_df = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))\n",
"\n",
"print(f'Train shape: {train_df.shape}')\n",
"print(f'Val shape: {val_df.shape}')\n",
"print(f'Test shape: {test_df.shape}')"
]
},
{
@@ -71,7 +122,23 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== TARGET DISTRIBUTION (TRAIN) ===')\ntarget_counts = train_df['premium_risk'].value_counts()\nprint(target_counts)\nprint((target_counts / len(train_df) * 100).round(2))\n\nfig, ax = plt.subplots(figsize=(8, 5))\ncolors = ['#4CAF50', '#FFC107', '#F44336']\ntarget_counts.sort_index().plot(kind='bar', ax=ax, color=colors)\nax.set_title('Target Variable Distribution (Train)', fontsize=14)\nax.set_xlabel('Premium Risk')\nax.set_ylabel('Count')\nax.set_xticklabels(ax.get_xticklabels(), rotation=0)\nfor i, (idx, val) in enumerate(target_counts.sort_index().items()):\n ax.text(i, val + 300, f'{val}\\n({val/len(train_df)*100:.1f}%)', ha='center')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'target_distribution.png'), dpi=150)\nplt.show()"
"print('=== TARGET DISTRIBUTION (TRAIN) ===')\n",
"target_counts = train_df['premium_risk'].value_counts()\n",
"print(target_counts)\n",
"print((target_counts / len(train_df) * 100).round(2))\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 5))\n",
"colors = ['#4CAF50', '#FFC107', '#F44336']\n",
"target_counts.sort_index().plot(kind='bar', ax=ax, color=colors)\n",
"ax.set_title('Target Variable Distribution (Train)', fontsize=14)\n",
"ax.set_xlabel('Premium Risk')\n",
"ax.set_ylabel('Count')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=0)\n",
"for i, (idx, val) in enumerate(target_counts.sort_index().items()):\n",
" ax.text(i, val + 300, f'{val}\\n({val/len(train_df)*100:.1f}%)', ha='center')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'target_distribution.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -81,7 +148,18 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== MISSING VALUES (TRAIN) ===')\nmissing = train_df.isnull().sum()\nmissing = missing[missing > 0].sort_values(ascending=False)\nprint(missing)\n\nfig, ax = plt.subplots(figsize=(12, 6))\nmissing.plot(kind='barh', ax=ax, color='coral')\nax.set_title('Missing Values per Column (Train)', fontsize=14)\nax.set_xlabel('Count')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'missing_values.png'), dpi=150)\nplt.show()"
"print('=== MISSING VALUES (TRAIN) ===')\n",
"missing = train_df.isnull().sum()\n",
"missing = missing[missing > 0].sort_values(ascending=False)\n",
"print(missing)\n",
"\n",
"fig, ax = plt.subplots(figsize=(12, 6))\n",
"missing.plot(kind='barh', ax=ax, color='coral')\n",
"ax.set_title('Missing Values per Column (Train)', fontsize=14)\n",
"ax.set_xlabel('Count')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'missing_values.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -91,7 +169,21 @@
"metadata": {},
"outputs": [],
"source": [
"noise_cols = [c for c in train_df.columns if 'noise' in c.lower()]\nprint(f'Noise features: {noise_cols}')\n\nprint('\\n=== bureau_risk_index stats ===')\nprint(train_df['bureau_risk_index'].describe())\n\nfig, ax = plt.subplots(figsize=(8, 5))\ntrain_df.boxplot(column='bureau_risk_index', by='premium_risk', ax=ax)\nax.set_title('bureau_risk_index by Premium Risk')\nax.set_xlabel('Premium Risk')\nax.set_ylabel('bureau_risk_index')\nplt.suptitle('')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'bureau_risk_boxplot.png'), dpi=150)\nplt.show()"
"noise_cols = [c for c in train_df.columns if 'noise' in c.lower()]\n",
"print(f'Noise features: {noise_cols}')\n",
"\n",
"print('\\n=== bureau_risk_index stats ===')\n",
"print(train_df['bureau_risk_index'].describe())\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 5))\n",
"train_df.boxplot(column='bureau_risk_index', by='premium_risk', ax=ax)\n",
"ax.set_title('bureau_risk_index by Premium Risk')\n",
"ax.set_xlabel('Premium Risk')\n",
"ax.set_ylabel('bureau_risk_index')\n",
"plt.suptitle('')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'bureau_risk_boxplot.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -112,7 +204,28 @@
"metadata": {},
"outputs": [],
"source": [
"def screen_single_feature_leakage(df, target_col, feature_cols, scoring='f1_macro'):\n from sklearn.tree import DecisionTreeClassifier\n results = []\n for col in feature_cols:\n temp_df = df[[col, target_col]].dropna()\n X_temp = temp_df[[col]].values\n y_temp = temp_df[target_col].values\n le = LabelEncoder()\n y_enc = le.fit_transform(y_temp)\n try:\n clf = DecisionTreeClassifier(random_state=RANDOM_STATE, max_depth=3)\n scores = cross_val_score(clf, X_temp, y_enc, cv=3, scoring=scoring)\n results.append({'feature': col, 'mean_f1_macro': scores.mean(), 'std': scores.std()})\n except:\n results.append({'feature': col, 'mean_f1_macro': 0.0, 'std': 0.0})\n return pd.DataFrame(results).sort_values('mean_f1_macro', ascending=False)\n\nfeature_to_test = [c for c in train_df.columns if c not in ['applicant_id', 'customer_key', 'premium_risk']]\nprint('Screening single features for leakage detection (this may take a few minutes)...')\nleakage_results = screen_single_feature_leakage(train_df, 'premium_risk', feature_to_test)\nprint('\\n=== TOP 10 SINGLE-FEATURE F1 MACRO SCORES ===')\nprint(leakage_results.head(10))"
"def screen_single_feature_leakage(df, target_col, feature_cols, scoring='f1_macro'):\n",
" from sklearn.tree import DecisionTreeClassifier\n",
" results = []\n",
" for col in feature_cols:\n",
" temp_df = df[[col, target_col]].dropna()\n",
" X_temp = temp_df[[col]].values\n",
" y_temp = temp_df[target_col].values\n",
" le = LabelEncoder()\n",
" y_enc = le.fit_transform(y_temp)\n",
" try:\n",
" clf = DecisionTreeClassifier(random_state=RANDOM_STATE, max_depth=3)\n",
" scores = cross_val_score(clf, X_temp, y_enc, cv=3, scoring=scoring)\n",
" results.append({'feature': col, 'mean_f1_macro': scores.mean(), 'std': scores.std()})\n",
" except:\n",
" results.append({'feature': col, 'mean_f1_macro': 0.0, 'std': 0.0})\n",
" return pd.DataFrame(results).sort_values('mean_f1_macro', ascending=False)\n",
"\n",
"feature_to_test = [c for c in train_df.columns if c not in ['applicant_id', 'customer_key', 'premium_risk']]\n",
"print('Screening single features for leakage detection (this may take a few minutes)...')\n",
"leakage_results = screen_single_feature_leakage(train_df, 'premium_risk', feature_to_test)\n",
"print('\\n=== TOP 10 SINGLE-FEATURE F1 MACRO SCORES ===')\n",
"print(leakage_results.head(10))"
]
},
{
@@ -122,7 +235,26 @@
"metadata": {},
"outputs": [],
"source": [
"LEAKAGE_THRESHOLD = 0.85\nprint('=== LEAKAGE DETECTION RESULTS ===')\nprint(leakage_results.head(10))\n\nbureau_score = leakage_results[leakage_results['feature'] == 'bureau_risk_index']['mean_f1_macro'].values[0]\nprint(f'\\nbureau_risk_index F1 macro: {bureau_score:.4f}')\n\nif bureau_score > LEAKAGE_THRESHOLD:\n print('\\n*** ALERT: bureau_risk_index shows abnormally high predictive power! ***')\n print('*** This is consistent with a leakage feature. ***')\n print('*** ACTION: bureau_risk_index will be removed from features. ***')\n LEAKAGE_FEATURE = 'bureau_risk_index'\nelse:\n top_feat = leakage_results.iloc[0]['feature']\n top_score = leakage_results.iloc[0]['mean_f1_macro']\n print(f'\\nTop feature: {top_feat} with F1 macro = {top_score:.4f}')\n if top_score > 0.80:\n LEAKAGE_FEATURE = top_feat\n else:\n LEAKAGE_FEATURE = None"
"LEAKAGE_THRESHOLD = 0.85\n",
"print('=== LEAKAGE DETECTION RESULTS ===')\n",
"print(leakage_results.head(10))\n",
"\n",
"bureau_score = leakage_results[leakage_results['feature'] == 'bureau_risk_index']['mean_f1_macro'].values[0]\n",
"print(f'\\nbureau_risk_index F1 macro: {bureau_score:.4f}')\n",
"\n",
"if bureau_score > LEAKAGE_THRESHOLD:\n",
" print('\\n*** ALERT: bureau_risk_index shows abnormally high predictive power! ***')\n",
" print('*** This is consistent with a leakage feature. ***')\n",
" print('*** ACTION: bureau_risk_index will be removed from features. ***')\n",
" LEAKAGE_FEATURE = 'bureau_risk_index'\n",
"else:\n",
" top_feat = leakage_results.iloc[0]['feature']\n",
" top_score = leakage_results.iloc[0]['mean_f1_macro']\n",
" print(f'\\nTop feature: {top_feat} with F1 macro = {top_score:.4f}')\n",
" if top_score > 0.80:\n",
" LEAKAGE_FEATURE = top_feat\n",
" else:\n",
" LEAKAGE_FEATURE = None"
]
},
{
@@ -132,7 +264,18 @@
"metadata": {},
"outputs": [],
"source": [
"if LEAKAGE_FEATURE:\n print(f'Removing leakage feature: {LEAKAGE_FEATURE}')\n train_df_clean = train_df.drop(columns=[LEAKAGE_FEATURE])\n val_df_clean = val_df.drop(columns=[LEAKAGE_FEATURE])\n test_df_clean = test_df.drop(columns=[LEAKAGE_FEATURE])\nelse:\n print('No leakage feature to remove.')\n train_df_clean = train_df.copy()\n val_df_clean = val_df.copy()\n test_df_clean = test_df.copy()\n\nprint(f'After removal - Train: {train_df_clean.shape}, Val: {val_df_clean.shape}, Test: {test_df_clean.shape}')"
"if LEAKAGE_FEATURE:\n",
" print(f'Removing leakage feature: {LEAKAGE_FEATURE}')\n",
" train_df_clean = train_df.drop(columns=[LEAKAGE_FEATURE])\n",
" val_df_clean = val_df.drop(columns=[LEAKAGE_FEATURE])\n",
" test_df_clean = test_df.drop(columns=[LEAKAGE_FEATURE])\n",
"else:\n",
" print('No leakage feature to remove.')\n",
" train_df_clean = train_df.copy()\n",
" val_df_clean = val_df.copy()\n",
" test_df_clean = test_df.copy()\n",
"\n",
"print(f'After removal - Train: {train_df_clean.shape}, Val: {val_df_clean.shape}, Test: {test_df_clean.shape}')"
]
},
{
@@ -150,7 +293,19 @@
"metadata": {},
"outputs": [],
"source": [
"ID_COLS = ['applicant_id', 'customer_key', 'applicant_ref_code']\nNOISE_COLS = ['noise_feature_1', 'noise_feature_2', 'noise_feature_3', 'noise_feature_4', 'noise_feature_5']\nTARGET_COL = 'premium_risk'\n\nall_cols = train_df_clean.columns.tolist()\nfeature_cols_all = [c for c in all_cols if c not in ID_COLS + NOISE_COLS + [TARGET_COL]]\n\nNUMERIC_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=[np.number]).columns.tolist()\nCATEGORICAL_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=['object']).columns.tolist()\n\nprint(f'Total features: {len(feature_cols_all)}')\nprint(f'Numeric ({len(NUMERIC_FEATURES)}): {NUMERIC_FEATURES}')\nprint(f'Categorical ({len(CATEGORICAL_FEATURES)}): {CATEGORICAL_FEATURES}')"
"ID_COLS = ['applicant_id', 'customer_key', 'applicant_ref_code']\n",
"NOISE_COLS = ['noise_feature_1', 'noise_feature_2', 'noise_feature_3', 'noise_feature_4', 'noise_feature_5']\n",
"TARGET_COL = 'premium_risk'\n",
"\n",
"all_cols = train_df_clean.columns.tolist()\n",
"feature_cols_all = [c for c in all_cols if c not in ID_COLS + NOISE_COLS + [TARGET_COL]]\n",
"\n",
"NUMERIC_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=[np.number]).columns.tolist()\n",
"CATEGORICAL_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=['object']).columns.tolist()\n",
"\n",
"print(f'Total features: {len(feature_cols_all)}')\n",
"print(f'Numeric ({len(NUMERIC_FEATURES)}): {NUMERIC_FEATURES}')\n",
"print(f'Categorical ({len(CATEGORICAL_FEATURES)}): {CATEGORICAL_FEATURES}')"
]
},
{
@@ -160,7 +315,24 @@
"metadata": {},
"outputs": [],
"source": [
"numeric_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='median')),\n ('scaler', StandardScaler())\n])\n\ncategorical_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='most_frequent')),\n ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n])\n\npreprocessor = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n ],\n remainder='drop'\n)\nprint('Preprocessing pipeline created!')"
"numeric_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='median')),\n",
" ('scaler', StandardScaler())\n",
"])\n",
"\n",
"categorical_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='most_frequent')),\n",
" ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
"])\n",
"\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, NUMERIC_FEATURES),\n",
" ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n",
" ],\n",
" remainder='drop'\n",
")\n",
"print('Preprocessing pipeline created!')"
]
},
{
@@ -170,7 +342,18 @@
"metadata": {},
"outputs": [],
"source": [
"X_train = train_df_clean[feature_cols_all]\ny_train = train_df_clean[TARGET_COL]\nX_val = val_df_clean[feature_cols_all]\ny_val = val_df_clean[TARGET_COL]\nX_test = test_df_clean[feature_cols_all]\n\nle_target = LabelEncoder()\ny_train_enc = le_target.fit_transform(y_train)\ny_val_enc = le_target.transform(y_val)\n\nprint(f'Classes: {le_target.classes_}')\nprint(f'X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape}')"
"X_train = train_df_clean[feature_cols_all]\n",
"y_train = train_df_clean[TARGET_COL]\n",
"X_val = val_df_clean[feature_cols_all]\n",
"y_val = val_df_clean[TARGET_COL]\n",
"X_test = test_df_clean[feature_cols_all]\n",
"\n",
"le_target = LabelEncoder()\n",
"y_train_enc = le_target.fit_transform(y_train)\n",
"y_val_enc = le_target.transform(y_val)\n",
"\n",
"print(f'Classes: {le_target.classes_}')\n",
"print(f'X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape}')"
]
},
{
@@ -188,7 +371,32 @@
"metadata": {},
"outputs": [],
"source": [
"def evaluate_model(pipeline, X_tr, y_tr, X_v, y_v, le, model_name='Model'):\n y_tr_pred = pipeline.predict(X_tr)\n y_v_pred = pipeline.predict(X_v)\n results = {\n 'model': model_name,\n 'train_accuracy': accuracy_score(y_tr, y_tr_pred),\n 'val_accuracy': accuracy_score(y_v, y_v_pred),\n 'train_f1_macro': f1_score(y_tr, y_tr_pred, average='macro'),\n 'val_f1_macro': f1_score(y_v, y_v_pred, average='macro'),\n }\n f1_per_class = f1_score(y_v, y_v_pred, average=None)\n for i, cls in enumerate(le.classes_):\n results[f'val_f1_{cls}'] = f1_per_class[i]\n return results\n\ndef plot_confusion_matrix(pipeline, X_v, y_v, le, title, save_path):\n y_pred = pipeline.predict(X_v)\n fig, ax = plt.subplots(figsize=(8, 6))\n cm = confusion_matrix(y_v, y_pred)\n disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)\n disp.plot(ax=ax, cmap='Blues', values_format='d')\n ax.set_title(title, fontsize=14)\n plt.tight_layout()\n plt.savefig(save_path, dpi=150)\n plt.show()\n return cm"
"def evaluate_model(pipeline, X_tr, y_tr, X_v, y_v, le, model_name='Model'):\n",
" y_tr_pred = pipeline.predict(X_tr)\n",
" y_v_pred = pipeline.predict(X_v)\n",
" results = {\n",
" 'model': model_name,\n",
" 'train_accuracy': accuracy_score(y_tr, y_tr_pred),\n",
" 'val_accuracy': accuracy_score(y_v, y_v_pred),\n",
" 'train_f1_macro': f1_score(y_tr, y_tr_pred, average='macro'),\n",
" 'val_f1_macro': f1_score(y_v, y_v_pred, average='macro'),\n",
" }\n",
" f1_per_class = f1_score(y_v, y_v_pred, average=None)\n",
" for i, cls in enumerate(le.classes_):\n",
" results[f'val_f1_{cls}'] = f1_per_class[i]\n",
" return results\n",
"\n",
"def plot_confusion_matrix(pipeline, X_v, y_v, le, title, save_path):\n",
" y_pred = pipeline.predict(X_v)\n",
" fig, ax = plt.subplots(figsize=(8, 6))\n",
" cm = confusion_matrix(y_v, y_pred)\n",
" disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)\n",
" disp.plot(ax=ax, cmap='Blues', values_format='d')\n",
" ax.set_title(title, fontsize=14)\n",
" plt.tight_layout()\n",
" plt.savefig(save_path, dpi=150)\n",
" plt.show()\n",
" return cm"
]
},
{
@@ -198,7 +406,19 @@
"metadata": {},
"outputs": [],
"source": [
"print('Training Baseline: Logistic Regression...')\nbaseline_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', LogisticRegression(class_weight='balanced', max_iter=1000, random_state=RANDOM_STATE, n_jobs=-1))\n])\nbaseline_pipeline.fit(X_train, y_train_enc)\n\nbaseline_results = evaluate_model(baseline_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Baseline_LR')\n\nprint('\\n=== BASELINE MODEL RESULTS ===')\nfor k, v in baseline_results.items():\n if k != 'model':\n print(f'{k}: {v:.4f}')"
"print('Training Baseline: Logistic Regression...')\n",
"baseline_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', LogisticRegression(class_weight='balanced', max_iter=1000, random_state=RANDOM_STATE, n_jobs=-1))\n",
"])\n",
"baseline_pipeline.fit(X_train, y_train_enc)\n",
"\n",
"baseline_results = evaluate_model(baseline_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Baseline_LR')\n",
"\n",
"print('\\n=== BASELINE MODEL RESULTS ===')\n",
"for k, v in baseline_results.items():\n",
" if k != 'model':\n",
" print(f'{k}: {v:.4f}')"
]
},
{
@@ -208,7 +428,17 @@
"metadata": {},
"outputs": [],
"source": [
"plot_confusion_matrix(baseline_pipeline, X_val, y_val_enc, le_target,\n 'Baseline: Logistic Regression - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'baseline_confusion_matrix.png'))\n\nprint('\\n=== CLASSIFICATION REPORT (VAL) ===')\ny_val_pred = baseline_pipeline.predict(X_val)\nprint(classification_report(y_val_enc, y_val_pred, target_names=le_target.classes_))\n\nall_results = [baseline_results]\npd.DataFrame(all_results).to_csv(\n os.path.join(OUTPUT_DIR, 'tables', 'model_comparison_summary.csv'), index=False)"
"plot_confusion_matrix(baseline_pipeline, X_val, y_val_enc, le_target,\n",
" 'Baseline: Logistic Regression - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'baseline_confusion_matrix.png'))\n",
"\n",
"print('\\n=== CLASSIFICATION REPORT (VAL) ===')\n",
"y_val_pred = baseline_pipeline.predict(X_val)\n",
"print(classification_report(y_val_enc, y_val_pred, target_names=le_target.classes_))\n",
"\n",
"all_results = [baseline_results]\n",
"pd.DataFrame(all_results).to_csv(\n",
" os.path.join(OUTPUT_DIR, 'tables', 'model_comparison_summary.csv'), index=False)"
]
},
{
@@ -225,7 +455,36 @@
"id": "30cd02ce",
"metadata": {},
"outputs": [],
"source": "print('Training Random Forest...')\nstart = time.time()\nrf_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1))\n])\nrf_pipeline.fit(X_train, y_train_enc)\nrf_time = time.time() - start\n\nrf_results = evaluate_model(rf_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'RandomForest')\nrf_results['train_time'] = rf_time\n\nprint('Training XGBoost...')\nstart = time.time()\nxgb_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=6,\n objective='multi:softmax', num_class=3,\n tree_method=XGB_TREE_METHOD, device=XGB_DEVICE,\n random_state=RANDOM_STATE, verbosity=0))\n])\nxgb_pipeline.fit(X_train, y_train_enc)\nxgb_time = time.time() - start\n\nxgb_results = evaluate_model(xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost')\nxgb_results['train_time'] = xgb_time\n\nprint(f'RF time: {rf_time:.2f}s | XGB time: {xgb_time:.2f}s')"
"source": [
"print('Training Random Forest...')\n",
"start = time.time()\n",
"rf_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1))\n",
"])\n",
"rf_pipeline.fit(X_train, y_train_enc)\n",
"rf_time = time.time() - start\n",
"\n",
"rf_results = evaluate_model(rf_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'RandomForest')\n",
"rf_results['train_time'] = rf_time\n",
"\n",
"print('Training XGBoost...')\n",
"start = time.time()\n",
"xgb_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', xgb.XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=6,\n",
" objective='multi:softmax', num_class=3,\n",
" tree_method=XGB_TREE_METHOD, device=XGB_DEVICE,\n",
" random_state=RANDOM_STATE, verbosity=0))\n",
"])\n",
"xgb_pipeline.fit(X_train, y_train_enc)\n",
"xgb_time = time.time() - start\n",
"\n",
"xgb_results = evaluate_model(xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost')\n",
"xgb_results['train_time'] = xgb_time\n",
"\n",
"print(f'RF time: {rf_time:.2f}s | XGB time: {xgb_time:.2f}s')"
]
},
{
"cell_type": "code",
@@ -234,7 +493,17 @@
"metadata": {},
"outputs": [],
"source": [
"all_results.append(rf_results)\nall_results.append(xgb_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== MODEL COMPARISON SUMMARY ===')\ndisplay_cols = ['model', 'train_accuracy', 'val_accuracy', 'train_f1_macro', 'val_f1_macro', 'train_time']\nprint(results_df[display_cols].round(4).to_string(index=False))\n\nprint('\\n=== CLASS-WISE F1 (VAL) ===')\nclass_cols = [c for c in results_df.columns if c.startswith('val_f1_') and c != 'val_f1_macro']\nprint(results_df[['model'] + class_cols].round(4).to_string(index=False))"
"all_results.append(rf_results)\n",
"all_results.append(xgb_results)\n",
"results_df = pd.DataFrame(all_results)\n",
"\n",
"print('\\n=== MODEL COMPARISON SUMMARY ===')\n",
"display_cols = ['model', 'train_accuracy', 'val_accuracy', 'train_f1_macro', 'val_f1_macro', 'train_time']\n",
"print(results_df[display_cols].round(4).to_string(index=False))\n",
"\n",
"print('\\n=== CLASS-WISE F1 (VAL) ===')\n",
"class_cols = [c for c in results_df.columns if c.startswith('val_f1_') and c != 'val_f1_macro']\n",
"print(results_df[['model'] + class_cols].round(4).to_string(index=False))"
]
},
{
@@ -244,7 +513,28 @@
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\nmodels = results_df['model'].tolist()\nval_f1 = results_df['val_f1_macro'].tolist()\nval_acc = results_df['val_accuracy'].tolist()\n\nbars1 = axes[0].bar(models, val_f1, color=['#2196F3', '#4CAF50', '#FF9800'])\naxes[0].set_title('Validation Macro-F1 Comparison', fontsize=13)\naxes[0].set_ylabel('Macro-F1')\naxes[0].set_ylim(0, 1)\nfor bar, val in zip(bars1, val_f1):\n axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n\nbars2 = axes[1].bar(models, val_acc, color=['#2196F3', '#4CAF50', '#FF9800'])\naxes[1].set_title('Validation Accuracy Comparison', fontsize=13)\naxes[1].set_ylabel('Accuracy')\naxes[1].set_ylim(0, 1)\nfor bar, val in zip(bars2, val_acc):\n axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'model_comparison.png'), dpi=150)\nplt.show()"
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
"models = results_df['model'].tolist()\n",
"val_f1 = results_df['val_f1_macro'].tolist()\n",
"val_acc = results_df['val_accuracy'].tolist()\n",
"\n",
"bars1 = axes[0].bar(models, val_f1, color=['#2196F3', '#4CAF50', '#FF9800'])\n",
"axes[0].set_title('Validation Macro-F1 Comparison', fontsize=13)\n",
"axes[0].set_ylabel('Macro-F1')\n",
"axes[0].set_ylim(0, 1)\n",
"for bar, val in zip(bars1, val_f1):\n",
" axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n",
"\n",
"bars2 = axes[1].bar(models, val_acc, color=['#2196F3', '#4CAF50', '#FF9800'])\n",
"axes[1].set_title('Validation Accuracy Comparison', fontsize=13)\n",
"axes[1].set_ylabel('Accuracy')\n",
"axes[1].set_ylim(0, 1)\n",
"for bar, val in zip(bars2, val_acc):\n",
" axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'model_comparison.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -254,7 +544,13 @@
"metadata": {},
"outputs": [],
"source": [
"plot_confusion_matrix(rf_pipeline, X_val, y_val_enc, le_target,\n 'Random Forest - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'rf_confusion_matrix.png'))\n\nplot_confusion_matrix(xgb_pipeline, X_val, y_val_enc, le_target,\n 'XGBoost - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'xgb_confusion_matrix.png'))"
"plot_confusion_matrix(rf_pipeline, X_val, y_val_enc, le_target,\n",
" 'Random Forest - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'rf_confusion_matrix.png'))\n",
"\n",
"plot_confusion_matrix(xgb_pipeline, X_val, y_val_enc, le_target,\n",
" 'XGBoost - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'xgb_confusion_matrix.png'))"
]
},
{
@@ -272,7 +568,17 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== BAGGING VS BOOSTING ANALYSIS ===')\nrf_val_f1 = rf_results['val_f1_macro']\nrf_train_f1 = rf_results['train_f1_macro']\nrf_gap = rf_train_f1 - rf_val_f1\n\nxgb_val_f1 = xgb_results['val_f1_macro']\nxgb_train_f1 = xgb_results['train_f1_macro']\nxgb_gap = xgb_train_f1 - xgb_val_f1\n\nprint(f'Random Forest - val_f1_macro: {rf_val_f1:.4f}, overfitting gap: {rf_gap:.4f}')\nprint(f'XGBoost - val_f1_macro: {xgb_val_f1:.4f}, overfitting gap: {xgb_gap:.4f}')"
"print('=== BAGGING VS BOOSTING ANALYSIS ===')\n",
"rf_val_f1 = rf_results['val_f1_macro']\n",
"rf_train_f1 = rf_results['train_f1_macro']\n",
"rf_gap = rf_train_f1 - rf_val_f1\n",
"\n",
"xgb_val_f1 = xgb_results['val_f1_macro']\n",
"xgb_train_f1 = xgb_results['train_f1_macro']\n",
"xgb_gap = xgb_train_f1 - xgb_val_f1\n",
"\n",
"print(f'Random Forest - val_f1_macro: {rf_val_f1:.4f}, overfitting gap: {rf_gap:.4f}')\n",
"print(f'XGBoost - val_f1_macro: {xgb_val_f1:.4f}, overfitting gap: {xgb_gap:.4f}')"
]
},
{
@@ -289,7 +595,40 @@
"id": "e6361576",
"metadata": {},
"outputs": [],
"source": "def objective(trial):\n params = {\n 'n_estimators': trial.suggest_int('n_estimators', 100, 500),\n 'max_depth': trial.suggest_int('max_depth', 3, 10),\n 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),\n 'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),\n 'subsample': trial.suggest_float('subsample', 0.5, 1.0),\n 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),\n 'gamma': trial.suggest_float('gamma', 0, 5),\n 'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),\n 'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),\n 'objective': 'multi:softmax',\n 'num_class': 3,\n 'random_state': RANDOM_STATE,\n 'tree_method': XGB_TREE_METHOD,\n 'device': XGB_DEVICE,\n 'verbosity': 0\n }\n pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(**params))\n ])\n pipeline.fit(X_train, y_train_enc)\n y_pred = pipeline.predict(X_val)\n score = f1_score(y_val_enc, y_pred, average='macro')\n return score\n\nprint('Starting Optuna hyperparameter optimisation (30 trials)...')\nstudy = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE))\nstudy.optimize(objective, n_trials=30, show_progress_bar=False)\n\nprint(f'Best trial: {study.best_trial.number} | Best macro-F1: {study.best_value:.4f}')"
"source": [
"def objective(trial):\n",
" params = {\n",
" 'n_estimators': trial.suggest_int('n_estimators', 100, 500),\n",
" 'max_depth': trial.suggest_int('max_depth', 3, 10),\n",
" 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),\n",
" 'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),\n",
" 'subsample': trial.suggest_float('subsample', 0.5, 1.0),\n",
" 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),\n",
" 'gamma': trial.suggest_float('gamma', 0, 5),\n",
" 'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),\n",
" 'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),\n",
" 'objective': 'multi:softmax',\n",
" 'num_class': 3,\n",
" 'random_state': RANDOM_STATE,\n",
" 'tree_method': XGB_TREE_METHOD,\n",
" 'device': XGB_DEVICE,\n",
" 'verbosity': 0\n",
" }\n",
" pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', xgb.XGBClassifier(**params))\n",
" ])\n",
" pipeline.fit(X_train, y_train_enc)\n",
" y_pred = pipeline.predict(X_val)\n",
" score = f1_score(y_val_enc, y_pred, average='macro')\n",
" return score\n",
"\n",
"print('Starting Optuna hyperparameter optimisation (30 trials)...')\n",
"study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE))\n",
"study.optimize(objective, n_trials=30, show_progress_bar=False)\n",
"\n",
"print(f'Best trial: {study.best_trial.number} | Best macro-F1: {study.best_value:.4f}')"
]
},
{
"cell_type": "code",
@@ -298,7 +637,22 @@
"metadata": {},
"outputs": [],
"source": [
"print('\\n=== BEST HYPERPARAMETERS ===')\nbest_params = study.best_params\nfor k, v in best_params.items():\n print(f' {k}: {v}')\n\nfig = optuna.visualization.matplotlib.plot_optimization_history(study)\nplt.title('Optuna Optimization History')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_optimization_history.png'), dpi=150)\nplt.show()\n\nfig = optuna.visualization.matplotlib.plot_param_importances(study)\nplt.title('Hyperparameter Importance')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_param_importance.png'), dpi=150)\nplt.show()"
"print('\\n=== BEST HYPERPARAMETERS ===')\n",
"best_params = study.best_params\n",
"for k, v in best_params.items():\n",
" print(f' {k}: {v}')\n",
"\n",
"fig = optuna.visualization.matplotlib.plot_optimization_history(study)\n",
"plt.title('Optuna Optimization History')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_optimization_history.png'), dpi=150)\n",
"plt.show()\n",
"\n",
"fig = optuna.visualization.matplotlib.plot_param_importances(study)\n",
"plt.title('Hyperparameter Importance')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_param_importance.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -307,7 +661,37 @@
"id": "640263ea",
"metadata": {},
"outputs": [],
"source": "best_xgb_params = {\n **study.best_params,\n 'objective': 'multi:softmax',\n 'num_class': 3,\n 'random_state': RANDOM_STATE,\n 'tree_method': XGB_TREE_METHOD,\n 'device': XGB_DEVICE,\n 'verbosity': 0\n}\n\nprint('Training tuned XGBoost...')\nimport time\nstart = time.time()\ntuned_xgb_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(**best_xgb_params))\n])\ntuned_xgb_pipeline.fit(X_train, y_train_enc)\ntuned_time = time.time() - start\n\ntuned_results = evaluate_model(tuned_xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost_Tuned')\ntuned_results['train_time'] = tuned_time\n\nprint('\\n=== TUNED XGBOOST RESULTS ===')\nfor k, v in tuned_results.items():\n if k != 'model':\n print(f'{k}: {v:.4f}')\n\nprint(f'\\nTuning improvement (macro-F1): +{tuned_results[\"val_f1_macro\"] - xgb_results[\"val_f1_macro\"]:.4f}')"
"source": [
"best_xgb_params = {\n",
" **study.best_params,\n",
" 'objective': 'multi:softmax',\n",
" 'num_class': 3,\n",
" 'random_state': RANDOM_STATE,\n",
" 'tree_method': XGB_TREE_METHOD,\n",
" 'device': XGB_DEVICE,\n",
" 'verbosity': 0\n",
"}\n",
"\n",
"print('Training tuned XGBoost...')\n",
"import time\n",
"start = time.time()\n",
"tuned_xgb_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', xgb.XGBClassifier(**best_xgb_params))\n",
"])\n",
"tuned_xgb_pipeline.fit(X_train, y_train_enc)\n",
"tuned_time = time.time() - start\n",
"\n",
"tuned_results = evaluate_model(tuned_xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost_Tuned')\n",
"tuned_results['train_time'] = tuned_time\n",
"\n",
"print('\\n=== TUNED XGBOOST RESULTS ===')\n",
"for k, v in tuned_results.items():\n",
" if k != 'model':\n",
" print(f'{k}: {v:.4f}')\n",
"\n",
"print(f'\\nTuning improvement (macro-F1): +{tuned_results[\"val_f1_macro\"] - xgb_results[\"val_f1_macro\"]:.4f}')"
]
},
{
"cell_type": "code",
@@ -316,7 +700,11 @@
"metadata": {},
"outputs": [],
"source": [
"all_results.append(tuned_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== BEFORE VS AFTER TUNING ===')\nprint(results_df[['model', 'val_f1_macro', 'val_accuracy', 'train_time']].round(4).to_string(index=False))"
"all_results.append(tuned_results)\n",
"results_df = pd.DataFrame(all_results)\n",
"\n",
"print('\\n=== BEFORE VS AFTER TUNING ===')\n",
"print(results_df[['model', 'val_f1_macro', 'val_accuracy', 'train_time']].round(4).to_string(index=False))"
]
},
{
@@ -339,7 +727,47 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== CATEGORY A: IMPROVED MISSING VALUE HANDLING ===')\n\nMISSING_COLS = ['net_monthly_income_gbp', 'avg_payment_delay_days', 'monthly_investment_gbp',\n 'prior_debt_products', 'account_tenure']\n\nfor col in MISSING_COLS:\n missing_col_name = f'{col}_missing'\n train_df_clean[missing_col_name] = train_df_clean[col].isnull().astype(int)\n val_df_clean[missing_col_name] = val_df_clean[col].isnull().astype(int)\n test_df_clean[missing_col_name] = test_df_clean[col].isnull().astype(int)\n print(f'Added missing indicator: {missing_col_name}')\n\nfeature_cols_catA = feature_cols_all + [f'{c}_missing' for c in MISSING_COLS]\nprint(f'\\nFeature columns after adding indicators: {len(feature_cols_catA)}')\n\nX_train_A = train_df_clean[feature_cols_catA]\nX_val_A = val_df_clean[feature_cols_catA]\nX_test_A = test_df_clean[feature_cols_catA]\n\nNUMERIC_FEATURES_A = X_train_A.select_dtypes(include=[np.number]).columns.tolist()\nCATEGORICAL_FEATURES_A = X_train_A.select_dtypes(include=['object']).columns.tolist()\n\npreprocessor_A = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES_A),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES_A)\n ],\n remainder='drop'\n)\n\ncatA_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor_A),\n ('classifier', xgb.XGBClassifier(**best_xgb_params))\n])\ncatA_pipeline.fit(X_train_A, y_train_enc)\n\ncatA_results = evaluate_model(catA_pipeline, X_train_A, y_train_enc, X_val_A, y_val_enc, le_target, 'XGB_CatA_MissingHandling')\n\nprint('\\n=== CATEGORY A RESULTS ===')\nprint(f'val_f1_macro: {catA_results[\"val_f1_macro\"]:.4f}')\nprint(f'val_accuracy: {catA_results[\"val_accuracy\"]:.4f}')"
"print('=== CATEGORY A: IMPROVED MISSING VALUE HANDLING ===')\n",
"\n",
"MISSING_COLS = ['net_monthly_income_gbp', 'avg_payment_delay_days', 'monthly_investment_gbp',\n",
" 'prior_debt_products', 'account_tenure']\n",
"\n",
"for col in MISSING_COLS:\n",
" missing_col_name = f'{col}_missing'\n",
" train_df_clean[missing_col_name] = train_df_clean[col].isnull().astype(int)\n",
" val_df_clean[missing_col_name] = val_df_clean[col].isnull().astype(int)\n",
" test_df_clean[missing_col_name] = test_df_clean[col].isnull().astype(int)\n",
" print(f'Added missing indicator: {missing_col_name}')\n",
"\n",
"feature_cols_catA = feature_cols_all + [f'{c}_missing' for c in MISSING_COLS]\n",
"print(f'\\nFeature columns after adding indicators: {len(feature_cols_catA)}')\n",
"\n",
"X_train_A = train_df_clean[feature_cols_catA]\n",
"X_val_A = val_df_clean[feature_cols_catA]\n",
"X_test_A = test_df_clean[feature_cols_catA]\n",
"\n",
"NUMERIC_FEATURES_A = X_train_A.select_dtypes(include=[np.number]).columns.tolist()\n",
"CATEGORICAL_FEATURES_A = X_train_A.select_dtypes(include=['object']).columns.tolist()\n",
"\n",
"preprocessor_A = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, NUMERIC_FEATURES_A),\n",
" ('cat', categorical_transformer, CATEGORICAL_FEATURES_A)\n",
" ],\n",
" remainder='drop'\n",
")\n",
"\n",
"catA_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_A),\n",
" ('classifier', xgb.XGBClassifier(**best_xgb_params))\n",
"])\n",
"catA_pipeline.fit(X_train_A, y_train_enc)\n",
"\n",
"catA_results = evaluate_model(catA_pipeline, X_train_A, y_train_enc, X_val_A, y_val_enc, le_target, 'XGB_CatA_MissingHandling')\n",
"\n",
"print('\\n=== CATEGORY A RESULTS ===')\n",
"print(f'val_f1_macro: {catA_results[\"val_f1_macro\"]:.4f}')\n",
"print(f'val_accuracy: {catA_results[\"val_accuracy\"]:.4f}')"
]
},
{
@@ -349,7 +777,31 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== CATEGORY D: SOFT VOTING ENSEMBLE ===')\nprint('Training Soft Voting Ensemble (RF + XGBoost)...')\n\nrf_clf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1)\nxgb_clf = xgb.XGBClassifier(**best_xgb_params)\n\nvoting_clf = VotingClassifier(\n estimators=[\n ('rf', rf_clf),\n ('xgb', xgb_clf)\n ],\n voting='soft',\n n_jobs=-1\n)\n\nensemble_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', voting_clf)\n])\nensemble_pipeline.fit(X_train, y_train_enc)\n\nensemble_results = evaluate_model(ensemble_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Ensemble_SoftVoting')\n\nprint(f'Ensemble val_f1_macro: {ensemble_results[\"val_f1_macro\"]:.4f}')\nprint(f'Ensemble val_accuracy: {ensemble_results[\"val_accuracy\"]:.4f}')"
"print('=== CATEGORY D: SOFT VOTING ENSEMBLE ===')\n",
"print('Training Soft Voting Ensemble (RF + XGBoost)...')\n",
"\n",
"rf_clf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1)\n",
"xgb_clf = xgb.XGBClassifier(**best_xgb_params)\n",
"\n",
"voting_clf = VotingClassifier(\n",
" estimators=[\n",
" ('rf', rf_clf),\n",
" ('xgb', xgb_clf)\n",
" ],\n",
" voting='soft',\n",
" n_jobs=-1\n",
")\n",
"\n",
"ensemble_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', voting_clf)\n",
"])\n",
"ensemble_pipeline.fit(X_train, y_train_enc)\n",
"\n",
"ensemble_results = evaluate_model(ensemble_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Ensemble_SoftVoting')\n",
"\n",
"print(f'Ensemble val_f1_macro: {ensemble_results[\"val_f1_macro\"]:.4f}')\n",
"print(f'Ensemble val_accuracy: {ensemble_results[\"val_accuracy\"]:.4f}')"
]
},
{
@@ -359,7 +811,20 @@
"metadata": {},
"outputs": [],
"source": [
"all_results.append(catA_results)\nall_results.append(ensemble_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== PERSONALISED IMPROVEMENT SUMMARY ===')\nprint(results_df[['model', 'val_f1_macro', 'val_accuracy']].round(4).to_string(index=False))\n\nresults_df.to_csv(\n os.path.join(OUTPUT_DIR, 'tables', 'personalised_improvement_summary.csv'), index=False)\n\nimprove_A = catA_results['val_f1_macro'] - tuned_results['val_f1_macro']\nimprove_D = ensemble_results['val_f1_macro'] - tuned_results['val_f1_macro']\nprint(f'\\nCategory A improvement (vs Tuned): +{improve_A:.4f}')\nprint(f'Category D improvement (vs Tuned): +{improve_D:.4f}')"
"all_results.append(catA_results)\n",
"all_results.append(ensemble_results)\n",
"results_df = pd.DataFrame(all_results)\n",
"\n",
"print('\\n=== PERSONALISED IMPROVEMENT SUMMARY ===')\n",
"print(results_df[['model', 'val_f1_macro', 'val_accuracy']].round(4).to_string(index=False))\n",
"\n",
"results_df.to_csv(\n",
" os.path.join(OUTPUT_DIR, 'tables', 'personalised_improvement_summary.csv'), index=False)\n",
"\n",
"improve_A = catA_results['val_f1_macro'] - tuned_results['val_f1_macro']\n",
"improve_D = ensemble_results['val_f1_macro'] - tuned_results['val_f1_macro']\n",
"print(f'\\nCategory A improvement (vs Tuned): +{improve_A:.4f}')\n",
"print(f'Category D improvement (vs Tuned): +{improve_D:.4f}')"
]
},
{
@@ -377,7 +842,58 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== K-MEANS & GMM CLUSTERING ===')\n\npreprocessor_eval = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n ],\n remainder='drop'\n)\n\nX_train_scaled = preprocessor_eval.fit_transform(X_train)\nprint(f'Scaled training data shape: {X_train_scaled.shape}')\n\npca = PCA(n_components=2, random_state=RANDOM_STATE)\nX_train_pca = pca.fit_transform(X_train_scaled)\nprint(f'PCA explained variance: {pca.explained_variance_ratio_.sum():.4f}')\n\nk_range = range(2, 9)\nkmeans_results = []\ngmm_results = []\n\nfor k in k_range:\n print(f' Running k={k}...')\n \n km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=10)\n km_labels = km.fit_predict(X_train_scaled)\n sil_km = silhouette_score(X_train_scaled, km_labels)\n \n gmm_model = GaussianMixture(n_components=k, random_state=RANDOM_STATE, n_init=5)\n gmm_labels = gmm_model.fit_predict(X_train_scaled)\n sil_gmm = silhouette_score(X_train_scaled, gmm_labels)\n \n kmeans_results.append({\n 'k': k,\n 'inertia': km.inertia_,\n 'silhouette_x': sil_km\n })\n gmm_results.append({\n 'k': k,\n 'log_likelihood': gmm_model.score(X_train_scaled) * X_train_scaled.shape[0],\n 'bic': gmm_model.bic(X_train_scaled),\n 'aic': gmm_model.aic(X_train_scaled),\n 'silhouette_y': sil_gmm\n })\n\nkm_df = pd.DataFrame(kmeans_results)\ngmm_df = pd.DataFrame(gmm_results)\ncluster_df = km_df.merge(gmm_df, on='k')\nprint('\\n=== CLUSTERING COMPARISON ===')\nprint(cluster_df.round(4).to_string(index=False))\n\ncluster_df.to_csv(os.path.join(OUTPUT_DIR, 'tables', 'clustering_comparison.csv'), index=False)"
"print('=== K-MEANS & GMM CLUSTERING ===')\n",
"\n",
"preprocessor_eval = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, NUMERIC_FEATURES),\n",
" ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n",
" ],\n",
" remainder='drop'\n",
")\n",
"\n",
"X_train_scaled = preprocessor_eval.fit_transform(X_train)\n",
"print(f'Scaled training data shape: {X_train_scaled.shape}')\n",
"\n",
"pca = PCA(n_components=2, random_state=RANDOM_STATE)\n",
"X_train_pca = pca.fit_transform(X_train_scaled)\n",
"print(f'PCA explained variance: {pca.explained_variance_ratio_.sum():.4f}')\n",
"\n",
"k_range = range(2, 9)\n",
"kmeans_results = []\n",
"gmm_results = []\n",
"\n",
"for k in k_range:\n",
" print(f' Running k={k}...')\n",
" \n",
" km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=10)\n",
" km_labels = km.fit_predict(X_train_scaled)\n",
" sil_km = silhouette_score(X_train_scaled, km_labels)\n",
" \n",
" gmm_model = GaussianMixture(n_components=k, random_state=RANDOM_STATE, n_init=5)\n",
" gmm_labels = gmm_model.fit_predict(X_train_scaled)\n",
" sil_gmm = silhouette_score(X_train_scaled, gmm_labels)\n",
" \n",
" kmeans_results.append({\n",
" 'k': k,\n",
" 'inertia': km.inertia_,\n",
" 'silhouette_x': sil_km\n",
" })\n",
" gmm_results.append({\n",
" 'k': k,\n",
" 'log_likelihood': gmm_model.score(X_train_scaled) * X_train_scaled.shape[0],\n",
" 'bic': gmm_model.bic(X_train_scaled),\n",
" 'aic': gmm_model.aic(X_train_scaled),\n",
" 'silhouette_y': sil_gmm\n",
" })\n",
"\n",
"km_df = pd.DataFrame(kmeans_results)\n",
"gmm_df = pd.DataFrame(gmm_results)\n",
"cluster_df = km_df.merge(gmm_df, on='k')\n",
"print('\\n=== CLUSTERING COMPARISON ===')\n",
"print(cluster_df.round(4).to_string(index=False))\n",
"\n",
"cluster_df.to_csv(os.path.join(OUTPUT_DIR, 'tables', 'clustering_comparison.csv'), index=False)"
]
},
{
@@ -387,7 +903,33 @@
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n\naxes[0].plot(cluster_df['k'], cluster_df['inertia'], 'bo-', label='K-Means Inertia', linewidth=2)\naxes[0].set_xlabel('k')\naxes[0].set_ylabel('Inertia')\naxes[0].set_title('K-Means: Elbow Method')\naxes[0].grid(True)\n\naxes[1].plot(cluster_df['k'], cluster_df['bic'], 'g^-', label='BIC', linewidth=2)\naxes[1].plot(cluster_df['k'], cluster_df['aic'], 'rs--', label='AIC', linewidth=2)\naxes[1].set_xlabel('k')\naxes[1].set_ylabel('Score')\naxes[1].set_title('GMM: BIC & AIC (lower is better)')\naxes[1].legend()\naxes[1].grid(True)\n\naxes[2].plot(cluster_df['k'], cluster_df['silhouette_x'], 'bo-', label='K-Means', linewidth=2)\naxes[2].plot(cluster_df['k'], cluster_df['silhouette_y'], 'g^-', label='GMM', linewidth=2)\naxes[2].set_xlabel('k')\naxes[2].set_ylabel('Silhouette Score')\naxes[2].set_title('Silhouette Score Comparison (higher is better)')\naxes[2].legend()\naxes[2].grid(True)\n\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_comparison.png'), dpi=150)\nplt.show()"
"fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n",
"\n",
"axes[0].plot(cluster_df['k'], cluster_df['inertia'], 'bo-', label='K-Means Inertia', linewidth=2)\n",
"axes[0].set_xlabel('k')\n",
"axes[0].set_ylabel('Inertia')\n",
"axes[0].set_title('K-Means: Elbow Method')\n",
"axes[0].grid(True)\n",
"\n",
"axes[1].plot(cluster_df['k'], cluster_df['bic'], 'g^-', label='BIC', linewidth=2)\n",
"axes[1].plot(cluster_df['k'], cluster_df['aic'], 'rs--', label='AIC', linewidth=2)\n",
"axes[1].set_xlabel('k')\n",
"axes[1].set_ylabel('Score')\n",
"axes[1].set_title('GMM: BIC & AIC (lower is better)')\n",
"axes[1].legend()\n",
"axes[1].grid(True)\n",
"\n",
"axes[2].plot(cluster_df['k'], cluster_df['silhouette_x'], 'bo-', label='K-Means', linewidth=2)\n",
"axes[2].plot(cluster_df['k'], cluster_df['silhouette_y'], 'g^-', label='GMM', linewidth=2)\n",
"axes[2].set_xlabel('k')\n",
"axes[2].set_ylabel('Silhouette Score')\n",
"axes[2].set_title('Silhouette Score Comparison (higher is better)')\n",
"axes[2].legend()\n",
"axes[2].grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_comparison.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -397,7 +939,22 @@
"metadata": {},
"outputs": [],
"source": [
"best_k = cluster_df.loc[cluster_df['silhouette_x'].idxmax(), 'k']\nprint(f'Best K for K-Means (by silhouette): {best_k}')\n\nkm_best = KMeans(n_clusters=int(best_k), random_state=RANDOM_STATE, n_init=10)\nkm_best_labels = km_best.fit_predict(X_train_scaled)\n\nfig, ax = plt.subplots(figsize=(8, 6))\nscatter = ax.scatter(X_train_pca[:, 0], X_train_pca[:, 1],\n c=km_best_labels, cmap='viridis', alpha=0.5, s=10)\nax.set_xlabel('PC1')\nax.set_ylabel('PC2')\nax.set_title(f'K-Means Clustering (k={best_k}) - PCA Visualization')\nplt.colorbar(scatter, ax=ax, label='Cluster')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_visualization.png'), dpi=150)\nplt.show()"
"best_k = cluster_df.loc[cluster_df['silhouette_x'].idxmax(), 'k']\n",
"print(f'Best K for K-Means (by silhouette): {best_k}')\n",
"\n",
"km_best = KMeans(n_clusters=int(best_k), random_state=RANDOM_STATE, n_init=10)\n",
"km_best_labels = km_best.fit_predict(X_train_scaled)\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"scatter = ax.scatter(X_train_pca[:, 0], X_train_pca[:, 1],\n",
" c=km_best_labels, cmap='viridis', alpha=0.5, s=10)\n",
"ax.set_xlabel('PC1')\n",
"ax.set_ylabel('PC2')\n",
"ax.set_title(f'K-Means Clustering (k={best_k}) - PCA Visualization')\n",
"plt.colorbar(scatter, ax=ax, label='Cluster')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_visualization.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -415,7 +972,28 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== FINAL MODEL SELECTION ===')\nprint('Based on val_f1_macro (primary metric):')\nfinal_model_name = results_df.loc[results_df['val_f1_macro'].idxmax(), 'model']\nprint(f'Selected model: {final_model_name} (val_f1_macro = {results_df[\"val_f1_macro\"].max():.4f})')\n\nif final_model_name == 'XGB_CatA_MissingHandling':\n final_pipeline = catA_pipeline\n X_test_final = X_test_A\nelif final_model_name == 'Ensemble_SoftVoting':\n final_pipeline = ensemble_pipeline\n X_test_final = X_test\nelse:\n final_pipeline = tuned_xgb_pipeline\n X_test_final = X_test\n\ny_val_final_pred = final_pipeline.predict(X_test_final if final_model_name == 'XGBoost_Tuned' else X_test)\ny_val_final_decoded = le_target.inverse_transform(y_val_final_pred)\n\nplot_confusion_matrix(final_pipeline, X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val,\n y_val_enc, le_target,\n f'Final Model: {final_model_name} - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'final_model_confusion_matrix.png'))"
"print('=== FINAL MODEL SELECTION ===')\n",
"print('Based on val_f1_macro (primary metric):')\n",
"final_model_name = results_df.loc[results_df['val_f1_macro'].idxmax(), 'model']\n",
"print(f'Selected model: {final_model_name} (val_f1_macro = {results_df[\"val_f1_macro\"].max():.4f})')\n",
"\n",
"if final_model_name == 'XGB_CatA_MissingHandling':\n",
" final_pipeline = catA_pipeline\n",
" X_test_final = X_test_A\n",
"elif final_model_name == 'Ensemble_SoftVoting':\n",
" final_pipeline = ensemble_pipeline\n",
" X_test_final = X_test\n",
"else:\n",
" final_pipeline = tuned_xgb_pipeline\n",
" X_test_final = X_test\n",
"\n",
"y_val_final_pred = final_pipeline.predict(X_test_final if final_model_name == 'XGBoost_Tuned' else X_test)\n",
"y_val_final_decoded = le_target.inverse_transform(y_val_final_pred)\n",
"\n",
"plot_confusion_matrix(final_pipeline, X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val,\n",
" y_val_enc, le_target,\n",
" f'Final Model: {final_model_name} - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'final_model_confusion_matrix.png'))"
]
},
{
@@ -425,7 +1003,9 @@
"metadata": {},
"outputs": [],
"source": [
"print('\\n=== FINAL CLASSIFICATION REPORT (VAL) ===')\ny_val_pred_final = final_pipeline.predict(X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val)\nprint(classification_report(y_val_enc, y_val_pred_final, target_names=le_target.classes_))"
"print('\\n=== FINAL CLASSIFICATION REPORT (VAL) ===')\n",
"y_val_pred_final = final_pipeline.predict(X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val)\n",
"print(classification_report(y_val_enc, y_val_pred_final, target_names=le_target.classes_))"
]
},
{
@@ -435,7 +1015,34 @@
"metadata": {},
"outputs": [],
"source": [
"STUDENT_ID = '1234560'\n\nif final_model_name == 'XGB_CatA_MissingHandling':\n y_test_pred = final_pipeline.predict(X_test_A)\nelif final_model_name == 'Ensemble_SoftVoting':\n y_test_pred = final_pipeline.predict(X_test)\nelse:\n y_test_pred = final_pipeline.predict(X_test)\n\ny_test_labels = le_target.inverse_transform(y_test_pred)\n\nsubmission_df = pd.DataFrame({\n 'applicant_id': test_df['applicant_id'],\n 'customer_key': test_df['customer_key'],\n 'premium_risk': y_test_labels\n})\n\nprint('=== SUBMISSION CSV VALIDATION ===')\nprint(f'Shape: {submission_df.shape}')\nprint(f'Columns: {list(submission_df.columns)}')\nprint(submission_df.head())\n\nprint('\\nPrediction counts:')\nprint(submission_df['premium_risk'].value_counts())\n\ncsv_path = os.path.join(OUTPUT_DIR, 'predictions', f'test_result_{STUDENT_ID}.csv')\nsubmission_df.to_csv(csv_path, index=False)\nprint(f'\\n*** CSV saved to: {csv_path} ***')"
"STUDENT_ID = '1234560'\n",
"\n",
"if final_model_name == 'XGB_CatA_MissingHandling':\n",
" y_test_pred = final_pipeline.predict(X_test_A)\n",
"elif final_model_name == 'Ensemble_SoftVoting':\n",
" y_test_pred = final_pipeline.predict(X_test)\n",
"else:\n",
" y_test_pred = final_pipeline.predict(X_test)\n",
"\n",
"y_test_labels = le_target.inverse_transform(y_test_pred)\n",
"\n",
"submission_df = pd.DataFrame({\n",
" 'applicant_id': test_df['applicant_id'],\n",
" 'customer_key': test_df['customer_key'],\n",
" 'premium_risk': y_test_labels\n",
"})\n",
"\n",
"print('=== SUBMISSION CSV VALIDATION ===')\n",
"print(f'Shape: {submission_df.shape}')\n",
"print(f'Columns: {list(submission_df.columns)}')\n",
"print(submission_df.head())\n",
"\n",
"print('\\nPrediction counts:')\n",
"print(submission_df['premium_risk'].value_counts())\n",
"\n",
"csv_path = os.path.join(OUTPUT_DIR, 'predictions', f'test_result_{STUDENT_ID}.csv')\n",
"submission_df.to_csv(csv_path, index=False)\n",
"print(f'\\n*** CSV saved to: {csv_path} ***')"
]
}
],
@@ -452,4 +1059,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Binary image files not shown (four output figures updated; sizes before → after: 52→52 KiB, 38→37 KiB, 59→59 KiB, 49→49 KiB).

@@ -1,5 +1,2 @@
model,train_accuracy,val_accuracy,train_f1_macro,val_f1_macro,val_f1_High,val_f1_Low,val_f1_Standard,train_time
Baseline_LR,0.7593680672268908,0.7341714285714286,0.7492574544185482,0.7237629331592531,0.7665209565440987,0.6489501312335958,0.7558177117000646,
RandomForest,1.0,0.7877333333333333,1.0,0.770789728543472,0.7874554916461244,0.7095334685598377,0.8153802254244543,57.91048526763916
XGBoost,0.8519529411764706,0.8371047619047619,0.8297116592669606,0.8143842728003406,0.8904623073719283,0.6944039941751612,0.8582865168539325,67.63970804214478
XGBoost_Tuned,0.9767663865546219,0.8700190476190476,0.9739400525375727,0.8519502714571496,0.9084439578486383,0.7620280474649407,0.8853788090578697,142.65462470054626
model,train_accuracy,val_accuracy,train_f1_macro,val_f1_macro,val_f1_High,val_f1_Low,val_f1_Standard
Baseline_LR,0.7595294117647059,0.7337904761904762,0.7493991157707756,0.7234383324236036,0.7663239074550129,0.6487372909150542,0.7552537989007436
@@ -1,16 +1,19 @@
"""
Script that runs insurance_premium_risk.ipynb.
Extracts the notebook's code cells and executes them one by one.
"""
import json, sys, os, warnings, traceback, time
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('Agg')
matplotlib.use("Agg")
import matplotlib.pyplot as _real_mpl_plt
_real_mpl_plt.show = lambda *a, **kw: None
import os
import sys
import time
import json
import traceback
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
@@ -32,34 +35,18 @@ import xgboost as xgb
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['font.size'] = 12
sns.set_style('whitegrid')
from src.notebook_runner import execute_notebook
from src.runtime_paths import build_paths
# ===== Load the notebook =====
nb_path = r'd:\Code\doing_exercises\programs\外教作业外快\强化学习个人课程作业报告\notebooks\insurance_premium_risk.ipynb'
cells = json.load(open(nb_path, encoding='utf-8'))['cells']
code_cells = [c for c in cells if c['cell_type'] == 'code']
print(f"Total code cells: {len(code_cells)}")
paths = build_paths()
print(f"Project root : {paths.project_root}")
print(f"Notebook : {paths.notebook}")
print(f"Data dir : {paths.data_dir}")
print(f"Output dir : {paths.output_dir}")
# ===== Execute each cell =====
# Use the global __main__ namespace so variables persist across cells
main_ns = globals().copy()
ns = vars()
for i, cell in enumerate(code_cells):
src = ''.join(cell['source'])
print(f"\n{'='*60}")
print(f"Running cell {i+1}/{len(code_cells)}...")
print(f" Source: {src[:80].replace(chr(10), ' ')}")
try:
exec(compile(src, f'cell_{i+1}', 'exec'), main_ns)
except Exception as e:
print(f"ERROR in cell {i+1}: {e}")
traceback.print_exc()
print("Stopping execution.")
break
print("\n\nAll cells executed successfully!")
print(f"Results saved to: outputs/figures/ and outputs/tables/")
result = execute_notebook(namespace=ns)
print(f"\nExecution finished: {result['status']}")
print(f"Cells run: {len([c for c in result['cells'] if c['status'] == 'ok'])}/{result['total']}")
print(f"Output dir: {result['outputs']['output_dir']}")
@@ -0,0 +1,55 @@
import json
import traceback
from pathlib import Path
from .runtime_paths import build_paths
def execute_notebook(
start_at: int | None = None,
stop_at: int | None = None,
namespace: dict | None = None,
) -> dict:
paths = build_paths()
paths.ensure_outputs()
nb_data = json.loads(paths.notebook.read_text(encoding="utf-8"))
code_cells = [c for c in nb_data["cells"] if c["cell_type"] == "code"]
if not code_cells:
return {"status": "skipped", "reason": "no code cells found"}
ns = (namespace or {}).copy()
ns.update(paths.as_injection())
ns["RANDOM_STATE"] = 42
start = max((start_at or 1) - 1, 0)
stop = stop_at if stop_at is not None else len(code_cells)
cells_to_run = code_cells[start:stop]
results = []
for i, cell in enumerate(cells_to_run, start=start + 1):
src = "".join(cell["source"])
tag = f"cell_{i}"
try:
exec(compile(src, tag, "exec"), ns)
results.append({"cell": i, "status": "ok"})
except Exception as exc:
results.append({"cell": i, "status": "error", "error": str(exc)})
traceback.print_exc()
print(f"Stopping at cell {i} due to error.")
break
results_summary = {
"status": "completed",
"total": len(cells_to_run),
"cells": results,
"outputs": {
"data_dir": str(paths.data_dir),
"output_dir": str(paths.output_dir),
},
}
return results_summary
if __name__ == "__main__":
execute_notebook()
@@ -1,32 +1,52 @@
"""
Part 2: run the full notebook (cells 1-35)
Works around encoding issues with Chinese characters in the path
"""
import warnings, time, os, sys, json, traceback
warnings.filterwarnings('ignore')
import warnings
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as _p
_p.show = lambda *a, **kw: None
nb = r'D:\Code\doing_exercises\programs\外教作业外快\强化学习个人课程作业报告\notebooks\insurance_premium_risk.ipynb'
cells = json.load(open(nb, encoding='utf-8'))['cells']
code_cells = [c for c in cells if c['cell_type'] == 'code']
print(f"Total code cells: {len(code_cells)}")
matplotlib.use("Agg")
import matplotlib.pyplot as _real_mpl_plt
main_ns = globals().copy()
main_ns['RANDOM_STATE'] = 42
_real_mpl_plt.show = lambda *a, **kw: None
for i, cell in enumerate(code_cells, start=1):
src = ''.join(cell['source'])
print(f"\n{'='*60}")
print(f"Running cell {i}/{len(code_cells)}...")
try:
exec(compile(src, f'cell_{i}', 'exec'), main_ns)
except Exception as e:
print(f"ERROR cell {i}: {e}")
traceback.print_exc()
print("Stopping.")
break
import os
import sys
import time
import json
import traceback
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import xgboost as xgb
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
print("\n\nAll cells executed!")
from src.notebook_runner import execute_notebook
from src.runtime_paths import build_paths
paths = build_paths()
print(f"Project root : {paths.project_root}")
print(f"Notebook : {paths.notebook}")
print(f"Data dir : {paths.data_dir}")
print(f"Output dir : {paths.output_dir}")
ns = vars()
result = execute_notebook(start_at=1, namespace=ns)
print(f"\nExecution finished: {result['status']}")
print(f"Cells run: {len([c for c in result['cells'] if c['status'] == 'ok'])}/{result['total']}")
print(f"Output dir: {result['outputs']['output_dir']}")
@@ -0,0 +1,31 @@
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class RuntimePaths:
project_root: Path
notebook: Path
data_dir: Path
output_dir: Path
def ensure_outputs(self) -> None:
(self.output_dir / "figures").mkdir(parents=True, exist_ok=True)
(self.output_dir / "tables").mkdir(parents=True, exist_ok=True)
(self.output_dir / "predictions").mkdir(parents=True, exist_ok=True)
def as_injection(self) -> dict:
return {
"DATA_DIR": str(self.data_dir),
"OUTPUT_DIR": str(self.output_dir),
}
def build_paths() -> RuntimePaths:
root = Path(__file__).resolve().parents[1]
return RuntimePaths(
project_root=root,
notebook=root / "notebooks" / "insurance_premium_risk.ipynb",
data_dir=root / "dataset_final",
output_dir=root / "outputs",
)
@@ -0,0 +1,57 @@
# PPO for CarRacing-v3
From-scratch PPO implementation for CarRacing-v3. No Stable-Baselines or other RL libraries used.
## Setup
```bash
conda activate my_env
uv pip install -r requirements.txt
```
## Train
```bash
python train.py --steps 500000
```
## Evaluate
```bash
python src/evaluate.py --model models/ppo_carracing_final.pt --episodes 10
```
## TensorBoard
```bash
tensorboard --logdir logs/tensorboard
```
## Project Structure
```
src/
├── network.py # Actor (Gaussian policy) and Critic (Value) networks
├── replay_buffer.py # Rollout buffer with GAE computation
├── trainer.py # PPO update with clipped surrogate objective
├── utils.py # Environment wrappers (grayscale, resize, frame stack)
└── evaluate.py # Evaluation script
train.py # Main training entry point
models/ # Saved checkpoints
logs/tensorboard/ # TensorBoard logs
```
## Hyperparameters
| Parameter | Value |
|-----------|-------|
| Learning rate | 3e-4 |
| Gamma | 0.99 |
| GAE lambda | 0.95 |
| Clip epsilon | 0.2 |
| PPO epochs | 4 |
| Mini-batch size | 64 |
| Rollout steps | 2048 |
| Entropy coefficient | 0.01 |
| Value coefficient | 0.5 |
| Max gradient norm | 0.5 |
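
As a quick orientation, here is a minimal sketch of how these hyperparameters are wired into the trainer; it assumes the module layout listed above and mirrors the defaults used by `train.py`:

```python
from src.network import Actor, Critic
from src.replay_buffer import RolloutBuffer
from src.trainer import PPOTrainer
from src.utils import get_device

device = get_device()
state_shape, action_dim = (84, 84, 4), 3   # stacked 84x84 grayscale frames; steer/gas/brake

actor = Actor(state_shape=state_shape, action_dim=action_dim).to(device)
critic = Critic(state_shape=state_shape).to(device)
buffer = RolloutBuffer(buffer_size=2048, state_shape=state_shape, action_dim=action_dim)

trainer = PPOTrainer(
    actor=actor, critic=critic, rollout_buffer=buffer, device=device,
    lr=3e-4, gamma=0.99, gae_lambda=0.95, clip_eps=0.2,
    ppo_epochs=4, mini_batch_size=64,
    ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5,
)
```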
@@ -0,0 +1,136 @@
# PPO + CarRacing-v3 Task Progress Tracker
> Generated: 2026/04/30
---
## Assignment Requirements
Implement the PPO algorithm from scratch in Python, train an agent in the CarRacing-v3 environment, and submit:
- Technical report (≤3000 words, in English) as a PDF
- Source code + trained model as a zip file
- Deadline: 04/May/2026 23:59
- **Not allowed**: Stable-Baselines and other dedicated RL libraries
- **Allowed**: TensorBoard, PyTorch, Gymnasium
---
## 1. Completed ✅
| Step | Description | Files |
|------|------|------|
| ✅ Project structure | src/ directory, requirements.txt, README.md | [requirements.txt](requirements.txt), [README.md](README.md) |
| ✅ Policy/value networks | Actor (Gaussian policy outputting μ, σ) + Critic, CNN backbone | [src/network.py](src/network.py) |
| ✅ Rollout buffer | Trajectory storage + GAE advantage estimation + return computation | [src/replay_buffer.py](src/replay_buffer.py) |
| ✅ PPO trainer | PPO update (clipped surrogate objective + entropy regularization + value loss) | [src/trainer.py](src/trainer.py) |
| ✅ Environment preprocessing | Grayscale + resize (84×84) + 4-frame stack wrappers (see the sketch after this table) | [src/utils.py](src/utils.py) |
| ✅ Evaluation script | Rendering test + multi-episode average-return evaluation | [src/evaluate.py](src/evaluate.py) |
| ✅ Training entry point | Main training loop, TensorBoard logging, model checkpointing | [train.py](train.py) |
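
A minimal sketch of the preprocessing pipeline referenced above, assuming `src/utils.py` as listed (the printed shape is the expected stacked observation):

```python
from src.utils import make_env

env = make_env()     # grayscale + 84x84 resize + 4-frame stack wrappers
obs, _ = env.reset()
print(obs.shape)     # expected: (4, 84, 84), dtype uint8
```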
**Core algorithm implementation notes**
- Policy network: 3-layer CNN + FC(512) → μ, σ (Gaussian policy, tanh on μ)
- Value network: 3-layer CNN + FC(512) → V(s)
- GAE: λ=0.95 with advantage normalization (see the estimator below)
- PPO clipping: ε=0.2, 4 epochs per update, mini-batch size 64
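
For reference, these settings parameterize the standard clipped surrogate objective and GAE estimator (stated here for the report):

```latex
L^{\mathrm{CLIP}}(\theta)
  = \mathbb{E}_t\!\left[\min\!\big(r_t(\theta)\,\hat{A}_t,\;
      \operatorname{clip}\!\big(r_t(\theta),\,1-\epsilon,\,1+\epsilon\big)\,\hat{A}_t\big)\right],
  \qquad r_t(\theta) = \frac{\pi_\theta(a_t \mid s_t)}{\pi_{\theta_{\mathrm{old}}}(a_t \mid s_t)}
```

```latex
\hat{A}_t = \sum_{l \ge 0} (\gamma\lambda)^{l}\,\delta_{t+l},
  \qquad \delta_t = r_t + \gamma\,V(s_{t+1}) - V(s_t)
```

Here ε=0.2, γ=0.99, and λ=0.95, matching the hyperparameter table below.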
---
## 2. To Do ⬜
| Step | Description | Priority |
|------|------|--------|
| ⬜ Install dependencies | `uv pip install --system -r requirements.txt` | **High** |
| ⬜ Smoke test | Short run (~10,000 steps) to verify the code runs end to end | **High** |
| ⬜ Full training | Run 500k+ steps, estimated 5-8 hours (in the background) | **High (time-consuming)** |
| ⬜ Generate figures | Extract data from TensorBoard and plot with matplotlib | Medium |
| ⬜ Write report | Technical report in English (≤3000 words), typeset in LaTeX | Medium |
| ⬜ Compile PDF | Build CW1_1234560.pdf with XeLaTeX | Medium |
| ⬜ Package zip | Bundle source code + trained model into CW1_1234560.zip | Low |
---
## 3. File Structure
```
强化学习个人项目报告/
├── src/
│ ├── __init__.py
│   ├── network.py         # Actor + Critic CNN networks
│   ├── replay_buffer.py   # Rollout buffer + GAE
│   ├── trainer.py         # PPO update logic
│   ├── utils.py           # Environment preprocessing wrappers
│   └── evaluate.py        # Evaluation script
├── train.py               # Main training entry point
├── requirements.txt
├── README.md
└── TASK_PROGRESS.md       # This document
```
---
## 4. Hyperparameter Configuration
| Parameter | Value |
|------|-----|
| Learning rate | 3e-4 |
| Gamma | 0.99 |
| GAE lambda | 0.95 |
| Clip epsilon | 0.2 |
| PPO epochs | 4 |
| Mini-batch size | 64 |
| Rollout steps | 2048 |
| Entropy coefficient | 0.01 |
| Value coefficient | 0.5 |
| Max gradient norm | 0.5 |
| State shape | (84, 84, 4) |
| Action dim | 3 (continuous: steer, gas, brake) |
---
## 5. Next Actions
### Immediate
```bash
# 1. Install dependencies
uv pip install --system -r requirements.txt
# 2. Verify the code runs (short test)
python train.py --steps 10000
# 3. Start the full training run (in the background, estimated 5-8 hours)
python train.py --steps 500000
```
### After training
```bash
# TensorBoard visualization
tensorboard --logdir logs/tensorboard
# Evaluate the model
python src/evaluate.py --model models/ppo_carracing_final.pt --episodes 10
```
### After writing the report
```bash
# Compile the PDF
cd tex && xelatex CW1_1234560.tex
```
---
## 6. Report Structure (≤3000 words)
1. **Introduction**: RL background, the CarRacing-v3 task, state/action/reward space definitions
2. **Methodology**: PPO mathematical formulation, the clipping mechanism, GAE advantage estimation
3. **Implementation Details**: network architecture, training procedure, hyperparameters, problems encountered and solutions
4. **Results and Analysis**: training curves, evaluation results, comparison against an SB3 baseline
5. **Conclusion**: summary of PPO's hyperparameter sensitivity and the effectiveness of the actor-critic approach
---
## 7. Submission Checklist
- [ ] `CW1_1234560.pdf`: technical report (cover page + ≤3000 words)
- [ ] `CW1_1234560.zip`: source code + trained model .pt file
- [ ] All code comments in English
- [ ] Figure axes and legends in English
@@ -0,0 +1,5 @@
torch
gymnasium[box2d]
numpy
matplotlib
tensorboard
opencv-python
@@ -0,0 +1,6 @@
"""PPO Agent for CarRacing-v3 environment."""
from .network import Actor, Critic
from .replay_buffer import RolloutBuffer
from .trainer import PPOTrainer
__all__ = ['Actor', 'Critic', 'RolloutBuffer', 'PPOTrainer']
@@ -0,0 +1,92 @@
"""Evaluation script for trained PPO agent."""
import torch
import numpy as np
import gymnasium as gym
from src.utils import make_env, get_device
from src.network import Actor, Critic
def evaluate(actor, env, num_episodes=10, device=torch.device("cpu")):
"""Evaluate actor and return average return."""
actor.eval()
returns = []
for ep in range(num_episodes):
obs, _ = env.reset()
obs = np.transpose(obs, (1, 2, 0)) # (C, H, W) -> (H, W, C) for storage
total_reward = 0
done = False
steps = 0
while not done and steps < 1000:
with torch.no_grad():
# Convert to tensor (B, C, H, W)
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(obs_t)
# Sample action
dist = torch.distributions.Normal(mu, std)
action = dist.sample()
action = torch.clamp(action, -1, 1).squeeze(0).cpu().numpy()
obs, reward, terminated, truncated, _ = env.step(action)
# Convert to (C, H, W) format
obs = np.transpose(obs, (1, 2, 0))
total_reward += reward
done = terminated or truncated
steps += 1
returns.append(total_reward)
print(f"Episode {ep+1}/{num_episodes}: return={total_reward:.1f}, steps={steps}")
actor.train()
return np.mean(returns), np.std(returns)
def evaluate_render(actor, env, device):
"""Render and evaluate agent with visualization."""
actor.eval()
obs, _ = env.reset()
obs = np.transpose(obs, (1, 2, 0))
env.render_mode = "human"
done = False
total_reward = 0
while not done:
with torch.no_grad():
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(obs_t)
dist = torch.distributions.Normal(mu, std)
action = dist.sample()
action = torch.clamp(action, -1, 1).squeeze(0).cpu().numpy()
obs, reward, terminated, truncated, _ = env.step(action)
obs = np.transpose(obs, (1, 2, 0))
total_reward += reward
done = terminated or truncated
env.render()
actor.train()
print(f"Final return: {total_reward:.1f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, required=True, help="Path to trained model")
parser.add_argument("--episodes", type=int, default=5, help="Number of evaluation episodes")
args = parser.parse_args()
device = get_device()
env = make_env()
actor = Actor().to(device)
critic = Critic().to(device)
# Load model
checkpoint = torch.load(args.model, map_location=device, weights_only=False)
actor.load_state_dict(checkpoint["actor"])
print(f"Loaded model from {args.model}")
mean_return, std_return = evaluate(actor, env, num_episodes=args.episodes, device=device)
print(f"\nEvaluation: mean={mean_return:.2f}, std={std_return:.2f}")
@@ -0,0 +1,78 @@
"""Neural network architectures for Actor and Critic."""
import torch
import torch.nn as nn
import torch.nn.functional as F
class Actor(nn.Module):
"""Actor network outputting Gaussian policy parameters (mu, sigma)."""
def __init__(self, state_shape=(84, 84, 4), action_dim=3):
super().__init__()
c, h, w = state_shape[2], state_shape[0], state_shape[1] # channels, height, width
self.conv = nn.Sequential(
nn.Conv2d(c, 32, kernel_size=8, stride=4),
nn.ReLU(),
nn.Conv2d(32, 64, kernel_size=4, stride=2),
nn.ReLU(),
nn.Conv2d(64, 64, kernel_size=3, stride=1),
nn.ReLU(),
)
        # Feature map size: 84x84 -> 20 -> 9 -> 7 through the three conv layers
        feat_size = 64 * 7 * 7
self.fc = nn.Sequential(
nn.Linear(feat_size, 512),
nn.ReLU(),
)
self.mu_head = nn.Linear(512, action_dim)
self.log_std_head = nn.Linear(512, action_dim)
# Initialize output layers
nn.init.orthogonal_(self.mu_head.weight, gain=0.01)
nn.init.orthogonal_(self.log_std_head.weight, gain=0.01)
def forward(self, x):
"""Forward pass returning (mu, log_std)."""
x = x / 255.0 # Normalize
x = self.conv(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
mu = torch.tanh(self.mu_head(x))
log_std = self.log_std_head(x)
log_std = torch.clamp(log_std, -20, 2)
return mu, log_std.exp()
class Critic(nn.Module):
"""Critic network estimating state value V(s)."""
def __init__(self, state_shape=(84, 84, 4)):
super().__init__()
c, h, w = state_shape[2], state_shape[0], state_shape[1]
self.conv = nn.Sequential(
nn.Conv2d(c, 32, kernel_size=8, stride=4),
nn.ReLU(),
nn.Conv2d(32, 64, kernel_size=4, stride=2),
nn.ReLU(),
nn.Conv2d(64, 64, kernel_size=3, stride=1),
nn.ReLU(),
)
        feat_size = 64 * 7 * 7  # 84x84 -> 20 -> 9 -> 7 through the conv stack
self.fc = nn.Sequential(
nn.Linear(feat_size, 512),
nn.ReLU(),
nn.Linear(512, 1)
)
def forward(self, x):
"""Forward pass returning V(s)."""
x = x / 255.0
x = self.conv(x)
x = x.view(x.size(0), -1)
return self.fc(x)
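

# --- Illustrative shape sanity check (sketch; not part of the training pipeline) ---
# Assumes the Actor/Critic definitions above: with (B, 4, 84, 84) inputs the conv stack
# produces a 7x7x64 feature map, so the heads should emit (1, 3) and (1, 1) tensors.
if __name__ == "__main__":
    dummy = torch.zeros(1, 4, 84, 84)  # four stacked 84x84 grayscale frames
    mu, std = Actor()(dummy)
    value = Critic()(dummy)
    print(mu.shape, std.shape, value.shape)  # expected: (1, 3) (1, 3) (1, 1)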
@@ -0,0 +1,64 @@
"""Rollout buffer for storing trajectories."""
import numpy as np
class RolloutBuffer:
"""Stores trajectories for PPO training."""
def __init__(self, buffer_size, state_shape, action_dim):
self.buffer_size = buffer_size
self.ptr = 0
self.size = 0
self.states = np.zeros((buffer_size, *state_shape), dtype=np.uint8)
self.actions = np.zeros((buffer_size, action_dim), dtype=np.float32)
self.rewards = np.zeros(buffer_size, dtype=np.float32)
self.dones = np.zeros(buffer_size, dtype=np.bool_)
self.values = np.zeros(buffer_size, dtype=np.float32)
self.log_probs = np.zeros((buffer_size, action_dim), dtype=np.float32)
def add(self, state, action, reward, done, value, log_prob):
"""Add a transition to the buffer."""
self.states[self.ptr] = state
self.actions[self.ptr] = action
self.rewards[self.ptr] = reward
self.dones[self.ptr] = done
self.values[self.ptr] = value
self.log_probs[self.ptr] = log_prob
self.ptr = (self.ptr + 1) % self.buffer_size
self.size = min(self.size + 1, self.buffer_size)
def compute_returns(self, last_value, gamma=0.99, gae_lambda=0.95):
"""Compute returns and advantages using GAE."""
advantages = np.zeros(self.size, dtype=np.float32)
last_gae = 0
# Compute GAE backwards
for t in reversed(range(self.size)):
if t == self.size - 1:
next_value = last_value
else:
next_value = self.values[t + 1]
delta = self.rewards[t] + gamma * next_value * (1 - self.dones[t]) - self.values[t]
last_gae = delta + gamma * gae_lambda * (1 - self.dones[t]) * last_gae
advantages[t] = last_gae
returns = advantages + self.values[:self.size]
return returns, advantages
def get(self):
"""Return all data as numpy arrays."""
return (
self.states[:self.size],
self.actions[:self.size],
self.rewards[:self.size],
self.dones[:self.size],
self.values[:self.size],
self.log_probs[:self.size],
)
def reset(self):
"""Reset buffer."""
self.ptr = 0
self.size = 0
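

# --- Illustrative sketch (not used by training): GAE on a tiny 3-step trajectory ---
# Assumes the RolloutBuffer above; the rewards and values below are made up purely to
# show the call pattern of add() followed by compute_returns().
if __name__ == "__main__":
    buf = RolloutBuffer(buffer_size=3, state_shape=(84, 84, 4), action_dim=3)
    for r, v in [(1.0, 0.5), (0.0, 0.4), (1.0, 0.3)]:
        buf.add(np.zeros((84, 84, 4), dtype=np.uint8), np.zeros(3), r, False, v, np.zeros(3))
    returns, advantages = buf.compute_returns(last_value=0.2, gamma=0.99, gae_lambda=0.95)
    print("returns:", returns, "advantages:", advantages)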
@@ -0,0 +1,123 @@
"""PPO Trainer with GAE advantage estimation."""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
class PPOTrainer:
"""PPO trainer handling the training loop."""
def __init__(
self,
actor,
critic,
rollout_buffer,
device,
clip_eps=0.2,
gamma=0.99,
gae_lambda=0.95,
lr=3e-4,
ent_coef=0.01,
vf_coef=0.5,
max_grad_norm=0.5,
ppo_epochs=4,
mini_batch_size=64,
):
self.actor = actor
self.critic = critic
self.buffer = rollout_buffer
self.device = device
self.clip_eps = clip_eps
self.gamma = gamma
self.gae_lambda = gae_lambda
self.ent_coef = ent_coef
self.vf_coef = vf_coef
self.max_grad_norm = max_grad_norm
self.ppo_epochs = ppo_epochs
self.mini_batch_size = mini_batch_size
# Separate optimizers
self.actor_optim = optim.Adam(actor.parameters(), lr=lr)
self.critic_optim = optim.Adam(critic.parameters(), lr=lr)
self.loss_history = {'actor': [], 'critic': [], 'entropy': [], 'total': []}
def update(self, last_value):
"""Perform one PPO update."""
states, actions, rewards, dones, values, log_probs_old = self.buffer.get()
# Compute returns and advantages
returns, advantages = self.buffer.compute_returns(
last_value, self.gamma, self.gae_lambda
)
# Normalize advantages
advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        # Convert to tensors; states are stored as (B, H, W, C), the conv nets expect (B, C, H, W)
        states_t = torch.from_numpy(states).float().permute(0, 3, 1, 2).to(self.device)
actions_t = torch.from_numpy(actions).float().to(self.device)
log_probs_old_t = torch.from_numpy(log_probs_old).float().to(self.device)
returns_t = torch.from_numpy(returns).float().to(self.device)
advantages_t = torch.from_numpy(advantages).float().to(self.device)
dataset = TensorDataset(states_t, actions_t, log_probs_old_t, returns_t, advantages_t)
loader = DataLoader(dataset, batch_size=self.mini_batch_size, shuffle=True)
total_actor_loss = 0
total_critic_loss = 0
total_entropy = 0
count = 0
for _ in range(self.ppo_epochs):
for batch in loader:
                s, a, log_pi_old, ret, adv = batch
                # The buffer broadcasts the summed log-prob over the action dims, so keep a
                # single column; give the advantage a trailing dim so the products broadcast.
                log_pi_old = log_pi_old[:, :1]
                adv = adv.unsqueeze(-1)
# Get current policy distribution
mu, std = self.actor(s)
dist = torch.distributions.Normal(mu, std)
log_pi = dist.log_prob(a).sum(dim=-1, keepdim=True)
entropy = dist.entropy().sum(dim=-1, keepdim=True)
# Probability ratio
ratio = torch.exp(log_pi - log_pi_old)
# Clipped surrogate objective
surr1 = ratio * adv
surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * adv
actor_loss = -torch.min(surr1, surr2).mean()
# Value loss
value = self.critic(s)
critic_loss = nn.MSELoss()(value.squeeze(), ret)
# Total loss
loss = actor_loss + self.vf_coef * critic_loss - self.ent_coef * entropy.mean()
# Update
self.actor_optim.zero_grad()
self.critic_optim.zero_grad()
loss.backward()
nn.utils.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)
nn.utils.clip_grad_norm_(self.critic.parameters(), self.max_grad_norm)
self.actor_optim.step()
self.critic_optim.step()
total_actor_loss += actor_loss.item()
total_critic_loss += critic_loss.item()
total_entropy += entropy.mean().item()
count += 1
avg_actor = total_actor_loss / count
avg_critic = total_critic_loss / count
avg_entropy = total_entropy / count
self.loss_history['actor'].append(avg_actor)
self.loss_history['critic'].append(avg_critic)
self.loss_history['entropy'].append(avg_entropy)
self.loss_history['total'].append(avg_actor + avg_critic)
self.buffer.reset()
return avg_actor, avg_critic, avg_entropy
@@ -0,0 +1,87 @@
"""Utility functions for environment, device detection, and TensorBoard."""
import gymnasium as gym
import numpy as np
import torch
from collections import deque
class GrayScaleWrapper(gym.ObservationWrapper):
"""Convert RGB observation to grayscale."""
def __init__(self, env):
super().__init__(env)
def observation(self, obs):
# RGB to grayscale: weighted average
gray = 0.299 * obs[:, :, 0] + 0.587 * obs[:, :, 1] + 0.114 * obs[:, :, 2]
return gray.astype(np.uint8)
class ResizeWrapper(gym.ObservationWrapper):
"""Resize observation to target size."""
def __init__(self, env, size=(84, 84)):
super().__init__(env)
self.size = size
def observation(self, obs):
import cv2
return cv2.resize(obs, self.size, interpolation=cv2.INTER_AREA)
class FrameStackWrapper(gym.ObservationWrapper):
"""Stack last N frames."""
def __init__(self, env, num_stack=4):
super().__init__(env)
self.num_stack = num_stack
self.frames = deque(maxlen=num_stack)
obs_shape = env.observation_space.shape
self.observation_space = gym.spaces.Box(
low=0, high=255,
shape=(num_stack, *obs_shape[-2:]),
dtype=np.uint8
)
def reset(self, **kwargs):
obs, info = self.env.reset(**kwargs)
for _ in range(self.num_stack):
self.frames.append(obs)
return self._get_observation(), info
def observation(self, obs):
self.frames.append(obs)
return self._get_observation()
def _get_observation(self):
return np.stack(list(self.frames), axis=0)
def make_env(env_id="CarRacing-v3", gray_scale=True, resize=True, frame_stack=4):
"""Create preprocessed CarRacing environment."""
env = gym.make(env_id, render_mode="rgb_array")
if resize:
env = ResizeWrapper(env, size=(84, 84))
if gray_scale:
env = GrayScaleWrapper(env)
if frame_stack > 1:
env = FrameStackWrapper(env, num_stack=frame_stack)
return env
def get_device():
"""Detect and return available device."""
if torch.cuda.is_available():
device = torch.device("cuda")
print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
device = torch.device("cpu")
print("Using CPU")
return device
def preprocess_obs(obs):
"""Ensure observation is in correct format for network."""
if len(obs.shape) == 2: # single channel
obs = np.expand_dims(obs, axis=0)
return obs
@@ -0,0 +1,192 @@
"""Main training script for PPO on CarRacing-v3."""
import os
import time
import argparse
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
from src.network import Actor, Critic
from src.replay_buffer import RolloutBuffer
from src.trainer import PPOTrainer
from src.utils import make_env, get_device
def collect_rollout(actor, critic, env, buffer, device, rollout_steps):
"""Collect rollout data."""
obs, _ = env.reset()
# Convert to (C, H, W) format for storage
obs = np.transpose(obs, (1, 2, 0))
for step in range(rollout_steps):
with torch.no_grad():
# Convert to (B, C, H, W)
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(obs_t)
dist = torch.distributions.Normal(mu, std)
action = dist.sample()
action = torch.clamp(action, -1, 1)
log_prob = dist.log_prob(action).sum(dim=-1, keepdim=True)
value = critic(obs_t).squeeze(0).item()
action_np = action.squeeze(0).cpu().numpy()
log_prob_np = log_prob.squeeze(0).cpu().numpy()
next_obs, reward, terminated, truncated, _ = env.step(action_np)
done = terminated or truncated
# Convert next_obs to (C, H, W) for storage
next_obs_stored = np.transpose(next_obs, (1, 2, 0))
buffer.add(obs.copy(), action_np, reward, done, value, log_prob_np)
obs = next_obs_stored
        if done:
            obs, _ = env.reset()
            obs = np.transpose(obs, (1, 2, 0))
    # Return the final observation so the caller can bootstrap the last value for GAE
    return obs
def train(
total_steps=500000,
rollout_steps=2048,
eval_interval=10,
save_interval=50,
device=None,
):
"""Main training loop."""
if device is None:
device = get_device()
env = make_env()
eval_env = make_env()
state_shape = (84, 84, 4)
action_dim = 3
actor = Actor(state_shape=state_shape, action_dim=action_dim).to(device)
critic = Critic(state_shape=state_shape).to(device)
buffer = RolloutBuffer(
buffer_size=rollout_steps,
state_shape=state_shape,
action_dim=action_dim,
)
trainer = PPOTrainer(
actor=actor,
critic=critic,
rollout_buffer=buffer,
device=device,
clip_eps=0.2,
gamma=0.99,
gae_lambda=0.95,
lr=3e-4,
ent_coef=0.01,
vf_coef=0.5,
max_grad_norm=0.5,
ppo_epochs=4,
mini_batch_size=64,
)
# TensorBoard
log_dir = os.path.join("logs", "tensorboard", f"run_{int(time.time())}")
writer = SummaryWriter(log_dir)
print(f"Training on {device}")
print(f"Log directory: {log_dir}")
episode = 0
total_timesteps = 0
episode_rewards = []
recent_rewards = []
while total_timesteps < total_steps:
        # Collect rollout; keep the final observation for bootstrapping the last value
        obs = collect_rollout(actor, critic, env, buffer, device, rollout_steps)
# Get last value for GAE
with torch.no_grad():
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
last_value = critic(obs_t).squeeze(0).item()
        # Estimate rollout reward before the update (trainer.update() resets the buffer)
        ep_reward = buffer.rewards[:buffer.size].sum()
        # PPO update
        actor_loss, critic_loss, entropy = trainer.update(last_value)
        # Logging
        writer.add_scalar("Loss/Actor", actor_loss, total_timesteps)
        writer.add_scalar("Loss/Critic", critic_loss, total_timesteps)
        writer.add_scalar("Loss/Entropy", entropy, total_timesteps)
        total_timesteps += rollout_steps
        episode += 1
episode_rewards.append(ep_reward)
recent_rewards.append(ep_reward)
# Running average of last 10 episodes
avg_reward = np.mean(recent_rewards[-10:]) if len(recent_rewards) >= 10 else np.mean(recent_rewards)
writer.add_scalar("Reward/Episode", ep_reward, total_timesteps)
writer.add_scalar("Reward/AvgLast10", avg_reward, total_timesteps)
print(f"Episode {episode}, steps {total_timesteps}, ep_reward={ep_reward:.1f}, avg_10={avg_reward:.1f}")
# Evaluation
if episode % eval_interval == 0:
eval_returns = []
for _ in range(5):
eval_obs, _ = eval_env.reset()
eval_obs = np.transpose(eval_obs, (1, 2, 0))
eval_reward = 0
done = False
while not done:
with torch.no_grad():
eval_obs_t = torch.from_numpy(eval_obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(eval_obs_t)
action = torch.clamp(mu, -1, 1).squeeze(0).cpu().numpy()
eval_obs, reward, terminated, truncated, _ = eval_env.step(action)
eval_obs = np.transpose(eval_obs, (1, 2, 0))
eval_reward += reward
done = terminated or truncated
eval_returns.append(eval_reward)
mean_eval = np.mean(eval_returns)
writer.add_scalar("Eval/MeanReturn", mean_eval, episode)
print(f" Eval: mean_return={mean_eval:.2f}")
# Save model
if episode % save_interval == 0:
os.makedirs("models", exist_ok=True)
torch.save({
"actor": actor.state_dict(),
"critic": critic.state_dict(),
"episode": episode,
"timesteps": total_timesteps,
}, os.path.join("models", f"ppo_carracing_ep{episode}.pt"))
print(f" Saved model at episode {episode}")
# Save final model
os.makedirs("models", exist_ok=True)
torch.save({
"actor": actor.state_dict(),
"critic": critic.state_dict(),
"episode": episode,
"timesteps": total_timesteps,
}, os.path.join("models", "ppo_carracing_final.pt"))
writer.close()
print(f"Training complete! Total episodes: {episode}")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--steps", type=int, default=500000, help="Total training steps")
parser.add_argument("--rollout", type=int, default=2048, help="Rollout buffer size")
args = parser.parse_args()
device = get_device()
train(total_steps=args.steps, rollout_steps=args.rollout, device=device)