feat: add reinforcement learning project report and restructure coursework report code

- Add the personal reinforcement learning project report, including a PPO algorithm implemented from scratch in PyTorch (see the objective sketch below)
- Restructure the coursework report code, extracting runtime path management and notebook execution logic into standalone modules
- Update requirements.txt with the reinforcement learning dependencies
- Simplify the model comparison results table, keeping only the baseline logistic regression row
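
The PPO report code itself is not part of this diff; purely as a point of reference, below is a minimal, hypothetical sketch of the clipped surrogate objective that a from-scratch PyTorch PPO implementation typically centres on. All names here are illustrative and are not taken from the committed report.

import torch

def ppo_clip_loss(new_log_probs, old_log_probs, advantages, clip_eps=0.2):
    # Probability ratio between the updated policy and the policy that collected the data
    ratio = torch.exp(new_log_probs - old_log_probs)
    # Clipped surrogate objective: take the pessimistic minimum of the two terms
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_eps, 1.0 + clip_eps) * advantages
    return -torch.min(unclipped, clipped).mean()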
2026-04-30 16:54:41 +08:00
parent 6ac02ba4fe
commit d353133b31
21 changed files with 1639 additions and 102 deletions
@@ -43,17 +43,68 @@
"execution_count": null,
"id": "a12f069a",
"metadata": {},
"outputs": [],
"source": "import xgboost as xgb\nimport optuna\noptuna.logging.set_verbosity(optuna.logging.WARNING)\n\n# GPU Fallback: 自动检测CUDA可用性,无GPU时自动切换到CPU\ntry:\n import subprocess\n result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n USE_GPU = result.returncode == 0\nexcept:\n USE_GPU = False\n\nXGB_TREE_METHOD = 'gpu_hist' if USE_GPU else 'hist'\nXGB_DEVICE = 'cuda' if USE_GPU else 'cpu'\nprint(f'XGBoost compute method: {\"GPU (CUDA)\" if USE_GPU else \"CPU\"}')\n\nRANDOM_STATE = 42\nnp.random.seed(RANDOM_STATE)\nplt.rcParams['figure.figsize'] = (10, 6)\nplt.rcParams['font.size'] = 12\nsns.set_style('whitegrid')\nprint('All libraries imported successfully!')"
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mRunning cells with 'my_env (Python 3.10.18)' requires the ipykernel package.\n",
"\u001b[1;31m<a href='command:jupyter.createPythonEnvAndSelectController'>Create a Python Environment</a> with the required packages.\n",
"\u001b[1;31mOr install 'ipykernel' using the command: 'conda install -n my_env ipykernel --update-deps --force-reinstall'"
]
}
],
"source": [
"import xgboost as xgb\n",
"import optuna\n",
"optuna.logging.set_verbosity(optuna.logging.WARNING)\n",
"\n",
"# GPU Fallback: 自动检测CUDA可用性,无GPU时自动切换到CPU\n",
"try:\n",
" import subprocess\n",
" result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n",
" USE_GPU = result.returncode == 0\n",
"except:\n",
" USE_GPU = False\n",
"\n",
"XGB_TREE_METHOD = 'gpu_hist' if USE_GPU else 'hist'\n",
"XGB_DEVICE = 'cuda' if USE_GPU else 'cpu'\n",
"print(f'XGBoost compute method: {\"GPU (CUDA)\" if USE_GPU else \"CPU\"}')\n",
"\n",
"RANDOM_STATE = 42\n",
"np.random.seed(RANDOM_STATE)\n",
"plt.rcParams['figure.figsize'] = (10, 6)\n",
"plt.rcParams['font.size'] = 12\n",
"sns.set_style('whitegrid')\n",
"print('All libraries imported successfully!')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1c4b453a",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mRunning cells with 'my_env (Python 3.10.18)' requires the ipykernel package.\n",
"\u001b[1;31m<a href='command:jupyter.createPythonEnvAndSelectController'>Create a Python Environment</a> with the required packages.\n",
"\u001b[1;31mOr install 'ipykernel' using the command: 'conda install -n my_env ipykernel --update-deps --force-reinstall'"
]
}
],
"source": [
"DATA_DIR = r'd:\\Code\\doing_exercises\\programs\\外教作业外快\\强化学习个人课程作业报告\\dataset_final'\nOUTPUT_DIR = r'd:\\Code\\doing_exercises\\programs\\外教作业外快\\强化学习个人课程作业报告\\outputs'\n\ntrain_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))\nval_df = pd.read_csv(os.path.join(DATA_DIR, 'val.csv'))\ntest_df = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))\n\nprint(f'Train shape: {train_df.shape}')\nprint(f'Val shape: {val_df.shape}')\nprint(f'Test shape: {test_df.shape}')"
"train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))\n",
"val_df = pd.read_csv(os.path.join(DATA_DIR, 'val.csv'))\n",
"test_df = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))\n",
"\n",
"print(f'Train shape: {train_df.shape}')\n",
"print(f'Val shape: {val_df.shape}')\n",
"print(f'Test shape: {test_df.shape}')"
]
},
{
@@ -71,7 +122,23 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== TARGET DISTRIBUTION (TRAIN) ===')\ntarget_counts = train_df['premium_risk'].value_counts()\nprint(target_counts)\nprint((target_counts / len(train_df) * 100).round(2))\n\nfig, ax = plt.subplots(figsize=(8, 5))\ncolors = ['#4CAF50', '#FFC107', '#F44336']\ntarget_counts.sort_index().plot(kind='bar', ax=ax, color=colors)\nax.set_title('Target Variable Distribution (Train)', fontsize=14)\nax.set_xlabel('Premium Risk')\nax.set_ylabel('Count')\nax.set_xticklabels(ax.get_xticklabels(), rotation=0)\nfor i, (idx, val) in enumerate(target_counts.sort_index().items()):\n ax.text(i, val + 300, f'{val}\\n({val/len(train_df)*100:.1f}%)', ha='center')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'target_distribution.png'), dpi=150)\nplt.show()"
"print('=== TARGET DISTRIBUTION (TRAIN) ===')\n",
"target_counts = train_df['premium_risk'].value_counts()\n",
"print(target_counts)\n",
"print((target_counts / len(train_df) * 100).round(2))\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 5))\n",
"colors = ['#4CAF50', '#FFC107', '#F44336']\n",
"target_counts.sort_index().plot(kind='bar', ax=ax, color=colors)\n",
"ax.set_title('Target Variable Distribution (Train)', fontsize=14)\n",
"ax.set_xlabel('Premium Risk')\n",
"ax.set_ylabel('Count')\n",
"ax.set_xticklabels(ax.get_xticklabels(), rotation=0)\n",
"for i, (idx, val) in enumerate(target_counts.sort_index().items()):\n",
" ax.text(i, val + 300, f'{val}\\n({val/len(train_df)*100:.1f}%)', ha='center')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'target_distribution.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -81,7 +148,18 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== MISSING VALUES (TRAIN) ===')\nmissing = train_df.isnull().sum()\nmissing = missing[missing > 0].sort_values(ascending=False)\nprint(missing)\n\nfig, ax = plt.subplots(figsize=(12, 6))\nmissing.plot(kind='barh', ax=ax, color='coral')\nax.set_title('Missing Values per Column (Train)', fontsize=14)\nax.set_xlabel('Count')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'missing_values.png'), dpi=150)\nplt.show()"
"print('=== MISSING VALUES (TRAIN) ===')\n",
"missing = train_df.isnull().sum()\n",
"missing = missing[missing > 0].sort_values(ascending=False)\n",
"print(missing)\n",
"\n",
"fig, ax = plt.subplots(figsize=(12, 6))\n",
"missing.plot(kind='barh', ax=ax, color='coral')\n",
"ax.set_title('Missing Values per Column (Train)', fontsize=14)\n",
"ax.set_xlabel('Count')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'missing_values.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -91,7 +169,21 @@
"metadata": {},
"outputs": [],
"source": [
"noise_cols = [c for c in train_df.columns if 'noise' in c.lower()]\nprint(f'Noise features: {noise_cols}')\n\nprint('\\n=== bureau_risk_index stats ===')\nprint(train_df['bureau_risk_index'].describe())\n\nfig, ax = plt.subplots(figsize=(8, 5))\ntrain_df.boxplot(column='bureau_risk_index', by='premium_risk', ax=ax)\nax.set_title('bureau_risk_index by Premium Risk')\nax.set_xlabel('Premium Risk')\nax.set_ylabel('bureau_risk_index')\nplt.suptitle('')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'bureau_risk_boxplot.png'), dpi=150)\nplt.show()"
"noise_cols = [c for c in train_df.columns if 'noise' in c.lower()]\n",
"print(f'Noise features: {noise_cols}')\n",
"\n",
"print('\\n=== bureau_risk_index stats ===')\n",
"print(train_df['bureau_risk_index'].describe())\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 5))\n",
"train_df.boxplot(column='bureau_risk_index', by='premium_risk', ax=ax)\n",
"ax.set_title('bureau_risk_index by Premium Risk')\n",
"ax.set_xlabel('Premium Risk')\n",
"ax.set_ylabel('bureau_risk_index')\n",
"plt.suptitle('')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'bureau_risk_boxplot.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -112,7 +204,28 @@
"metadata": {},
"outputs": [],
"source": [
"def screen_single_feature_leakage(df, target_col, feature_cols, scoring='f1_macro'):\n from sklearn.tree import DecisionTreeClassifier\n results = []\n for col in feature_cols:\n temp_df = df[[col, target_col]].dropna()\n X_temp = temp_df[[col]].values\n y_temp = temp_df[target_col].values\n le = LabelEncoder()\n y_enc = le.fit_transform(y_temp)\n try:\n clf = DecisionTreeClassifier(random_state=RANDOM_STATE, max_depth=3)\n scores = cross_val_score(clf, X_temp, y_enc, cv=3, scoring=scoring)\n results.append({'feature': col, 'mean_f1_macro': scores.mean(), 'std': scores.std()})\n except:\n results.append({'feature': col, 'mean_f1_macro': 0.0, 'std': 0.0})\n return pd.DataFrame(results).sort_values('mean_f1_macro', ascending=False)\n\nfeature_to_test = [c for c in train_df.columns if c not in ['applicant_id', 'customer_key', 'premium_risk']]\nprint('Screening single features for leakage detection (this may take a few minutes)...')\nleakage_results = screen_single_feature_leakage(train_df, 'premium_risk', feature_to_test)\nprint('\\n=== TOP 10 SINGLE-FEATURE F1 MACRO SCORES ===')\nprint(leakage_results.head(10))"
"def screen_single_feature_leakage(df, target_col, feature_cols, scoring='f1_macro'):\n",
" from sklearn.tree import DecisionTreeClassifier\n",
" results = []\n",
" for col in feature_cols:\n",
" temp_df = df[[col, target_col]].dropna()\n",
" X_temp = temp_df[[col]].values\n",
" y_temp = temp_df[target_col].values\n",
" le = LabelEncoder()\n",
" y_enc = le.fit_transform(y_temp)\n",
" try:\n",
" clf = DecisionTreeClassifier(random_state=RANDOM_STATE, max_depth=3)\n",
" scores = cross_val_score(clf, X_temp, y_enc, cv=3, scoring=scoring)\n",
" results.append({'feature': col, 'mean_f1_macro': scores.mean(), 'std': scores.std()})\n",
" except:\n",
" results.append({'feature': col, 'mean_f1_macro': 0.0, 'std': 0.0})\n",
" return pd.DataFrame(results).sort_values('mean_f1_macro', ascending=False)\n",
"\n",
"feature_to_test = [c for c in train_df.columns if c not in ['applicant_id', 'customer_key', 'premium_risk']]\n",
"print('Screening single features for leakage detection (this may take a few minutes)...')\n",
"leakage_results = screen_single_feature_leakage(train_df, 'premium_risk', feature_to_test)\n",
"print('\\n=== TOP 10 SINGLE-FEATURE F1 MACRO SCORES ===')\n",
"print(leakage_results.head(10))"
]
},
{
@@ -122,7 +235,26 @@
"metadata": {},
"outputs": [],
"source": [
"LEAKAGE_THRESHOLD = 0.85\nprint('=== LEAKAGE DETECTION RESULTS ===')\nprint(leakage_results.head(10))\n\nbureau_score = leakage_results[leakage_results['feature'] == 'bureau_risk_index']['mean_f1_macro'].values[0]\nprint(f'\\nbureau_risk_index F1 macro: {bureau_score:.4f}')\n\nif bureau_score > LEAKAGE_THRESHOLD:\n print('\\n*** ALERT: bureau_risk_index shows abnormally high predictive power! ***')\n print('*** This is consistent with a leakage feature. ***')\n print('*** ACTION: bureau_risk_index will be removed from features. ***')\n LEAKAGE_FEATURE = 'bureau_risk_index'\nelse:\n top_feat = leakage_results.iloc[0]['feature']\n top_score = leakage_results.iloc[0]['mean_f1_macro']\n print(f'\\nTop feature: {top_feat} with F1 macro = {top_score:.4f}')\n if top_score > 0.80:\n LEAKAGE_FEATURE = top_feat\n else:\n LEAKAGE_FEATURE = None"
"LEAKAGE_THRESHOLD = 0.85\n",
"print('=== LEAKAGE DETECTION RESULTS ===')\n",
"print(leakage_results.head(10))\n",
"\n",
"bureau_score = leakage_results[leakage_results['feature'] == 'bureau_risk_index']['mean_f1_macro'].values[0]\n",
"print(f'\\nbureau_risk_index F1 macro: {bureau_score:.4f}')\n",
"\n",
"if bureau_score > LEAKAGE_THRESHOLD:\n",
" print('\\n*** ALERT: bureau_risk_index shows abnormally high predictive power! ***')\n",
" print('*** This is consistent with a leakage feature. ***')\n",
" print('*** ACTION: bureau_risk_index will be removed from features. ***')\n",
" LEAKAGE_FEATURE = 'bureau_risk_index'\n",
"else:\n",
" top_feat = leakage_results.iloc[0]['feature']\n",
" top_score = leakage_results.iloc[0]['mean_f1_macro']\n",
" print(f'\\nTop feature: {top_feat} with F1 macro = {top_score:.4f}')\n",
" if top_score > 0.80:\n",
" LEAKAGE_FEATURE = top_feat\n",
" else:\n",
" LEAKAGE_FEATURE = None"
]
},
{
@@ -132,7 +264,18 @@
"metadata": {},
"outputs": [],
"source": [
"if LEAKAGE_FEATURE:\n print(f'Removing leakage feature: {LEAKAGE_FEATURE}')\n train_df_clean = train_df.drop(columns=[LEAKAGE_FEATURE])\n val_df_clean = val_df.drop(columns=[LEAKAGE_FEATURE])\n test_df_clean = test_df.drop(columns=[LEAKAGE_FEATURE])\nelse:\n print('No leakage feature to remove.')\n train_df_clean = train_df.copy()\n val_df_clean = val_df.copy()\n test_df_clean = test_df.copy()\n\nprint(f'After removal - Train: {train_df_clean.shape}, Val: {val_df_clean.shape}, Test: {test_df_clean.shape}')"
"if LEAKAGE_FEATURE:\n",
" print(f'Removing leakage feature: {LEAKAGE_FEATURE}')\n",
" train_df_clean = train_df.drop(columns=[LEAKAGE_FEATURE])\n",
" val_df_clean = val_df.drop(columns=[LEAKAGE_FEATURE])\n",
" test_df_clean = test_df.drop(columns=[LEAKAGE_FEATURE])\n",
"else:\n",
" print('No leakage feature to remove.')\n",
" train_df_clean = train_df.copy()\n",
" val_df_clean = val_df.copy()\n",
" test_df_clean = test_df.copy()\n",
"\n",
"print(f'After removal - Train: {train_df_clean.shape}, Val: {val_df_clean.shape}, Test: {test_df_clean.shape}')"
]
},
{
@@ -150,7 +293,19 @@
"metadata": {},
"outputs": [],
"source": [
"ID_COLS = ['applicant_id', 'customer_key', 'applicant_ref_code']\nNOISE_COLS = ['noise_feature_1', 'noise_feature_2', 'noise_feature_3', 'noise_feature_4', 'noise_feature_5']\nTARGET_COL = 'premium_risk'\n\nall_cols = train_df_clean.columns.tolist()\nfeature_cols_all = [c for c in all_cols if c not in ID_COLS + NOISE_COLS + [TARGET_COL]]\n\nNUMERIC_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=[np.number]).columns.tolist()\nCATEGORICAL_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=['object']).columns.tolist()\n\nprint(f'Total features: {len(feature_cols_all)}')\nprint(f'Numeric ({len(NUMERIC_FEATURES)}): {NUMERIC_FEATURES}')\nprint(f'Categorical ({len(CATEGORICAL_FEATURES)}): {CATEGORICAL_FEATURES}')"
"ID_COLS = ['applicant_id', 'customer_key', 'applicant_ref_code']\n",
"NOISE_COLS = ['noise_feature_1', 'noise_feature_2', 'noise_feature_3', 'noise_feature_4', 'noise_feature_5']\n",
"TARGET_COL = 'premium_risk'\n",
"\n",
"all_cols = train_df_clean.columns.tolist()\n",
"feature_cols_all = [c for c in all_cols if c not in ID_COLS + NOISE_COLS + [TARGET_COL]]\n",
"\n",
"NUMERIC_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=[np.number]).columns.tolist()\n",
"CATEGORICAL_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=['object']).columns.tolist()\n",
"\n",
"print(f'Total features: {len(feature_cols_all)}')\n",
"print(f'Numeric ({len(NUMERIC_FEATURES)}): {NUMERIC_FEATURES}')\n",
"print(f'Categorical ({len(CATEGORICAL_FEATURES)}): {CATEGORICAL_FEATURES}')"
]
},
{
@@ -160,7 +315,24 @@
"metadata": {},
"outputs": [],
"source": [
"numeric_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='median')),\n ('scaler', StandardScaler())\n])\n\ncategorical_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='most_frequent')),\n ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n])\n\npreprocessor = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n ],\n remainder='drop'\n)\nprint('Preprocessing pipeline created!')"
"numeric_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='median')),\n",
" ('scaler', StandardScaler())\n",
"])\n",
"\n",
"categorical_transformer = Pipeline(steps=[\n",
" ('imputer', SimpleImputer(strategy='most_frequent')),\n",
" ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n",
"])\n",
"\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, NUMERIC_FEATURES),\n",
" ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n",
" ],\n",
" remainder='drop'\n",
")\n",
"print('Preprocessing pipeline created!')"
]
},
{
@@ -170,7 +342,18 @@
"metadata": {},
"outputs": [],
"source": [
"X_train = train_df_clean[feature_cols_all]\ny_train = train_df_clean[TARGET_COL]\nX_val = val_df_clean[feature_cols_all]\ny_val = val_df_clean[TARGET_COL]\nX_test = test_df_clean[feature_cols_all]\n\nle_target = LabelEncoder()\ny_train_enc = le_target.fit_transform(y_train)\ny_val_enc = le_target.transform(y_val)\n\nprint(f'Classes: {le_target.classes_}')\nprint(f'X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape}')"
"X_train = train_df_clean[feature_cols_all]\n",
"y_train = train_df_clean[TARGET_COL]\n",
"X_val = val_df_clean[feature_cols_all]\n",
"y_val = val_df_clean[TARGET_COL]\n",
"X_test = test_df_clean[feature_cols_all]\n",
"\n",
"le_target = LabelEncoder()\n",
"y_train_enc = le_target.fit_transform(y_train)\n",
"y_val_enc = le_target.transform(y_val)\n",
"\n",
"print(f'Classes: {le_target.classes_}')\n",
"print(f'X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape}')"
]
},
{
@@ -188,7 +371,32 @@
"metadata": {},
"outputs": [],
"source": [
"def evaluate_model(pipeline, X_tr, y_tr, X_v, y_v, le, model_name='Model'):\n y_tr_pred = pipeline.predict(X_tr)\n y_v_pred = pipeline.predict(X_v)\n results = {\n 'model': model_name,\n 'train_accuracy': accuracy_score(y_tr, y_tr_pred),\n 'val_accuracy': accuracy_score(y_v, y_v_pred),\n 'train_f1_macro': f1_score(y_tr, y_tr_pred, average='macro'),\n 'val_f1_macro': f1_score(y_v, y_v_pred, average='macro'),\n }\n f1_per_class = f1_score(y_v, y_v_pred, average=None)\n for i, cls in enumerate(le.classes_):\n results[f'val_f1_{cls}'] = f1_per_class[i]\n return results\n\ndef plot_confusion_matrix(pipeline, X_v, y_v, le, title, save_path):\n y_pred = pipeline.predict(X_v)\n fig, ax = plt.subplots(figsize=(8, 6))\n cm = confusion_matrix(y_v, y_pred)\n disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)\n disp.plot(ax=ax, cmap='Blues', values_format='d')\n ax.set_title(title, fontsize=14)\n plt.tight_layout()\n plt.savefig(save_path, dpi=150)\n plt.show()\n return cm"
"def evaluate_model(pipeline, X_tr, y_tr, X_v, y_v, le, model_name='Model'):\n",
" y_tr_pred = pipeline.predict(X_tr)\n",
" y_v_pred = pipeline.predict(X_v)\n",
" results = {\n",
" 'model': model_name,\n",
" 'train_accuracy': accuracy_score(y_tr, y_tr_pred),\n",
" 'val_accuracy': accuracy_score(y_v, y_v_pred),\n",
" 'train_f1_macro': f1_score(y_tr, y_tr_pred, average='macro'),\n",
" 'val_f1_macro': f1_score(y_v, y_v_pred, average='macro'),\n",
" }\n",
" f1_per_class = f1_score(y_v, y_v_pred, average=None)\n",
" for i, cls in enumerate(le.classes_):\n",
" results[f'val_f1_{cls}'] = f1_per_class[i]\n",
" return results\n",
"\n",
"def plot_confusion_matrix(pipeline, X_v, y_v, le, title, save_path):\n",
" y_pred = pipeline.predict(X_v)\n",
" fig, ax = plt.subplots(figsize=(8, 6))\n",
" cm = confusion_matrix(y_v, y_pred)\n",
" disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)\n",
" disp.plot(ax=ax, cmap='Blues', values_format='d')\n",
" ax.set_title(title, fontsize=14)\n",
" plt.tight_layout()\n",
" plt.savefig(save_path, dpi=150)\n",
" plt.show()\n",
" return cm"
]
},
{
@@ -198,7 +406,19 @@
"metadata": {},
"outputs": [],
"source": [
"print('Training Baseline: Logistic Regression...')\nbaseline_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', LogisticRegression(class_weight='balanced', max_iter=1000, random_state=RANDOM_STATE, n_jobs=-1))\n])\nbaseline_pipeline.fit(X_train, y_train_enc)\n\nbaseline_results = evaluate_model(baseline_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Baseline_LR')\n\nprint('\\n=== BASELINE MODEL RESULTS ===')\nfor k, v in baseline_results.items():\n if k != 'model':\n print(f'{k}: {v:.4f}')"
"print('Training Baseline: Logistic Regression...')\n",
"baseline_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', LogisticRegression(class_weight='balanced', max_iter=1000, random_state=RANDOM_STATE, n_jobs=-1))\n",
"])\n",
"baseline_pipeline.fit(X_train, y_train_enc)\n",
"\n",
"baseline_results = evaluate_model(baseline_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Baseline_LR')\n",
"\n",
"print('\\n=== BASELINE MODEL RESULTS ===')\n",
"for k, v in baseline_results.items():\n",
" if k != 'model':\n",
" print(f'{k}: {v:.4f}')"
]
},
{
@@ -208,7 +428,17 @@
"metadata": {},
"outputs": [],
"source": [
"plot_confusion_matrix(baseline_pipeline, X_val, y_val_enc, le_target,\n 'Baseline: Logistic Regression - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'baseline_confusion_matrix.png'))\n\nprint('\\n=== CLASSIFICATION REPORT (VAL) ===')\ny_val_pred = baseline_pipeline.predict(X_val)\nprint(classification_report(y_val_enc, y_val_pred, target_names=le_target.classes_))\n\nall_results = [baseline_results]\npd.DataFrame(all_results).to_csv(\n os.path.join(OUTPUT_DIR, 'tables', 'model_comparison_summary.csv'), index=False)"
"plot_confusion_matrix(baseline_pipeline, X_val, y_val_enc, le_target,\n",
" 'Baseline: Logistic Regression - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'baseline_confusion_matrix.png'))\n",
"\n",
"print('\\n=== CLASSIFICATION REPORT (VAL) ===')\n",
"y_val_pred = baseline_pipeline.predict(X_val)\n",
"print(classification_report(y_val_enc, y_val_pred, target_names=le_target.classes_))\n",
"\n",
"all_results = [baseline_results]\n",
"pd.DataFrame(all_results).to_csv(\n",
" os.path.join(OUTPUT_DIR, 'tables', 'model_comparison_summary.csv'), index=False)"
]
},
{
@@ -225,7 +455,36 @@
"id": "30cd02ce",
"metadata": {},
"outputs": [],
"source": "print('Training Random Forest...')\nstart = time.time()\nrf_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1))\n])\nrf_pipeline.fit(X_train, y_train_enc)\nrf_time = time.time() - start\n\nrf_results = evaluate_model(rf_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'RandomForest')\nrf_results['train_time'] = rf_time\n\nprint('Training XGBoost...')\nstart = time.time()\nxgb_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=6,\n objective='multi:softmax', num_class=3,\n tree_method=XGB_TREE_METHOD, device=XGB_DEVICE,\n random_state=RANDOM_STATE, verbosity=0))\n])\nxgb_pipeline.fit(X_train, y_train_enc)\nxgb_time = time.time() - start\n\nxgb_results = evaluate_model(xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost')\nxgb_results['train_time'] = xgb_time\n\nprint(f'RF time: {rf_time:.2f}s | XGB time: {xgb_time:.2f}s')"
"source": [
"print('Training Random Forest...')\n",
"start = time.time()\n",
"rf_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1))\n",
"])\n",
"rf_pipeline.fit(X_train, y_train_enc)\n",
"rf_time = time.time() - start\n",
"\n",
"rf_results = evaluate_model(rf_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'RandomForest')\n",
"rf_results['train_time'] = rf_time\n",
"\n",
"print('Training XGBoost...')\n",
"start = time.time()\n",
"xgb_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', xgb.XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=6,\n",
" objective='multi:softmax', num_class=3,\n",
" tree_method=XGB_TREE_METHOD, device=XGB_DEVICE,\n",
" random_state=RANDOM_STATE, verbosity=0))\n",
"])\n",
"xgb_pipeline.fit(X_train, y_train_enc)\n",
"xgb_time = time.time() - start\n",
"\n",
"xgb_results = evaluate_model(xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost')\n",
"xgb_results['train_time'] = xgb_time\n",
"\n",
"print(f'RF time: {rf_time:.2f}s | XGB time: {xgb_time:.2f}s')"
]
},
{
"cell_type": "code",
@@ -234,7 +493,17 @@
"metadata": {},
"outputs": [],
"source": [
"all_results.append(rf_results)\nall_results.append(xgb_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== MODEL COMPARISON SUMMARY ===')\ndisplay_cols = ['model', 'train_accuracy', 'val_accuracy', 'train_f1_macro', 'val_f1_macro', 'train_time']\nprint(results_df[display_cols].round(4).to_string(index=False))\n\nprint('\\n=== CLASS-WISE F1 (VAL) ===')\nclass_cols = [c for c in results_df.columns if c.startswith('val_f1_') and c != 'val_f1_macro']\nprint(results_df[['model'] + class_cols].round(4).to_string(index=False))"
"all_results.append(rf_results)\n",
"all_results.append(xgb_results)\n",
"results_df = pd.DataFrame(all_results)\n",
"\n",
"print('\\n=== MODEL COMPARISON SUMMARY ===')\n",
"display_cols = ['model', 'train_accuracy', 'val_accuracy', 'train_f1_macro', 'val_f1_macro', 'train_time']\n",
"print(results_df[display_cols].round(4).to_string(index=False))\n",
"\n",
"print('\\n=== CLASS-WISE F1 (VAL) ===')\n",
"class_cols = [c for c in results_df.columns if c.startswith('val_f1_') and c != 'val_f1_macro']\n",
"print(results_df[['model'] + class_cols].round(4).to_string(index=False))"
]
},
{
@@ -244,7 +513,28 @@
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\nmodels = results_df['model'].tolist()\nval_f1 = results_df['val_f1_macro'].tolist()\nval_acc = results_df['val_accuracy'].tolist()\n\nbars1 = axes[0].bar(models, val_f1, color=['#2196F3', '#4CAF50', '#FF9800'])\naxes[0].set_title('Validation Macro-F1 Comparison', fontsize=13)\naxes[0].set_ylabel('Macro-F1')\naxes[0].set_ylim(0, 1)\nfor bar, val in zip(bars1, val_f1):\n axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n\nbars2 = axes[1].bar(models, val_acc, color=['#2196F3', '#4CAF50', '#FF9800'])\naxes[1].set_title('Validation Accuracy Comparison', fontsize=13)\naxes[1].set_ylabel('Accuracy')\naxes[1].set_ylim(0, 1)\nfor bar, val in zip(bars2, val_acc):\n axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'model_comparison.png'), dpi=150)\nplt.show()"
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
"models = results_df['model'].tolist()\n",
"val_f1 = results_df['val_f1_macro'].tolist()\n",
"val_acc = results_df['val_accuracy'].tolist()\n",
"\n",
"bars1 = axes[0].bar(models, val_f1, color=['#2196F3', '#4CAF50', '#FF9800'])\n",
"axes[0].set_title('Validation Macro-F1 Comparison', fontsize=13)\n",
"axes[0].set_ylabel('Macro-F1')\n",
"axes[0].set_ylim(0, 1)\n",
"for bar, val in zip(bars1, val_f1):\n",
" axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n",
"\n",
"bars2 = axes[1].bar(models, val_acc, color=['#2196F3', '#4CAF50', '#FF9800'])\n",
"axes[1].set_title('Validation Accuracy Comparison', fontsize=13)\n",
"axes[1].set_ylabel('Accuracy')\n",
"axes[1].set_ylim(0, 1)\n",
"for bar, val in zip(bars2, val_acc):\n",
" axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'model_comparison.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -254,7 +544,13 @@
"metadata": {},
"outputs": [],
"source": [
"plot_confusion_matrix(rf_pipeline, X_val, y_val_enc, le_target,\n 'Random Forest - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'rf_confusion_matrix.png'))\n\nplot_confusion_matrix(xgb_pipeline, X_val, y_val_enc, le_target,\n 'XGBoost - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'xgb_confusion_matrix.png'))"
"plot_confusion_matrix(rf_pipeline, X_val, y_val_enc, le_target,\n",
" 'Random Forest - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'rf_confusion_matrix.png'))\n",
"\n",
"plot_confusion_matrix(xgb_pipeline, X_val, y_val_enc, le_target,\n",
" 'XGBoost - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'xgb_confusion_matrix.png'))"
]
},
{
@@ -272,7 +568,17 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== BAGGING VS BOOSTING ANALYSIS ===')\nrf_val_f1 = rf_results['val_f1_macro']\nrf_train_f1 = rf_results['train_f1_macro']\nrf_gap = rf_train_f1 - rf_val_f1\n\nxgb_val_f1 = xgb_results['val_f1_macro']\nxgb_train_f1 = xgb_results['train_f1_macro']\nxgb_gap = xgb_train_f1 - xgb_val_f1\n\nprint(f'Random Forest - val_f1_macro: {rf_val_f1:.4f}, overfitting gap: {rf_gap:.4f}')\nprint(f'XGBoost - val_f1_macro: {xgb_val_f1:.4f}, overfitting gap: {xgb_gap:.4f}')"
"print('=== BAGGING VS BOOSTING ANALYSIS ===')\n",
"rf_val_f1 = rf_results['val_f1_macro']\n",
"rf_train_f1 = rf_results['train_f1_macro']\n",
"rf_gap = rf_train_f1 - rf_val_f1\n",
"\n",
"xgb_val_f1 = xgb_results['val_f1_macro']\n",
"xgb_train_f1 = xgb_results['train_f1_macro']\n",
"xgb_gap = xgb_train_f1 - xgb_val_f1\n",
"\n",
"print(f'Random Forest - val_f1_macro: {rf_val_f1:.4f}, overfitting gap: {rf_gap:.4f}')\n",
"print(f'XGBoost - val_f1_macro: {xgb_val_f1:.4f}, overfitting gap: {xgb_gap:.4f}')"
]
},
{
@@ -289,7 +595,40 @@
"id": "e6361576",
"metadata": {},
"outputs": [],
"source": "def objective(trial):\n params = {\n 'n_estimators': trial.suggest_int('n_estimators', 100, 500),\n 'max_depth': trial.suggest_int('max_depth', 3, 10),\n 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),\n 'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),\n 'subsample': trial.suggest_float('subsample', 0.5, 1.0),\n 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),\n 'gamma': trial.suggest_float('gamma', 0, 5),\n 'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),\n 'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),\n 'objective': 'multi:softmax',\n 'num_class': 3,\n 'random_state': RANDOM_STATE,\n 'tree_method': XGB_TREE_METHOD,\n 'device': XGB_DEVICE,\n 'verbosity': 0\n }\n pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(**params))\n ])\n pipeline.fit(X_train, y_train_enc)\n y_pred = pipeline.predict(X_val)\n score = f1_score(y_val_enc, y_pred, average='macro')\n return score\n\nprint('Starting Optuna hyperparameter optimisation (30 trials)...')\nstudy = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE))\nstudy.optimize(objective, n_trials=30, show_progress_bar=False)\n\nprint(f'Best trial: {study.best_trial.number} | Best macro-F1: {study.best_value:.4f}')"
"source": [
"def objective(trial):\n",
" params = {\n",
" 'n_estimators': trial.suggest_int('n_estimators', 100, 500),\n",
" 'max_depth': trial.suggest_int('max_depth', 3, 10),\n",
" 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),\n",
" 'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),\n",
" 'subsample': trial.suggest_float('subsample', 0.5, 1.0),\n",
" 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),\n",
" 'gamma': trial.suggest_float('gamma', 0, 5),\n",
" 'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),\n",
" 'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),\n",
" 'objective': 'multi:softmax',\n",
" 'num_class': 3,\n",
" 'random_state': RANDOM_STATE,\n",
" 'tree_method': XGB_TREE_METHOD,\n",
" 'device': XGB_DEVICE,\n",
" 'verbosity': 0\n",
" }\n",
" pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', xgb.XGBClassifier(**params))\n",
" ])\n",
" pipeline.fit(X_train, y_train_enc)\n",
" y_pred = pipeline.predict(X_val)\n",
" score = f1_score(y_val_enc, y_pred, average='macro')\n",
" return score\n",
"\n",
"print('Starting Optuna hyperparameter optimisation (30 trials)...')\n",
"study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE))\n",
"study.optimize(objective, n_trials=30, show_progress_bar=False)\n",
"\n",
"print(f'Best trial: {study.best_trial.number} | Best macro-F1: {study.best_value:.4f}')"
]
},
{
"cell_type": "code",
@@ -298,7 +637,22 @@
"metadata": {},
"outputs": [],
"source": [
"print('\\n=== BEST HYPERPARAMETERS ===')\nbest_params = study.best_params\nfor k, v in best_params.items():\n print(f' {k}: {v}')\n\nfig = optuna.visualization.matplotlib.plot_optimization_history(study)\nplt.title('Optuna Optimization History')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_optimization_history.png'), dpi=150)\nplt.show()\n\nfig = optuna.visualization.matplotlib.plot_param_importances(study)\nplt.title('Hyperparameter Importance')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_param_importance.png'), dpi=150)\nplt.show()"
"print('\\n=== BEST HYPERPARAMETERS ===')\n",
"best_params = study.best_params\n",
"for k, v in best_params.items():\n",
" print(f' {k}: {v}')\n",
"\n",
"fig = optuna.visualization.matplotlib.plot_optimization_history(study)\n",
"plt.title('Optuna Optimization History')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_optimization_history.png'), dpi=150)\n",
"plt.show()\n",
"\n",
"fig = optuna.visualization.matplotlib.plot_param_importances(study)\n",
"plt.title('Hyperparameter Importance')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_param_importance.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -307,7 +661,37 @@
"id": "640263ea",
"metadata": {},
"outputs": [],
"source": "best_xgb_params = {\n **study.best_params,\n 'objective': 'multi:softmax',\n 'num_class': 3,\n 'random_state': RANDOM_STATE,\n 'tree_method': XGB_TREE_METHOD,\n 'device': XGB_DEVICE,\n 'verbosity': 0\n}\n\nprint('Training tuned XGBoost...')\nimport time\nstart = time.time()\ntuned_xgb_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(**best_xgb_params))\n])\ntuned_xgb_pipeline.fit(X_train, y_train_enc)\ntuned_time = time.time() - start\n\ntuned_results = evaluate_model(tuned_xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost_Tuned')\ntuned_results['train_time'] = tuned_time\n\nprint('\\n=== TUNED XGBOOST RESULTS ===')\nfor k, v in tuned_results.items():\n if k != 'model':\n print(f'{k}: {v:.4f}')\n\nprint(f'\\nTuning improvement (macro-F1): +{tuned_results[\"val_f1_macro\"] - xgb_results[\"val_f1_macro\"]:.4f}')"
"source": [
"best_xgb_params = {\n",
" **study.best_params,\n",
" 'objective': 'multi:softmax',\n",
" 'num_class': 3,\n",
" 'random_state': RANDOM_STATE,\n",
" 'tree_method': XGB_TREE_METHOD,\n",
" 'device': XGB_DEVICE,\n",
" 'verbosity': 0\n",
"}\n",
"\n",
"print('Training tuned XGBoost...')\n",
"import time\n",
"start = time.time()\n",
"tuned_xgb_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', xgb.XGBClassifier(**best_xgb_params))\n",
"])\n",
"tuned_xgb_pipeline.fit(X_train, y_train_enc)\n",
"tuned_time = time.time() - start\n",
"\n",
"tuned_results = evaluate_model(tuned_xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost_Tuned')\n",
"tuned_results['train_time'] = tuned_time\n",
"\n",
"print('\\n=== TUNED XGBOOST RESULTS ===')\n",
"for k, v in tuned_results.items():\n",
" if k != 'model':\n",
" print(f'{k}: {v:.4f}')\n",
"\n",
"print(f'\\nTuning improvement (macro-F1): +{tuned_results[\"val_f1_macro\"] - xgb_results[\"val_f1_macro\"]:.4f}')"
]
},
{
"cell_type": "code",
@@ -316,7 +700,11 @@
"metadata": {},
"outputs": [],
"source": [
"all_results.append(tuned_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== BEFORE VS AFTER TUNING ===')\nprint(results_df[['model', 'val_f1_macro', 'val_accuracy', 'train_time']].round(4).to_string(index=False))"
"all_results.append(tuned_results)\n",
"results_df = pd.DataFrame(all_results)\n",
"\n",
"print('\\n=== BEFORE VS AFTER TUNING ===')\n",
"print(results_df[['model', 'val_f1_macro', 'val_accuracy', 'train_time']].round(4).to_string(index=False))"
]
},
{
@@ -339,7 +727,47 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== CATEGORY A: IMPROVED MISSING VALUE HANDLING ===')\n\nMISSING_COLS = ['net_monthly_income_gbp', 'avg_payment_delay_days', 'monthly_investment_gbp',\n 'prior_debt_products', 'account_tenure']\n\nfor col in MISSING_COLS:\n missing_col_name = f'{col}_missing'\n train_df_clean[missing_col_name] = train_df_clean[col].isnull().astype(int)\n val_df_clean[missing_col_name] = val_df_clean[col].isnull().astype(int)\n test_df_clean[missing_col_name] = test_df_clean[col].isnull().astype(int)\n print(f'Added missing indicator: {missing_col_name}')\n\nfeature_cols_catA = feature_cols_all + [f'{c}_missing' for c in MISSING_COLS]\nprint(f'\\nFeature columns after adding indicators: {len(feature_cols_catA)}')\n\nX_train_A = train_df_clean[feature_cols_catA]\nX_val_A = val_df_clean[feature_cols_catA]\nX_test_A = test_df_clean[feature_cols_catA]\n\nNUMERIC_FEATURES_A = X_train_A.select_dtypes(include=[np.number]).columns.tolist()\nCATEGORICAL_FEATURES_A = X_train_A.select_dtypes(include=['object']).columns.tolist()\n\npreprocessor_A = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES_A),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES_A)\n ],\n remainder='drop'\n)\n\ncatA_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor_A),\n ('classifier', xgb.XGBClassifier(**best_xgb_params))\n])\ncatA_pipeline.fit(X_train_A, y_train_enc)\n\ncatA_results = evaluate_model(catA_pipeline, X_train_A, y_train_enc, X_val_A, y_val_enc, le_target, 'XGB_CatA_MissingHandling')\n\nprint('\\n=== CATEGORY A RESULTS ===')\nprint(f'val_f1_macro: {catA_results[\"val_f1_macro\"]:.4f}')\nprint(f'val_accuracy: {catA_results[\"val_accuracy\"]:.4f}')"
"print('=== CATEGORY A: IMPROVED MISSING VALUE HANDLING ===')\n",
"\n",
"MISSING_COLS = ['net_monthly_income_gbp', 'avg_payment_delay_days', 'monthly_investment_gbp',\n",
" 'prior_debt_products', 'account_tenure']\n",
"\n",
"for col in MISSING_COLS:\n",
" missing_col_name = f'{col}_missing'\n",
" train_df_clean[missing_col_name] = train_df_clean[col].isnull().astype(int)\n",
" val_df_clean[missing_col_name] = val_df_clean[col].isnull().astype(int)\n",
" test_df_clean[missing_col_name] = test_df_clean[col].isnull().astype(int)\n",
" print(f'Added missing indicator: {missing_col_name}')\n",
"\n",
"feature_cols_catA = feature_cols_all + [f'{c}_missing' for c in MISSING_COLS]\n",
"print(f'\\nFeature columns after adding indicators: {len(feature_cols_catA)}')\n",
"\n",
"X_train_A = train_df_clean[feature_cols_catA]\n",
"X_val_A = val_df_clean[feature_cols_catA]\n",
"X_test_A = test_df_clean[feature_cols_catA]\n",
"\n",
"NUMERIC_FEATURES_A = X_train_A.select_dtypes(include=[np.number]).columns.tolist()\n",
"CATEGORICAL_FEATURES_A = X_train_A.select_dtypes(include=['object']).columns.tolist()\n",
"\n",
"preprocessor_A = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, NUMERIC_FEATURES_A),\n",
" ('cat', categorical_transformer, CATEGORICAL_FEATURES_A)\n",
" ],\n",
" remainder='drop'\n",
")\n",
"\n",
"catA_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor_A),\n",
" ('classifier', xgb.XGBClassifier(**best_xgb_params))\n",
"])\n",
"catA_pipeline.fit(X_train_A, y_train_enc)\n",
"\n",
"catA_results = evaluate_model(catA_pipeline, X_train_A, y_train_enc, X_val_A, y_val_enc, le_target, 'XGB_CatA_MissingHandling')\n",
"\n",
"print('\\n=== CATEGORY A RESULTS ===')\n",
"print(f'val_f1_macro: {catA_results[\"val_f1_macro\"]:.4f}')\n",
"print(f'val_accuracy: {catA_results[\"val_accuracy\"]:.4f}')"
]
},
{
@@ -349,7 +777,31 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== CATEGORY D: SOFT VOTING ENSEMBLE ===')\nprint('Training Soft Voting Ensemble (RF + XGBoost)...')\n\nrf_clf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1)\nxgb_clf = xgb.XGBClassifier(**best_xgb_params)\n\nvoting_clf = VotingClassifier(\n estimators=[\n ('rf', rf_clf),\n ('xgb', xgb_clf)\n ],\n voting='soft',\n n_jobs=-1\n)\n\nensemble_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', voting_clf)\n])\nensemble_pipeline.fit(X_train, y_train_enc)\n\nensemble_results = evaluate_model(ensemble_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Ensemble_SoftVoting')\n\nprint(f'Ensemble val_f1_macro: {ensemble_results[\"val_f1_macro\"]:.4f}')\nprint(f'Ensemble val_accuracy: {ensemble_results[\"val_accuracy\"]:.4f}')"
"print('=== CATEGORY D: SOFT VOTING ENSEMBLE ===')\n",
"print('Training Soft Voting Ensemble (RF + XGBoost)...')\n",
"\n",
"rf_clf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1)\n",
"xgb_clf = xgb.XGBClassifier(**best_xgb_params)\n",
"\n",
"voting_clf = VotingClassifier(\n",
" estimators=[\n",
" ('rf', rf_clf),\n",
" ('xgb', xgb_clf)\n",
" ],\n",
" voting='soft',\n",
" n_jobs=-1\n",
")\n",
"\n",
"ensemble_pipeline = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('classifier', voting_clf)\n",
"])\n",
"ensemble_pipeline.fit(X_train, y_train_enc)\n",
"\n",
"ensemble_results = evaluate_model(ensemble_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Ensemble_SoftVoting')\n",
"\n",
"print(f'Ensemble val_f1_macro: {ensemble_results[\"val_f1_macro\"]:.4f}')\n",
"print(f'Ensemble val_accuracy: {ensemble_results[\"val_accuracy\"]:.4f}')"
]
},
{
@@ -359,7 +811,20 @@
"metadata": {},
"outputs": [],
"source": [
"all_results.append(catA_results)\nall_results.append(ensemble_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== PERSONALISED IMPROVEMENT SUMMARY ===')\nprint(results_df[['model', 'val_f1_macro', 'val_accuracy']].round(4).to_string(index=False))\n\nresults_df.to_csv(\n os.path.join(OUTPUT_DIR, 'tables', 'personalised_improvement_summary.csv'), index=False)\n\nimprove_A = catA_results['val_f1_macro'] - tuned_results['val_f1_macro']\nimprove_D = ensemble_results['val_f1_macro'] - tuned_results['val_f1_macro']\nprint(f'\\nCategory A improvement (vs Tuned): +{improve_A:.4f}')\nprint(f'Category D improvement (vs Tuned): +{improve_D:.4f}')"
"all_results.append(catA_results)\n",
"all_results.append(ensemble_results)\n",
"results_df = pd.DataFrame(all_results)\n",
"\n",
"print('\\n=== PERSONALISED IMPROVEMENT SUMMARY ===')\n",
"print(results_df[['model', 'val_f1_macro', 'val_accuracy']].round(4).to_string(index=False))\n",
"\n",
"results_df.to_csv(\n",
" os.path.join(OUTPUT_DIR, 'tables', 'personalised_improvement_summary.csv'), index=False)\n",
"\n",
"improve_A = catA_results['val_f1_macro'] - tuned_results['val_f1_macro']\n",
"improve_D = ensemble_results['val_f1_macro'] - tuned_results['val_f1_macro']\n",
"print(f'\\nCategory A improvement (vs Tuned): +{improve_A:.4f}')\n",
"print(f'Category D improvement (vs Tuned): +{improve_D:.4f}')"
]
},
{
@@ -377,7 +842,58 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== K-MEANS & GMM CLUSTERING ===')\n\npreprocessor_eval = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n ],\n remainder='drop'\n)\n\nX_train_scaled = preprocessor_eval.fit_transform(X_train)\nprint(f'Scaled training data shape: {X_train_scaled.shape}')\n\npca = PCA(n_components=2, random_state=RANDOM_STATE)\nX_train_pca = pca.fit_transform(X_train_scaled)\nprint(f'PCA explained variance: {pca.explained_variance_ratio_.sum():.4f}')\n\nk_range = range(2, 9)\nkmeans_results = []\ngmm_results = []\n\nfor k in k_range:\n print(f' Running k={k}...')\n \n km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=10)\n km_labels = km.fit_predict(X_train_scaled)\n sil_km = silhouette_score(X_train_scaled, km_labels)\n \n gmm_model = GaussianMixture(n_components=k, random_state=RANDOM_STATE, n_init=5)\n gmm_labels = gmm_model.fit_predict(X_train_scaled)\n sil_gmm = silhouette_score(X_train_scaled, gmm_labels)\n \n kmeans_results.append({\n 'k': k,\n 'inertia': km.inertia_,\n 'silhouette_x': sil_km\n })\n gmm_results.append({\n 'k': k,\n 'log_likelihood': gmm_model.score(X_train_scaled) * X_train_scaled.shape[0],\n 'bic': gmm_model.bic(X_train_scaled),\n 'aic': gmm_model.aic(X_train_scaled),\n 'silhouette_y': sil_gmm\n })\n\nkm_df = pd.DataFrame(kmeans_results)\ngmm_df = pd.DataFrame(gmm_results)\ncluster_df = km_df.merge(gmm_df, on='k')\nprint('\\n=== CLUSTERING COMPARISON ===')\nprint(cluster_df.round(4).to_string(index=False))\n\ncluster_df.to_csv(os.path.join(OUTPUT_DIR, 'tables', 'clustering_comparison.csv'), index=False)"
"print('=== K-MEANS & GMM CLUSTERING ===')\n",
"\n",
"preprocessor_eval = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, NUMERIC_FEATURES),\n",
" ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n",
" ],\n",
" remainder='drop'\n",
")\n",
"\n",
"X_train_scaled = preprocessor_eval.fit_transform(X_train)\n",
"print(f'Scaled training data shape: {X_train_scaled.shape}')\n",
"\n",
"pca = PCA(n_components=2, random_state=RANDOM_STATE)\n",
"X_train_pca = pca.fit_transform(X_train_scaled)\n",
"print(f'PCA explained variance: {pca.explained_variance_ratio_.sum():.4f}')\n",
"\n",
"k_range = range(2, 9)\n",
"kmeans_results = []\n",
"gmm_results = []\n",
"\n",
"for k in k_range:\n",
" print(f' Running k={k}...')\n",
" \n",
" km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=10)\n",
" km_labels = km.fit_predict(X_train_scaled)\n",
" sil_km = silhouette_score(X_train_scaled, km_labels)\n",
" \n",
" gmm_model = GaussianMixture(n_components=k, random_state=RANDOM_STATE, n_init=5)\n",
" gmm_labels = gmm_model.fit_predict(X_train_scaled)\n",
" sil_gmm = silhouette_score(X_train_scaled, gmm_labels)\n",
" \n",
" kmeans_results.append({\n",
" 'k': k,\n",
" 'inertia': km.inertia_,\n",
" 'silhouette_x': sil_km\n",
" })\n",
" gmm_results.append({\n",
" 'k': k,\n",
" 'log_likelihood': gmm_model.score(X_train_scaled) * X_train_scaled.shape[0],\n",
" 'bic': gmm_model.bic(X_train_scaled),\n",
" 'aic': gmm_model.aic(X_train_scaled),\n",
" 'silhouette_y': sil_gmm\n",
" })\n",
"\n",
"km_df = pd.DataFrame(kmeans_results)\n",
"gmm_df = pd.DataFrame(gmm_results)\n",
"cluster_df = km_df.merge(gmm_df, on='k')\n",
"print('\\n=== CLUSTERING COMPARISON ===')\n",
"print(cluster_df.round(4).to_string(index=False))\n",
"\n",
"cluster_df.to_csv(os.path.join(OUTPUT_DIR, 'tables', 'clustering_comparison.csv'), index=False)"
]
},
{
@@ -387,7 +903,33 @@
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n\naxes[0].plot(cluster_df['k'], cluster_df['inertia'], 'bo-', label='K-Means Inertia', linewidth=2)\naxes[0].set_xlabel('k')\naxes[0].set_ylabel('Inertia')\naxes[0].set_title('K-Means: Elbow Method')\naxes[0].grid(True)\n\naxes[1].plot(cluster_df['k'], cluster_df['bic'], 'g^-', label='BIC', linewidth=2)\naxes[1].plot(cluster_df['k'], cluster_df['aic'], 'rs--', label='AIC', linewidth=2)\naxes[1].set_xlabel('k')\naxes[1].set_ylabel('Score')\naxes[1].set_title('GMM: BIC & AIC (lower is better)')\naxes[1].legend()\naxes[1].grid(True)\n\naxes[2].plot(cluster_df['k'], cluster_df['silhouette_x'], 'bo-', label='K-Means', linewidth=2)\naxes[2].plot(cluster_df['k'], cluster_df['silhouette_y'], 'g^-', label='GMM', linewidth=2)\naxes[2].set_xlabel('k')\naxes[2].set_ylabel('Silhouette Score')\naxes[2].set_title('Silhouette Score Comparison (higher is better)')\naxes[2].legend()\naxes[2].grid(True)\n\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_comparison.png'), dpi=150)\nplt.show()"
"fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n",
"\n",
"axes[0].plot(cluster_df['k'], cluster_df['inertia'], 'bo-', label='K-Means Inertia', linewidth=2)\n",
"axes[0].set_xlabel('k')\n",
"axes[0].set_ylabel('Inertia')\n",
"axes[0].set_title('K-Means: Elbow Method')\n",
"axes[0].grid(True)\n",
"\n",
"axes[1].plot(cluster_df['k'], cluster_df['bic'], 'g^-', label='BIC', linewidth=2)\n",
"axes[1].plot(cluster_df['k'], cluster_df['aic'], 'rs--', label='AIC', linewidth=2)\n",
"axes[1].set_xlabel('k')\n",
"axes[1].set_ylabel('Score')\n",
"axes[1].set_title('GMM: BIC & AIC (lower is better)')\n",
"axes[1].legend()\n",
"axes[1].grid(True)\n",
"\n",
"axes[2].plot(cluster_df['k'], cluster_df['silhouette_x'], 'bo-', label='K-Means', linewidth=2)\n",
"axes[2].plot(cluster_df['k'], cluster_df['silhouette_y'], 'g^-', label='GMM', linewidth=2)\n",
"axes[2].set_xlabel('k')\n",
"axes[2].set_ylabel('Silhouette Score')\n",
"axes[2].set_title('Silhouette Score Comparison (higher is better)')\n",
"axes[2].legend()\n",
"axes[2].grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_comparison.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -397,7 +939,22 @@
"metadata": {},
"outputs": [],
"source": [
"best_k = cluster_df.loc[cluster_df['silhouette_x'].idxmax(), 'k']\nprint(f'Best K for K-Means (by silhouette): {best_k}')\n\nkm_best = KMeans(n_clusters=int(best_k), random_state=RANDOM_STATE, n_init=10)\nkm_best_labels = km_best.fit_predict(X_train_scaled)\n\nfig, ax = plt.subplots(figsize=(8, 6))\nscatter = ax.scatter(X_train_pca[:, 0], X_train_pca[:, 1],\n c=km_best_labels, cmap='viridis', alpha=0.5, s=10)\nax.set_xlabel('PC1')\nax.set_ylabel('PC2')\nax.set_title(f'K-Means Clustering (k={best_k}) - PCA Visualization')\nplt.colorbar(scatter, ax=ax, label='Cluster')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_visualization.png'), dpi=150)\nplt.show()"
"best_k = cluster_df.loc[cluster_df['silhouette_x'].idxmax(), 'k']\n",
"print(f'Best K for K-Means (by silhouette): {best_k}')\n",
"\n",
"km_best = KMeans(n_clusters=int(best_k), random_state=RANDOM_STATE, n_init=10)\n",
"km_best_labels = km_best.fit_predict(X_train_scaled)\n",
"\n",
"fig, ax = plt.subplots(figsize=(8, 6))\n",
"scatter = ax.scatter(X_train_pca[:, 0], X_train_pca[:, 1],\n",
" c=km_best_labels, cmap='viridis', alpha=0.5, s=10)\n",
"ax.set_xlabel('PC1')\n",
"ax.set_ylabel('PC2')\n",
"ax.set_title(f'K-Means Clustering (k={best_k}) - PCA Visualization')\n",
"plt.colorbar(scatter, ax=ax, label='Cluster')\n",
"plt.tight_layout()\n",
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_visualization.png'), dpi=150)\n",
"plt.show()"
]
},
{
@@ -415,7 +972,28 @@
"metadata": {},
"outputs": [],
"source": [
"print('=== FINAL MODEL SELECTION ===')\nprint('Based on val_f1_macro (primary metric):')\nfinal_model_name = results_df.loc[results_df['val_f1_macro'].idxmax(), 'model']\nprint(f'Selected model: {final_model_name} (val_f1_macro = {results_df[\"val_f1_macro\"].max():.4f})')\n\nif final_model_name == 'XGB_CatA_MissingHandling':\n final_pipeline = catA_pipeline\n X_test_final = X_test_A\nelif final_model_name == 'Ensemble_SoftVoting':\n final_pipeline = ensemble_pipeline\n X_test_final = X_test\nelse:\n final_pipeline = tuned_xgb_pipeline\n X_test_final = X_test\n\ny_val_final_pred = final_pipeline.predict(X_test_final if final_model_name == 'XGBoost_Tuned' else X_test)\ny_val_final_decoded = le_target.inverse_transform(y_val_final_pred)\n\nplot_confusion_matrix(final_pipeline, X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val,\n y_val_enc, le_target,\n f'Final Model: {final_model_name} - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'final_model_confusion_matrix.png'))"
"print('=== FINAL MODEL SELECTION ===')\n",
"print('Based on val_f1_macro (primary metric):')\n",
"final_model_name = results_df.loc[results_df['val_f1_macro'].idxmax(), 'model']\n",
"print(f'Selected model: {final_model_name} (val_f1_macro = {results_df[\"val_f1_macro\"].max():.4f})')\n",
"\n",
"if final_model_name == 'XGB_CatA_MissingHandling':\n",
" final_pipeline = catA_pipeline\n",
" X_test_final = X_test_A\n",
"elif final_model_name == 'Ensemble_SoftVoting':\n",
" final_pipeline = ensemble_pipeline\n",
" X_test_final = X_test\n",
"else:\n",
" final_pipeline = tuned_xgb_pipeline\n",
" X_test_final = X_test\n",
"\n",
"y_val_final_pred = final_pipeline.predict(X_test_final if final_model_name == 'XGBoost_Tuned' else X_test)\n",
"y_val_final_decoded = le_target.inverse_transform(y_val_final_pred)\n",
"\n",
"plot_confusion_matrix(final_pipeline, X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val,\n",
" y_val_enc, le_target,\n",
" f'Final Model: {final_model_name} - Confusion Matrix',\n",
" os.path.join(OUTPUT_DIR, 'figures', 'final_model_confusion_matrix.png'))"
]
},
{
@@ -425,7 +1003,9 @@
"metadata": {},
"outputs": [],
"source": [
"print('\\n=== FINAL CLASSIFICATION REPORT (VAL) ===')\ny_val_pred_final = final_pipeline.predict(X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val)\nprint(classification_report(y_val_enc, y_val_pred_final, target_names=le_target.classes_))"
"print('\\n=== FINAL CLASSIFICATION REPORT (VAL) ===')\n",
"y_val_pred_final = final_pipeline.predict(X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val)\n",
"print(classification_report(y_val_enc, y_val_pred_final, target_names=le_target.classes_))"
]
},
{
@@ -435,7 +1015,34 @@
"metadata": {},
"outputs": [],
"source": [
"STUDENT_ID = '1234560'\n\nif final_model_name == 'XGB_CatA_MissingHandling':\n y_test_pred = final_pipeline.predict(X_test_A)\nelif final_model_name == 'Ensemble_SoftVoting':\n y_test_pred = final_pipeline.predict(X_test)\nelse:\n y_test_pred = final_pipeline.predict(X_test)\n\ny_test_labels = le_target.inverse_transform(y_test_pred)\n\nsubmission_df = pd.DataFrame({\n 'applicant_id': test_df['applicant_id'],\n 'customer_key': test_df['customer_key'],\n 'premium_risk': y_test_labels\n})\n\nprint('=== SUBMISSION CSV VALIDATION ===')\nprint(f'Shape: {submission_df.shape}')\nprint(f'Columns: {list(submission_df.columns)}')\nprint(submission_df.head())\n\nprint('\\nPrediction counts:')\nprint(submission_df['premium_risk'].value_counts())\n\ncsv_path = os.path.join(OUTPUT_DIR, 'predictions', f'test_result_{STUDENT_ID}.csv')\nsubmission_df.to_csv(csv_path, index=False)\nprint(f'\\n*** CSV saved to: {csv_path} ***')"
"STUDENT_ID = '1234560'\n",
"\n",
"if final_model_name == 'XGB_CatA_MissingHandling':\n",
" y_test_pred = final_pipeline.predict(X_test_A)\n",
"elif final_model_name == 'Ensemble_SoftVoting':\n",
" y_test_pred = final_pipeline.predict(X_test)\n",
"else:\n",
" y_test_pred = final_pipeline.predict(X_test)\n",
"\n",
"y_test_labels = le_target.inverse_transform(y_test_pred)\n",
"\n",
"submission_df = pd.DataFrame({\n",
" 'applicant_id': test_df['applicant_id'],\n",
" 'customer_key': test_df['customer_key'],\n",
" 'premium_risk': y_test_labels\n",
"})\n",
"\n",
"print('=== SUBMISSION CSV VALIDATION ===')\n",
"print(f'Shape: {submission_df.shape}')\n",
"print(f'Columns: {list(submission_df.columns)}')\n",
"print(submission_df.head())\n",
"\n",
"print('\\nPrediction counts:')\n",
"print(submission_df['premium_risk'].value_counts())\n",
"\n",
"csv_path = os.path.join(OUTPUT_DIR, 'predictions', f'test_result_{STUDENT_ID}.csv')\n",
"submission_df.to_csv(csv_path, index=False)\n",
"print(f'\\n*** CSV saved to: {csv_path} ***')"
]
}
],
@@ -452,4 +1059,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Binary image files not shown (four output figures updated; sizes before → after: 52→52 KiB, 38→37 KiB, 59→59 KiB, 49→49 KiB).

@@ -1,5 +1,2 @@
model,train_accuracy,val_accuracy,train_f1_macro,val_f1_macro,val_f1_High,val_f1_Low,val_f1_Standard,train_time
Baseline_LR,0.7593680672268908,0.7341714285714286,0.7492574544185482,0.7237629331592531,0.7665209565440987,0.6489501312335958,0.7558177117000646,
RandomForest,1.0,0.7877333333333333,1.0,0.770789728543472,0.7874554916461244,0.7095334685598377,0.8153802254244543,57.91048526763916
XGBoost,0.8519529411764706,0.8371047619047619,0.8297116592669606,0.8143842728003406,0.8904623073719283,0.6944039941751612,0.8582865168539325,67.63970804214478
XGBoost_Tuned,0.9767663865546219,0.8700190476190476,0.9739400525375727,0.8519502714571496,0.9084439578486383,0.7620280474649407,0.8853788090578697,142.65462470054626
model,train_accuracy,val_accuracy,train_f1_macro,val_f1_macro,val_f1_High,val_f1_Low,val_f1_Standard
Baseline_LR,0.7595294117647059,0.7337904761904762,0.7493991157707756,0.7234383324236036,0.7663239074550129,0.6487372909150542,0.7552537989007436
@@ -1,16 +1,19 @@
"""
Script that runs insurance_premium_risk.ipynb.
Extracts the notebook's code cells and executes them one by one.
"""
import json, sys, os, warnings, traceback, time
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('Agg')
matplotlib.use("Agg")
import matplotlib.pyplot as _real_mpl_plt
_real_mpl_plt.show = lambda *a, **kw: None
import os
import sys
import time
import json
import traceback
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
@@ -32,34 +35,18 @@ import xgboost as xgb
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['font.size'] = 12
sns.set_style('whitegrid')
from src.notebook_runner import execute_notebook
from src.runtime_paths import build_paths
# ===== Load the notebook =====
nb_path = r'd:\Code\doing_exercises\programs\外教作业外快\强化学习个人课程作业报告\notebooks\insurance_premium_risk.ipynb'
cells = json.load(open(nb_path, encoding='utf-8'))['cells']
code_cells = [c for c in cells if c['cell_type'] == 'code']
print(f"Total code cells: {len(code_cells)}")
paths = build_paths()
print(f"Project root : {paths.project_root}")
print(f"Notebook : {paths.notebook}")
print(f"Data dir : {paths.data_dir}")
print(f"Output dir : {paths.output_dir}")
# ===== Execute each cell =====
# Use the global __main__ namespace so variables persist across cells
main_ns = globals().copy()
ns = vars()
for i, cell in enumerate(code_cells):
src = ''.join(cell['source'])
print(f"\n{'='*60}")
print(f"Running cell {i+1}/{len(code_cells)}...")
print(f" Source: {src[:80].replace(chr(10), ' ')}")
try:
exec(compile(src, f'cell_{i+1}', 'exec'), main_ns)
except Exception as e:
print(f"ERROR in cell {i+1}: {e}")
traceback.print_exc()
print("Stopping execution.")
break
print("\n\nAll cells executed successfully!")
print(f"Results saved to: outputs/figures/ and outputs/tables/")
result = execute_notebook(namespace=ns)
print(f"\nExecution finished: {result['status']}")
print(f"Cells run: {len([c for c in result['cells'] if c['status'] == 'ok'])}/{result['total']}")
print(f"Output dir: {result['outputs']['output_dir']}")
@@ -0,0 +1,55 @@
import json
import traceback
from pathlib import Path
from .runtime_paths import build_paths
def execute_notebook(
start_at: int | None = None,
stop_at: int | None = None,
namespace: dict | None = None,
) -> dict:
paths = build_paths()
paths.ensure_outputs()
nb_data = json.loads(paths.notebook.read_text(encoding="utf-8"))
code_cells = [c for c in nb_data["cells"] if c["cell_type"] == "code"]
if not code_cells:
return {"status": "skipped", "reason": "no code cells found"}
ns = (namespace or {}).copy()
ns.update(paths.as_injection())
ns["RANDOM_STATE"] = 42
start = max((start_at or 1) - 1, 0)
stop = stop_at if stop_at is not None else len(code_cells)
cells_to_run = code_cells[start:stop]
results = []
for i, cell in enumerate(cells_to_run, start=start + 1):
src = "".join(cell["source"])
tag = f"cell_{i}"
try:
exec(compile(src, tag, "exec"), ns)
results.append({"cell": i, "status": "ok"})
except Exception as exc:
results.append({"cell": i, "status": "error", "error": str(exc)})
traceback.print_exc()
print(f"Stopping at cell {i} due to error.")
break
results_summary = {
"status": "completed",
"total": len(cells_to_run),
"cells": results,
"outputs": {
"data_dir": str(paths.data_dir),
"output_dir": str(paths.output_dir),
},
}
return results_summary
if __name__ == "__main__":
execute_notebook()
@@ -1,32 +1,52 @@
"""
Part 2: run the full notebook (cells 1-35)
Works around encoding issues with Chinese characters in the path
"""
import warnings, time, os, sys, json, traceback
warnings.filterwarnings('ignore')
import warnings
warnings.filterwarnings("ignore")
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as _p
_p.show = lambda *a, **kw: None
nb = r'D:\Code\doing_exercises\programs\外教作业外快\强化学习个人课程作业报告\notebooks\insurance_premium_risk.ipynb'
cells = json.load(open(nb, encoding='utf-8'))['cells']
code_cells = [c for c in cells if c['cell_type'] == 'code']
print(f"Total code cells: {len(code_cells)}")
matplotlib.use("Agg")
import matplotlib.pyplot as _real_mpl_plt
main_ns = globals().copy()
main_ns['RANDOM_STATE'] = 42
_real_mpl_plt.show = lambda *a, **kw: None
for i, cell in enumerate(code_cells, start=1):
src = ''.join(cell['source'])
print(f"\n{'='*60}")
print(f"Running cell {i}/{len(code_cells)}...")
try:
exec(compile(src, f'cell_{i}', 'exec'), main_ns)
except Exception as e:
print(f"ERROR cell {i}: {e}")
traceback.print_exc()
print("Stopping.")
break
import os
import sys
import time
import json
import traceback
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
import xgboost as xgb
import optuna
optuna.logging.set_verbosity(optuna.logging.WARNING)
print("\n\nAll cells executed!")
from src.notebook_runner import execute_notebook
from src.runtime_paths import build_paths
paths = build_paths()
print(f"Project root : {paths.project_root}")
print(f"Notebook : {paths.notebook}")
print(f"Data dir : {paths.data_dir}")
print(f"Output dir : {paths.output_dir}")
ns = vars()
result = execute_notebook(start_at=1, namespace=ns)
print(f"\nExecution finished: {result['status']}")
print(f"Cells run: {len([c for c in result['cells'] if c['status'] == 'ok'])}/{result['total']}")
print(f"Output dir: {result['outputs']['output_dir']}")
@@ -0,0 +1,31 @@
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class RuntimePaths:
project_root: Path
notebook: Path
data_dir: Path
output_dir: Path
def ensure_outputs(self) -> None:
(self.output_dir / "figures").mkdir(parents=True, exist_ok=True)
(self.output_dir / "tables").mkdir(parents=True, exist_ok=True)
(self.output_dir / "predictions").mkdir(parents=True, exist_ok=True)
def as_injection(self) -> dict:
return {
"DATA_DIR": str(self.data_dir),
"OUTPUT_DIR": str(self.output_dir),
}
def build_paths() -> RuntimePaths:
root = Path(__file__).resolve().parents[1]
return RuntimePaths(
project_root=root,
notebook=root / "notebooks" / "insurance_premium_risk.ipynb",
data_dir=root / "dataset_final",
output_dir=root / "outputs",
)
@@ -0,0 +1,57 @@
# PPO for CarRacing-v3
From-scratch PPO implementation for CarRacing-v3. No Stable-Baselines or other RL libraries used.
## Setup
```bash
conda activate my_env
uv pip install -r requirements.txt
```
## Train
```bash
python train.py --steps 500000
```
## Evaluate
```bash
python src/evaluate.py --model models/ppo_carracing_final.pt --episodes 10
```
## TensorBoard
```bash
tensorboard --logdir logs/tensorboard
```
## Project Structure
```
src/
├── network.py # Actor (Gaussian policy) and Critic (Value) networks
├── replay_buffer.py # Rollout buffer with GAE computation
├── trainer.py # PPO update with clipped surrogate objective
├── utils.py # Environment wrappers (grayscale, resize, frame stack)
└── evaluate.py # Evaluation script
train.py # Main training entry point
models/ # Saved checkpoints
logs/tensorboard/ # TensorBoard logs
```
## Hyperparameters
| Parameter | Value |
|-----------|-------|
| Learning rate | 3e-4 |
| Gamma | 0.99 |
| GAE lambda | 0.95 |
| Clip epsilon | 0.2 |
| PPO epochs | 4 |
| Mini-batch size | 64 |
| Rollout steps | 2048 |
| Entropy coefficient | 0.01 |
| Value coefficient | 0.5 |
| Max gradient norm | 0.5 |
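
As a quick orientation, here is a minimal sketch of how these hyperparameters are wired into the trainer; it assumes the module layout listed above and mirrors the defaults used by `train.py`:

```python
from src.network import Actor, Critic
from src.replay_buffer import RolloutBuffer
from src.trainer import PPOTrainer
from src.utils import get_device

device = get_device()
state_shape, action_dim = (84, 84, 4), 3   # stacked 84x84 grayscale frames; steer/gas/brake

actor = Actor(state_shape=state_shape, action_dim=action_dim).to(device)
critic = Critic(state_shape=state_shape).to(device)
buffer = RolloutBuffer(buffer_size=2048, state_shape=state_shape, action_dim=action_dim)

trainer = PPOTrainer(
    actor=actor, critic=critic, rollout_buffer=buffer, device=device,
    lr=3e-4, gamma=0.99, gae_lambda=0.95, clip_eps=0.2,
    ppo_epochs=4, mini_batch_size=64,
    ent_coef=0.01, vf_coef=0.5, max_grad_norm=0.5,
)
```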
@@ -0,0 +1,136 @@
# PPO + CarRacing-v3 Task Progress Tracker
> Generated: 2026/04/30
---
## Assignment Requirements
Implement the PPO algorithm from scratch in Python, train an agent in the CarRacing-v3 environment, and submit:
- Technical report (≤3000 words, in English) as a PDF
- Source code + trained model as a zip file
- Deadline: 04/May/2026 23:59
- **Not allowed**: Stable-Baselines and other dedicated RL libraries
- **Allowed**: TensorBoard, PyTorch, Gymnasium
---
## 1. Completed ✅
| Step | Description | Files |
|------|------|------|
| ✅ Project structure | src/ directory, requirements.txt, README.md | [requirements.txt](requirements.txt), [README.md](README.md) |
| ✅ Policy/value networks | Actor (Gaussian policy outputting μ, σ) + Critic, CNN backbone | [src/network.py](src/network.py) |
| ✅ Rollout buffer | Trajectory storage + GAE advantage estimation + return computation | [src/replay_buffer.py](src/replay_buffer.py) |
| ✅ PPO trainer | PPO update (clipped surrogate objective + entropy regularization + value loss) | [src/trainer.py](src/trainer.py) |
| ✅ Environment preprocessing | Grayscale + resize (84×84) + 4-frame stack wrappers (see the sketch after this table) | [src/utils.py](src/utils.py) |
| ✅ Evaluation script | Rendering test + multi-episode average-return evaluation | [src/evaluate.py](src/evaluate.py) |
| ✅ Training entry point | Main training loop, TensorBoard logging, model checkpointing | [train.py](train.py) |
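
A minimal sketch of the preprocessing pipeline referenced above, assuming `src/utils.py` as listed (the printed shape is the expected stacked observation):

```python
from src.utils import make_env

env = make_env()     # grayscale + 84x84 resize + 4-frame stack wrappers
obs, _ = env.reset()
print(obs.shape)     # expected: (4, 84, 84), dtype uint8
```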
**Core algorithm implementation notes**
- Policy network: 3-layer CNN + FC(512) → μ, σ (Gaussian policy, tanh on μ)
- Value network: 3-layer CNN + FC(512) → V(s)
- GAE: λ=0.95 with advantage normalization (see the estimator below)
- PPO clipping: ε=0.2, 4 epochs per update, mini-batch size 64
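
For reference, these settings parameterize the standard clipped surrogate objective and GAE estimator (stated here for the report):

```latex
L^{\mathrm{CLIP}}(\theta)
  = \mathbb{E}_t\!\left[\min\!\big(r_t(\theta)\,\hat{A}_t,\;
      \operatorname{clip}\!\big(r_t(\theta),\,1-\epsilon,\,1+\epsilon\big)\,\hat{A}_t\big)\right],
  \qquad r_t(\theta) = \frac{\pi_\theta(a_t \mid s_t)}{\pi_{\theta_{\mathrm{old}}}(a_t \mid s_t)}
```

```latex
\hat{A}_t = \sum_{l \ge 0} (\gamma\lambda)^{l}\,\delta_{t+l},
  \qquad \delta_t = r_t + \gamma\,V(s_{t+1}) - V(s_t)
```

Here ε=0.2, γ=0.99, and λ=0.95, matching the hyperparameter table below.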
---
## 2. To Do ⬜
| Step | Description | Priority |
|------|------|--------|
| ⬜ Install dependencies | `uv pip install --system -r requirements.txt` | **High** |
| ⬜ Smoke test | Short run (~10,000 steps) to verify the code runs end to end | **High** |
| ⬜ Full training | Run 500k+ steps, estimated 5-8 hours (in the background) | **High (time-consuming)** |
| ⬜ Generate figures | Extract data from TensorBoard and plot with matplotlib | Medium |
| ⬜ Write report | Technical report in English (≤3000 words), typeset in LaTeX | Medium |
| ⬜ Compile PDF | Build CW1_1234560.pdf with XeLaTeX | Medium |
| ⬜ Package zip | Bundle source code + trained model into CW1_1234560.zip | Low |
---
## 3. File Structure
```
强化学习个人项目报告/
├── src/
│ ├── __init__.py
│   ├── network.py         # Actor + Critic CNN networks
│   ├── replay_buffer.py   # Rollout buffer + GAE
│   ├── trainer.py         # PPO update logic
│   ├── utils.py           # Environment preprocessing wrappers
│   └── evaluate.py        # Evaluation script
├── train.py               # Main training entry point
├── requirements.txt
├── README.md
└── TASK_PROGRESS.md       # This document
```
---
## 4. Hyperparameter Configuration
| Parameter | Value |
|------|-----|
| Learning rate | 3e-4 |
| Gamma | 0.99 |
| GAE lambda | 0.95 |
| Clip epsilon | 0.2 |
| PPO epochs | 4 |
| Mini-batch size | 64 |
| Rollout steps | 2048 |
| Entropy coefficient | 0.01 |
| Value coefficient | 0.5 |
| Max gradient norm | 0.5 |
| State shape | (84, 84, 4) |
| Action dim | 3 (continuous: steer, gas, brake) |
---
## 5. Next Actions
### Immediate
```bash
# 1. Install dependencies
uv pip install --system -r requirements.txt
# 2. Verify the code runs (short test)
python train.py --steps 10000
# 3. Start the full training run (in the background, estimated 5-8 hours)
python train.py --steps 500000
```
### After training
```bash
# TensorBoard visualization
tensorboard --logdir logs/tensorboard
# Evaluate the model
python src/evaluate.py --model models/ppo_carracing_final.pt --episodes 10
```
### After writing the report
```bash
# Compile the PDF
cd tex && xelatex CW1_1234560.tex
```
---
## 6. Report Structure (≤3000 words)
1. **Introduction**: RL background, the CarRacing-v3 task, state/action/reward space definitions
2. **Methodology**: PPO mathematical formulation, the clipping mechanism, GAE advantage estimation
3. **Implementation Details**: network architecture, training procedure, hyperparameters, problems encountered and solutions
4. **Results and Analysis**: training curves, evaluation results, comparison against an SB3 baseline
5. **Conclusion**: summary of PPO's hyperparameter sensitivity and the effectiveness of the actor-critic approach
---
## 7. Submission Checklist
- [ ] `CW1_1234560.pdf`: technical report (cover page + ≤3000 words)
- [ ] `CW1_1234560.zip`: source code + trained model .pt file
- [ ] All code comments in English
- [ ] Figure axes and legends in English
@@ -0,0 +1,5 @@
torch
gymnasium[box2d]
numpy
matplotlib
tensorboard
opencv-python
@@ -0,0 +1,6 @@
"""PPO Agent for CarRacing-v3 environment."""
from .network import Actor, Critic
from .replay_buffer import RolloutBuffer
from .trainer import PPOTrainer
__all__ = ['Actor', 'Critic', 'RolloutBuffer', 'PPOTrainer']
@@ -0,0 +1,92 @@
"""Evaluation script for trained PPO agent."""
import torch
import numpy as np
import gymnasium as gym
from src.utils import make_env, get_device
from src.network import Actor, Critic
def evaluate(actor, env, num_episodes=10, device=torch.device("cpu")):
"""Evaluate actor and return average return."""
actor.eval()
returns = []
for ep in range(num_episodes):
obs, _ = env.reset()
obs = np.transpose(obs, (1, 2, 0)) # (C, H, W) -> (H, W, C) for storage
total_reward = 0
done = False
steps = 0
while not done and steps < 1000:
with torch.no_grad():
# Convert to tensor (B, C, H, W)
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(obs_t)
# Sample action
dist = torch.distributions.Normal(mu, std)
action = dist.sample()
action = torch.clamp(action, -1, 1).squeeze(0).cpu().numpy()
obs, reward, terminated, truncated, _ = env.step(action)
# Convert to (C, H, W) format
obs = np.transpose(obs, (1, 2, 0))
total_reward += reward
done = terminated or truncated
steps += 1
returns.append(total_reward)
print(f"Episode {ep+1}/{num_episodes}: return={total_reward:.1f}, steps={steps}")
actor.train()
return np.mean(returns), np.std(returns)
def evaluate_render(actor, env, device):
"""Render and evaluate agent with visualization."""
actor.eval()
obs, _ = env.reset()
obs = np.transpose(obs, (1, 2, 0))
env.render_mode = "human"
done = False
total_reward = 0
while not done:
with torch.no_grad():
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(obs_t)
dist = torch.distributions.Normal(mu, std)
action = dist.sample()
action = torch.clamp(action, -1, 1).squeeze(0).cpu().numpy()
obs, reward, terminated, truncated, _ = env.step(action)
obs = np.transpose(obs, (1, 2, 0))
total_reward += reward
done = terminated or truncated
env.render()
actor.train()
print(f"Final return: {total_reward:.1f}")
if __name__ == "__main__":
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--model", type=str, required=True, help="Path to trained model")
parser.add_argument("--episodes", type=int, default=5, help="Number of evaluation episodes")
args = parser.parse_args()
device = get_device()
env = make_env()
actor = Actor().to(device)
critic = Critic().to(device)
# Load model
checkpoint = torch.load(args.model, map_location=device, weights_only=False)
actor.load_state_dict(checkpoint["actor"])
print(f"Loaded model from {args.model}")
mean_return, std_return = evaluate(actor, env, num_episodes=args.episodes, device=device)
print(f"\nEvaluation: mean={mean_return:.2f}, std={std_return:.2f}")
@@ -0,0 +1,78 @@
"""Neural network architectures for Actor and Critic."""
import torch
import torch.nn as nn
import torch.nn.functional as F
class Actor(nn.Module):
"""Actor network outputting Gaussian policy parameters (mu, sigma)."""
def __init__(self, state_shape=(84, 84, 4), action_dim=3):
super().__init__()
c, h, w = state_shape[2], state_shape[0], state_shape[1] # channels, height, width
self.conv = nn.Sequential(
nn.Conv2d(c, 32, kernel_size=8, stride=4),
nn.ReLU(),
nn.Conv2d(32, 64, kernel_size=4, stride=2),
nn.ReLU(),
nn.Conv2d(64, 64, kernel_size=3, stride=1),
nn.ReLU(),
)
        # Feature map size: 84x84 -> 20 -> 9 -> 7 through the three conv layers
        feat_size = 64 * 7 * 7
self.fc = nn.Sequential(
nn.Linear(feat_size, 512),
nn.ReLU(),
)
self.mu_head = nn.Linear(512, action_dim)
self.log_std_head = nn.Linear(512, action_dim)
# Initialize output layers
nn.init.orthogonal_(self.mu_head.weight, gain=0.01)
nn.init.orthogonal_(self.log_std_head.weight, gain=0.01)
def forward(self, x):
"""Forward pass returning (mu, log_std)."""
x = x / 255.0 # Normalize
x = self.conv(x)
x = x.view(x.size(0), -1)
x = self.fc(x)
mu = torch.tanh(self.mu_head(x))
log_std = self.log_std_head(x)
log_std = torch.clamp(log_std, -20, 2)
return mu, log_std.exp()
class Critic(nn.Module):
"""Critic network estimating state value V(s)."""
def __init__(self, state_shape=(84, 84, 4)):
super().__init__()
c, h, w = state_shape[2], state_shape[0], state_shape[1]
self.conv = nn.Sequential(
nn.Conv2d(c, 32, kernel_size=8, stride=4),
nn.ReLU(),
nn.Conv2d(32, 64, kernel_size=4, stride=2),
nn.ReLU(),
nn.Conv2d(64, 64, kernel_size=3, stride=1),
nn.ReLU(),
)
        feat_size = 64 * 7 * 7  # 84x84 -> 20 -> 9 -> 7 through the conv stack
self.fc = nn.Sequential(
nn.Linear(feat_size, 512),
nn.ReLU(),
nn.Linear(512, 1)
)
def forward(self, x):
"""Forward pass returning V(s)."""
x = x / 255.0
x = self.conv(x)
x = x.view(x.size(0), -1)
return self.fc(x)
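

# --- Illustrative shape sanity check (sketch; not part of the training pipeline) ---
# Assumes the Actor/Critic definitions above: with (B, 4, 84, 84) inputs the conv stack
# produces a 7x7x64 feature map, so the heads should emit (1, 3) and (1, 1) tensors.
if __name__ == "__main__":
    dummy = torch.zeros(1, 4, 84, 84)  # four stacked 84x84 grayscale frames
    mu, std = Actor()(dummy)
    value = Critic()(dummy)
    print(mu.shape, std.shape, value.shape)  # expected: (1, 3) (1, 3) (1, 1)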
@@ -0,0 +1,64 @@
"""Rollout buffer for storing trajectories."""
import numpy as np
class RolloutBuffer:
"""Stores trajectories for PPO training."""
def __init__(self, buffer_size, state_shape, action_dim):
self.buffer_size = buffer_size
self.ptr = 0
self.size = 0
self.states = np.zeros((buffer_size, *state_shape), dtype=np.uint8)
self.actions = np.zeros((buffer_size, action_dim), dtype=np.float32)
self.rewards = np.zeros(buffer_size, dtype=np.float32)
self.dones = np.zeros(buffer_size, dtype=np.bool_)
self.values = np.zeros(buffer_size, dtype=np.float32)
self.log_probs = np.zeros((buffer_size, action_dim), dtype=np.float32)
def add(self, state, action, reward, done, value, log_prob):
"""Add a transition to the buffer."""
self.states[self.ptr] = state
self.actions[self.ptr] = action
self.rewards[self.ptr] = reward
self.dones[self.ptr] = done
self.values[self.ptr] = value
self.log_probs[self.ptr] = log_prob
self.ptr = (self.ptr + 1) % self.buffer_size
self.size = min(self.size + 1, self.buffer_size)
def compute_returns(self, last_value, gamma=0.99, gae_lambda=0.95):
"""Compute returns and advantages using GAE."""
advantages = np.zeros(self.size, dtype=np.float32)
last_gae = 0
# Compute GAE backwards
for t in reversed(range(self.size)):
if t == self.size - 1:
next_value = last_value
else:
next_value = self.values[t + 1]
delta = self.rewards[t] + gamma * next_value * (1 - self.dones[t]) - self.values[t]
last_gae = delta + gamma * gae_lambda * (1 - self.dones[t]) * last_gae
advantages[t] = last_gae
returns = advantages + self.values[:self.size]
return returns, advantages
def get(self):
"""Return all data as numpy arrays."""
return (
self.states[:self.size],
self.actions[:self.size],
self.rewards[:self.size],
self.dones[:self.size],
self.values[:self.size],
self.log_probs[:self.size],
)
def reset(self):
"""Reset buffer."""
self.ptr = 0
self.size = 0
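

# --- Illustrative sketch (not used by training): GAE on a tiny 3-step trajectory ---
# Assumes the RolloutBuffer above; the rewards and values below are made up purely to
# show the call pattern of add() followed by compute_returns().
if __name__ == "__main__":
    buf = RolloutBuffer(buffer_size=3, state_shape=(84, 84, 4), action_dim=3)
    for r, v in [(1.0, 0.5), (0.0, 0.4), (1.0, 0.3)]:
        buf.add(np.zeros((84, 84, 4), dtype=np.uint8), np.zeros(3), r, False, v, np.zeros(3))
    returns, advantages = buf.compute_returns(last_value=0.2, gamma=0.99, gae_lambda=0.95)
    print("returns:", returns, "advantages:", advantages)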
@@ -0,0 +1,123 @@
"""PPO Trainer with GAE advantage estimation."""
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
class PPOTrainer:
"""PPO trainer handling the training loop."""
def __init__(
self,
actor,
critic,
rollout_buffer,
device,
clip_eps=0.2,
gamma=0.99,
gae_lambda=0.95,
lr=3e-4,
ent_coef=0.01,
vf_coef=0.5,
max_grad_norm=0.5,
ppo_epochs=4,
mini_batch_size=64,
):
self.actor = actor
self.critic = critic
self.buffer = rollout_buffer
self.device = device
self.clip_eps = clip_eps
self.gamma = gamma
self.gae_lambda = gae_lambda
self.ent_coef = ent_coef
self.vf_coef = vf_coef
self.max_grad_norm = max_grad_norm
self.ppo_epochs = ppo_epochs
self.mini_batch_size = mini_batch_size
# Separate optimizers
self.actor_optim = optim.Adam(actor.parameters(), lr=lr)
self.critic_optim = optim.Adam(critic.parameters(), lr=lr)
self.loss_history = {'actor': [], 'critic': [], 'entropy': [], 'total': []}
def update(self, last_value):
"""Perform one PPO update."""
states, actions, rewards, dones, values, log_probs_old = self.buffer.get()
# Compute returns and advantages
returns, advantages = self.buffer.compute_returns(
last_value, self.gamma, self.gae_lambda
)
# Normalize advantages
advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
        # Convert to tensors; states are stored as (B, H, W, C), the conv nets expect (B, C, H, W)
        states_t = torch.from_numpy(states).float().permute(0, 3, 1, 2).to(self.device)
actions_t = torch.from_numpy(actions).float().to(self.device)
log_probs_old_t = torch.from_numpy(log_probs_old).float().to(self.device)
returns_t = torch.from_numpy(returns).float().to(self.device)
advantages_t = torch.from_numpy(advantages).float().to(self.device)
dataset = TensorDataset(states_t, actions_t, log_probs_old_t, returns_t, advantages_t)
loader = DataLoader(dataset, batch_size=self.mini_batch_size, shuffle=True)
total_actor_loss = 0
total_critic_loss = 0
total_entropy = 0
count = 0
for _ in range(self.ppo_epochs):
for batch in loader:
                s, a, log_pi_old, ret, adv = batch
                # The buffer broadcasts the summed log-prob over the action dims, so keep a
                # single column; give the advantage a trailing dim so the products broadcast.
                log_pi_old = log_pi_old[:, :1]
                adv = adv.unsqueeze(-1)
# Get current policy distribution
mu, std = self.actor(s)
dist = torch.distributions.Normal(mu, std)
log_pi = dist.log_prob(a).sum(dim=-1, keepdim=True)
entropy = dist.entropy().sum(dim=-1, keepdim=True)
# Probability ratio
ratio = torch.exp(log_pi - log_pi_old)
# Clipped surrogate objective
surr1 = ratio * adv
surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * adv
actor_loss = -torch.min(surr1, surr2).mean()
# Value loss
value = self.critic(s)
critic_loss = nn.MSELoss()(value.squeeze(), ret)
# Total loss
loss = actor_loss + self.vf_coef * critic_loss - self.ent_coef * entropy.mean()
# Update
self.actor_optim.zero_grad()
self.critic_optim.zero_grad()
loss.backward()
nn.utils.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)
nn.utils.clip_grad_norm_(self.critic.parameters(), self.max_grad_norm)
self.actor_optim.step()
self.critic_optim.step()
total_actor_loss += actor_loss.item()
total_critic_loss += critic_loss.item()
total_entropy += entropy.mean().item()
count += 1
avg_actor = total_actor_loss / count
avg_critic = total_critic_loss / count
avg_entropy = total_entropy / count
self.loss_history['actor'].append(avg_actor)
self.loss_history['critic'].append(avg_critic)
self.loss_history['entropy'].append(avg_entropy)
self.loss_history['total'].append(avg_actor + avg_critic)
self.buffer.reset()
return avg_actor, avg_critic, avg_entropy
@@ -0,0 +1,87 @@
"""Utility functions for environment, device detection, and TensorBoard."""
import gymnasium as gym
import numpy as np
import torch
from collections import deque
class GrayScaleWrapper(gym.ObservationWrapper):
"""Convert RGB observation to grayscale."""
def __init__(self, env):
super().__init__(env)
def observation(self, obs):
# RGB to grayscale: weighted average
gray = 0.299 * obs[:, :, 0] + 0.587 * obs[:, :, 1] + 0.114 * obs[:, :, 2]
return gray.astype(np.uint8)
class ResizeWrapper(gym.ObservationWrapper):
"""Resize observation to target size."""
def __init__(self, env, size=(84, 84)):
super().__init__(env)
self.size = size
def observation(self, obs):
import cv2
return cv2.resize(obs, self.size, interpolation=cv2.INTER_AREA)
class FrameStackWrapper(gym.ObservationWrapper):
"""Stack last N frames."""
def __init__(self, env, num_stack=4):
super().__init__(env)
self.num_stack = num_stack
self.frames = deque(maxlen=num_stack)
obs_shape = env.observation_space.shape
self.observation_space = gym.spaces.Box(
low=0, high=255,
shape=(num_stack, *obs_shape[-2:]),
dtype=np.uint8
)
def reset(self, **kwargs):
obs, info = self.env.reset(**kwargs)
for _ in range(self.num_stack):
self.frames.append(obs)
return self._get_observation(), info
def observation(self, obs):
self.frames.append(obs)
return self._get_observation()
def _get_observation(self):
return np.stack(list(self.frames), axis=0)
def make_env(env_id="CarRacing-v3", gray_scale=True, resize=True, frame_stack=4):
"""Create preprocessed CarRacing environment."""
env = gym.make(env_id, render_mode="rgb_array")
if resize:
env = ResizeWrapper(env, size=(84, 84))
if gray_scale:
env = GrayScaleWrapper(env)
if frame_stack > 1:
env = FrameStackWrapper(env, num_stack=frame_stack)
return env
def get_device():
"""Detect and return available device."""
if torch.cuda.is_available():
device = torch.device("cuda")
print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
device = torch.device("cpu")
print("Using CPU")
return device
def preprocess_obs(obs):
"""Ensure observation is in correct format for network."""
if len(obs.shape) == 2: # single channel
obs = np.expand_dims(obs, axis=0)
return obs
@@ -0,0 +1,192 @@
"""Main training script for PPO on CarRacing-v3."""
import os
import time
import argparse
import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter
from src.network import Actor, Critic
from src.replay_buffer import RolloutBuffer
from src.trainer import PPOTrainer
from src.utils import make_env, get_device
def collect_rollout(actor, critic, env, buffer, device, rollout_steps):
"""Collect rollout data."""
obs, _ = env.reset()
# Convert to (C, H, W) format for storage
obs = np.transpose(obs, (1, 2, 0))
for step in range(rollout_steps):
with torch.no_grad():
# Convert to (B, C, H, W)
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(obs_t)
dist = torch.distributions.Normal(mu, std)
action = dist.sample()
action = torch.clamp(action, -1, 1)
log_prob = dist.log_prob(action).sum(dim=-1, keepdim=True)
value = critic(obs_t).squeeze(0).item()
action_np = action.squeeze(0).cpu().numpy()
log_prob_np = log_prob.squeeze(0).cpu().numpy()
next_obs, reward, terminated, truncated, _ = env.step(action_np)
done = terminated or truncated
# Convert next_obs to (C, H, W) for storage
next_obs_stored = np.transpose(next_obs, (1, 2, 0))
buffer.add(obs.copy(), action_np, reward, done, value, log_prob_np)
obs = next_obs_stored
        if done:
            obs, _ = env.reset()
            obs = np.transpose(obs, (1, 2, 0))
    # Return the final observation so the caller can bootstrap the last value for GAE
    return obs
def train(
total_steps=500000,
rollout_steps=2048,
eval_interval=10,
save_interval=50,
device=None,
):
"""Main training loop."""
if device is None:
device = get_device()
env = make_env()
eval_env = make_env()
state_shape = (84, 84, 4)
action_dim = 3
actor = Actor(state_shape=state_shape, action_dim=action_dim).to(device)
critic = Critic(state_shape=state_shape).to(device)
buffer = RolloutBuffer(
buffer_size=rollout_steps,
state_shape=state_shape,
action_dim=action_dim,
)
trainer = PPOTrainer(
actor=actor,
critic=critic,
rollout_buffer=buffer,
device=device,
clip_eps=0.2,
gamma=0.99,
gae_lambda=0.95,
lr=3e-4,
ent_coef=0.01,
vf_coef=0.5,
max_grad_norm=0.5,
ppo_epochs=4,
mini_batch_size=64,
)
# TensorBoard
log_dir = os.path.join("logs", "tensorboard", f"run_{int(time.time())}")
writer = SummaryWriter(log_dir)
print(f"Training on {device}")
print(f"Log directory: {log_dir}")
episode = 0
total_timesteps = 0
episode_rewards = []
recent_rewards = []
while total_timesteps < total_steps:
        # Collect rollout; keep the final observation for bootstrapping the last value
        obs = collect_rollout(actor, critic, env, buffer, device, rollout_steps)
# Get last value for GAE
with torch.no_grad():
obs_t = torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
last_value = critic(obs_t).squeeze(0).item()
        # Estimate rollout reward before the update (trainer.update() resets the buffer)
        ep_reward = buffer.rewards[:buffer.size].sum()
        # PPO update
        actor_loss, critic_loss, entropy = trainer.update(last_value)
        # Logging
        writer.add_scalar("Loss/Actor", actor_loss, total_timesteps)
        writer.add_scalar("Loss/Critic", critic_loss, total_timesteps)
        writer.add_scalar("Loss/Entropy", entropy, total_timesteps)
        total_timesteps += rollout_steps
        episode += 1
episode_rewards.append(ep_reward)
recent_rewards.append(ep_reward)
# Running average of last 10 episodes
avg_reward = np.mean(recent_rewards[-10:]) if len(recent_rewards) >= 10 else np.mean(recent_rewards)
writer.add_scalar("Reward/Episode", ep_reward, total_timesteps)
writer.add_scalar("Reward/AvgLast10", avg_reward, total_timesteps)
print(f"Episode {episode}, steps {total_timesteps}, ep_reward={ep_reward:.1f}, avg_10={avg_reward:.1f}")
# Evaluation
if episode % eval_interval == 0:
eval_returns = []
for _ in range(5):
eval_obs, _ = eval_env.reset()
eval_obs = np.transpose(eval_obs, (1, 2, 0))
eval_reward = 0
done = False
while not done:
with torch.no_grad():
eval_obs_t = torch.from_numpy(eval_obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)
mu, std = actor(eval_obs_t)
action = torch.clamp(mu, -1, 1).squeeze(0).cpu().numpy()
eval_obs, reward, terminated, truncated, _ = eval_env.step(action)
eval_obs = np.transpose(eval_obs, (1, 2, 0))
eval_reward += reward
done = terminated or truncated
eval_returns.append(eval_reward)
mean_eval = np.mean(eval_returns)
writer.add_scalar("Eval/MeanReturn", mean_eval, episode)
print(f" Eval: mean_return={mean_eval:.2f}")
# Save model
if episode % save_interval == 0:
os.makedirs("models", exist_ok=True)
torch.save({
"actor": actor.state_dict(),
"critic": critic.state_dict(),
"episode": episode,
"timesteps": total_timesteps,
}, os.path.join("models", f"ppo_carracing_ep{episode}.pt"))
print(f" Saved model at episode {episode}")
# Save final model
os.makedirs("models", exist_ok=True)
torch.save({
"actor": actor.state_dict(),
"critic": critic.state_dict(),
"episode": episode,
"timesteps": total_timesteps,
}, os.path.join("models", "ppo_carracing_final.pt"))
writer.close()
print(f"Training complete! Total episodes: {episode}")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--steps", type=int, default=500000, help="Total training steps")
parser.add_argument("--rollout", type=int, default=2048, help="Rollout buffer size")
args = parser.parse_args()
device = get_device()
train(total_steps=args.steps, rollout_steps=args.rollout, device=device)