diff --git a/强化学习个人课程作业报告/notebooks/insurance_premium_risk.ipynb b/强化学习个人课程作业报告/notebooks/insurance_premium_risk.ipynb index cd30a74..76dd680 100644 --- a/强化学习个人课程作业报告/notebooks/insurance_premium_risk.ipynb +++ b/强化学习个人课程作业报告/notebooks/insurance_premium_risk.ipynb @@ -43,17 +43,68 @@ "execution_count": null, "id": "a12f069a", "metadata": {}, - "outputs": [], - "source": "import xgboost as xgb\nimport optuna\noptuna.logging.set_verbosity(optuna.logging.WARNING)\n\n# GPU Fallback: 自动检测CUDA可用性,无GPU时自动切换到CPU\ntry:\n import subprocess\n result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n USE_GPU = result.returncode == 0\nexcept:\n USE_GPU = False\n\nXGB_TREE_METHOD = 'gpu_hist' if USE_GPU else 'hist'\nXGB_DEVICE = 'cuda' if USE_GPU else 'cpu'\nprint(f'XGBoost compute method: {\"GPU (CUDA)\" if USE_GPU else \"CPU\"}')\n\nRANDOM_STATE = 42\nnp.random.seed(RANDOM_STATE)\nplt.rcParams['figure.figsize'] = (10, 6)\nplt.rcParams['font.size'] = 12\nsns.set_style('whitegrid')\nprint('All libraries imported successfully!')" + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mRunning cells with 'my_env (Python 3.10.18)' requires the ipykernel package.\n", + "\u001b[1;31mCreate a Python Environment with the required packages.\n", + "\u001b[1;31mOr install 'ipykernel' using the command: 'conda install -n my_env ipykernel --update-deps --force-reinstall'" + ] + } + ], + "source": [ + "import xgboost as xgb\n", + "import optuna\n", + "optuna.logging.set_verbosity(optuna.logging.WARNING)\n", + "\n", + "# GPU Fallback: 自动检测CUDA可用性,无GPU时自动切换到CPU\n", + "try:\n", + " import subprocess\n", + " result = subprocess.run(['nvidia-smi'], capture_output=True, text=True)\n", + " USE_GPU = result.returncode == 0\n", + "except:\n", + " USE_GPU = False\n", + "\n", + "XGB_TREE_METHOD = 'gpu_hist' if USE_GPU else 'hist'\n", + "XGB_DEVICE = 'cuda' if USE_GPU else 'cpu'\n", + "print(f'XGBoost compute method: {\"GPU (CUDA)\" if 
USE_GPU else \"CPU\"}')\n", + "\n", + "RANDOM_STATE = 42\n", + "np.random.seed(RANDOM_STATE)\n", + "plt.rcParams['figure.figsize'] = (10, 6)\n", + "plt.rcParams['font.size'] = 12\n", + "sns.set_style('whitegrid')\n", + "print('All libraries imported successfully!')" + ] }, { "cell_type": "code", "execution_count": null, "id": "1c4b453a", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mRunning cells with 'my_env (Python 3.10.18)' requires the ipykernel package.\n", + "\u001b[1;31mCreate a Python Environment with the required packages.\n", + "\u001b[1;31mOr install 'ipykernel' using the command: 'conda install -n my_env ipykernel --update-deps --force-reinstall'" + ] + } + ], "source": [ - "DATA_DIR = r'd:\\Code\\doing_exercises\\programs\\外教作业外快\\强化学习个人课程作业报告\\dataset_final'\nOUTPUT_DIR = r'd:\\Code\\doing_exercises\\programs\\外教作业外快\\强化学习个人课程作业报告\\outputs'\n\ntrain_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))\nval_df = pd.read_csv(os.path.join(DATA_DIR, 'val.csv'))\ntest_df = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))\n\nprint(f'Train shape: {train_df.shape}')\nprint(f'Val shape: {val_df.shape}')\nprint(f'Test shape: {test_df.shape}')" + "train_df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))\n", + "val_df = pd.read_csv(os.path.join(DATA_DIR, 'val.csv'))\n", + "test_df = pd.read_csv(os.path.join(DATA_DIR, 'test_features.csv'))\n", + "\n", + "print(f'Train shape: {train_df.shape}')\n", + "print(f'Val shape: {val_df.shape}')\n", + "print(f'Test shape: {test_df.shape}')" ] }, { @@ -71,7 +122,23 @@ "metadata": {}, "outputs": [], "source": [ - "print('=== TARGET DISTRIBUTION (TRAIN) ===')\ntarget_counts = train_df['premium_risk'].value_counts()\nprint(target_counts)\nprint((target_counts / len(train_df) * 100).round(2))\n\nfig, ax = plt.subplots(figsize=(8, 5))\ncolors = ['#4CAF50', '#FFC107', '#F44336']\ntarget_counts.sort_index().plot(kind='bar', 
ax=ax, color=colors)\nax.set_title('Target Variable Distribution (Train)', fontsize=14)\nax.set_xlabel('Premium Risk')\nax.set_ylabel('Count')\nax.set_xticklabels(ax.get_xticklabels(), rotation=0)\nfor i, (idx, val) in enumerate(target_counts.sort_index().items()):\n ax.text(i, val + 300, f'{val}\\n({val/len(train_df)*100:.1f}%)', ha='center')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'target_distribution.png'), dpi=150)\nplt.show()" + "print('=== TARGET DISTRIBUTION (TRAIN) ===')\n", + "target_counts = train_df['premium_risk'].value_counts()\n", + "print(target_counts)\n", + "print((target_counts / len(train_df) * 100).round(2))\n", + "\n", + "fig, ax = plt.subplots(figsize=(8, 5))\n", + "colors = ['#4CAF50', '#FFC107', '#F44336']\n", + "target_counts.sort_index().plot(kind='bar', ax=ax, color=colors)\n", + "ax.set_title('Target Variable Distribution (Train)', fontsize=14)\n", + "ax.set_xlabel('Premium Risk')\n", + "ax.set_ylabel('Count')\n", + "ax.set_xticklabels(ax.get_xticklabels(), rotation=0)\n", + "for i, (idx, val) in enumerate(target_counts.sort_index().items()):\n", + " ax.text(i, val + 300, f'{val}\\n({val/len(train_df)*100:.1f}%)', ha='center')\n", + "plt.tight_layout()\n", + "plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'target_distribution.png'), dpi=150)\n", + "plt.show()" ] }, { @@ -81,7 +148,18 @@ "metadata": {}, "outputs": [], "source": [ - "print('=== MISSING VALUES (TRAIN) ===')\nmissing = train_df.isnull().sum()\nmissing = missing[missing > 0].sort_values(ascending=False)\nprint(missing)\n\nfig, ax = plt.subplots(figsize=(12, 6))\nmissing.plot(kind='barh', ax=ax, color='coral')\nax.set_title('Missing Values per Column (Train)', fontsize=14)\nax.set_xlabel('Count')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'missing_values.png'), dpi=150)\nplt.show()" + "print('=== MISSING VALUES (TRAIN) ===')\n", + "missing = train_df.isnull().sum()\n", + "missing = missing[missing > 
0].sort_values(ascending=False)\n", + "print(missing)\n", + "\n", + "fig, ax = plt.subplots(figsize=(12, 6))\n", + "missing.plot(kind='barh', ax=ax, color='coral')\n", + "ax.set_title('Missing Values per Column (Train)', fontsize=14)\n", + "ax.set_xlabel('Count')\n", + "plt.tight_layout()\n", + "plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'missing_values.png'), dpi=150)\n", + "plt.show()" ] }, { @@ -91,7 +169,21 @@ "metadata": {}, "outputs": [], "source": [ - "noise_cols = [c for c in train_df.columns if 'noise' in c.lower()]\nprint(f'Noise features: {noise_cols}')\n\nprint('\\n=== bureau_risk_index stats ===')\nprint(train_df['bureau_risk_index'].describe())\n\nfig, ax = plt.subplots(figsize=(8, 5))\ntrain_df.boxplot(column='bureau_risk_index', by='premium_risk', ax=ax)\nax.set_title('bureau_risk_index by Premium Risk')\nax.set_xlabel('Premium Risk')\nax.set_ylabel('bureau_risk_index')\nplt.suptitle('')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'bureau_risk_boxplot.png'), dpi=150)\nplt.show()" + "noise_cols = [c for c in train_df.columns if 'noise' in c.lower()]\n", + "print(f'Noise features: {noise_cols}')\n", + "\n", + "print('\\n=== bureau_risk_index stats ===')\n", + "print(train_df['bureau_risk_index'].describe())\n", + "\n", + "fig, ax = plt.subplots(figsize=(8, 5))\n", + "train_df.boxplot(column='bureau_risk_index', by='premium_risk', ax=ax)\n", + "ax.set_title('bureau_risk_index by Premium Risk')\n", + "ax.set_xlabel('Premium Risk')\n", + "ax.set_ylabel('bureau_risk_index')\n", + "plt.suptitle('')\n", + "plt.tight_layout()\n", + "plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'bureau_risk_boxplot.png'), dpi=150)\n", + "plt.show()" ] }, { @@ -112,7 +204,28 @@ "metadata": {}, "outputs": [], "source": [ - "def screen_single_feature_leakage(df, target_col, feature_cols, scoring='f1_macro'):\n from sklearn.tree import DecisionTreeClassifier\n results = []\n for col in feature_cols:\n temp_df = df[[col, target_col]].dropna()\n 
X_temp = temp_df[[col]].values\n y_temp = temp_df[target_col].values\n le = LabelEncoder()\n y_enc = le.fit_transform(y_temp)\n try:\n clf = DecisionTreeClassifier(random_state=RANDOM_STATE, max_depth=3)\n scores = cross_val_score(clf, X_temp, y_enc, cv=3, scoring=scoring)\n results.append({'feature': col, 'mean_f1_macro': scores.mean(), 'std': scores.std()})\n except:\n results.append({'feature': col, 'mean_f1_macro': 0.0, 'std': 0.0})\n return pd.DataFrame(results).sort_values('mean_f1_macro', ascending=False)\n\nfeature_to_test = [c for c in train_df.columns if c not in ['applicant_id', 'customer_key', 'premium_risk']]\nprint('Screening single features for leakage detection (this may take a few minutes)...')\nleakage_results = screen_single_feature_leakage(train_df, 'premium_risk', feature_to_test)\nprint('\\n=== TOP 10 SINGLE-FEATURE F1 MACRO SCORES ===')\nprint(leakage_results.head(10))" + "def screen_single_feature_leakage(df, target_col, feature_cols, scoring='f1_macro'):\n", + " from sklearn.tree import DecisionTreeClassifier\n", + " results = []\n", + " for col in feature_cols:\n", + " temp_df = df[[col, target_col]].dropna()\n", + " X_temp = temp_df[[col]].values\n", + " y_temp = temp_df[target_col].values\n", + " le = LabelEncoder()\n", + " y_enc = le.fit_transform(y_temp)\n", + " try:\n", + " clf = DecisionTreeClassifier(random_state=RANDOM_STATE, max_depth=3)\n", + " scores = cross_val_score(clf, X_temp, y_enc, cv=3, scoring=scoring)\n", + " results.append({'feature': col, 'mean_f1_macro': scores.mean(), 'std': scores.std()})\n", + " except:\n", + " results.append({'feature': col, 'mean_f1_macro': 0.0, 'std': 0.0})\n", + " return pd.DataFrame(results).sort_values('mean_f1_macro', ascending=False)\n", + "\n", + "feature_to_test = [c for c in train_df.columns if c not in ['applicant_id', 'customer_key', 'premium_risk']]\n", + "print('Screening single features for leakage detection (this may take a few minutes)...')\n", + "leakage_results = 
screen_single_feature_leakage(train_df, 'premium_risk', feature_to_test)\n", + "print('\\n=== TOP 10 SINGLE-FEATURE F1 MACRO SCORES ===')\n", + "print(leakage_results.head(10))" ] }, { @@ -122,7 +235,26 @@ "metadata": {}, "outputs": [], "source": [ - "LEAKAGE_THRESHOLD = 0.85\nprint('=== LEAKAGE DETECTION RESULTS ===')\nprint(leakage_results.head(10))\n\nbureau_score = leakage_results[leakage_results['feature'] == 'bureau_risk_index']['mean_f1_macro'].values[0]\nprint(f'\\nbureau_risk_index F1 macro: {bureau_score:.4f}')\n\nif bureau_score > LEAKAGE_THRESHOLD:\n print('\\n*** ALERT: bureau_risk_index shows abnormally high predictive power! ***')\n print('*** This is consistent with a leakage feature. ***')\n print('*** ACTION: bureau_risk_index will be removed from features. ***')\n LEAKAGE_FEATURE = 'bureau_risk_index'\nelse:\n top_feat = leakage_results.iloc[0]['feature']\n top_score = leakage_results.iloc[0]['mean_f1_macro']\n print(f'\\nTop feature: {top_feat} with F1 macro = {top_score:.4f}')\n if top_score > 0.80:\n LEAKAGE_FEATURE = top_feat\n else:\n LEAKAGE_FEATURE = None" + "LEAKAGE_THRESHOLD = 0.85\n", + "print('=== LEAKAGE DETECTION RESULTS ===')\n", + "print(leakage_results.head(10))\n", + "\n", + "bureau_score = leakage_results[leakage_results['feature'] == 'bureau_risk_index']['mean_f1_macro'].values[0]\n", + "print(f'\\nbureau_risk_index F1 macro: {bureau_score:.4f}')\n", + "\n", + "if bureau_score > LEAKAGE_THRESHOLD:\n", + " print('\\n*** ALERT: bureau_risk_index shows abnormally high predictive power! ***')\n", + " print('*** This is consistent with a leakage feature. ***')\n", + " print('*** ACTION: bureau_risk_index will be removed from features. 
***')\n", + " LEAKAGE_FEATURE = 'bureau_risk_index'\n", + "else:\n", + " top_feat = leakage_results.iloc[0]['feature']\n", + " top_score = leakage_results.iloc[0]['mean_f1_macro']\n", + " print(f'\\nTop feature: {top_feat} with F1 macro = {top_score:.4f}')\n", + " if top_score > 0.80:\n", + " LEAKAGE_FEATURE = top_feat\n", + " else:\n", + " LEAKAGE_FEATURE = None" ] }, { @@ -132,7 +264,18 @@ "metadata": {}, "outputs": [], "source": [ - "if LEAKAGE_FEATURE:\n print(f'Removing leakage feature: {LEAKAGE_FEATURE}')\n train_df_clean = train_df.drop(columns=[LEAKAGE_FEATURE])\n val_df_clean = val_df.drop(columns=[LEAKAGE_FEATURE])\n test_df_clean = test_df.drop(columns=[LEAKAGE_FEATURE])\nelse:\n print('No leakage feature to remove.')\n train_df_clean = train_df.copy()\n val_df_clean = val_df.copy()\n test_df_clean = test_df.copy()\n\nprint(f'After removal - Train: {train_df_clean.shape}, Val: {val_df_clean.shape}, Test: {test_df_clean.shape}')" + "if LEAKAGE_FEATURE:\n", + " print(f'Removing leakage feature: {LEAKAGE_FEATURE}')\n", + " train_df_clean = train_df.drop(columns=[LEAKAGE_FEATURE])\n", + " val_df_clean = val_df.drop(columns=[LEAKAGE_FEATURE])\n", + " test_df_clean = test_df.drop(columns=[LEAKAGE_FEATURE])\n", + "else:\n", + " print('No leakage feature to remove.')\n", + " train_df_clean = train_df.copy()\n", + " val_df_clean = val_df.copy()\n", + " test_df_clean = test_df.copy()\n", + "\n", + "print(f'After removal - Train: {train_df_clean.shape}, Val: {val_df_clean.shape}, Test: {test_df_clean.shape}')" ] }, { @@ -150,7 +293,19 @@ "metadata": {}, "outputs": [], "source": [ - "ID_COLS = ['applicant_id', 'customer_key', 'applicant_ref_code']\nNOISE_COLS = ['noise_feature_1', 'noise_feature_2', 'noise_feature_3', 'noise_feature_4', 'noise_feature_5']\nTARGET_COL = 'premium_risk'\n\nall_cols = train_df_clean.columns.tolist()\nfeature_cols_all = [c for c in all_cols if c not in ID_COLS + NOISE_COLS + [TARGET_COL]]\n\nNUMERIC_FEATURES = 
train_df_clean[feature_cols_all].select_dtypes(include=[np.number]).columns.tolist()\nCATEGORICAL_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=['object']).columns.tolist()\n\nprint(f'Total features: {len(feature_cols_all)}')\nprint(f'Numeric ({len(NUMERIC_FEATURES)}): {NUMERIC_FEATURES}')\nprint(f'Categorical ({len(CATEGORICAL_FEATURES)}): {CATEGORICAL_FEATURES}')" + "ID_COLS = ['applicant_id', 'customer_key', 'applicant_ref_code']\n", + "NOISE_COLS = ['noise_feature_1', 'noise_feature_2', 'noise_feature_3', 'noise_feature_4', 'noise_feature_5']\n", + "TARGET_COL = 'premium_risk'\n", + "\n", + "all_cols = train_df_clean.columns.tolist()\n", + "feature_cols_all = [c for c in all_cols if c not in ID_COLS + NOISE_COLS + [TARGET_COL]]\n", + "\n", + "NUMERIC_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=[np.number]).columns.tolist()\n", + "CATEGORICAL_FEATURES = train_df_clean[feature_cols_all].select_dtypes(include=['object']).columns.tolist()\n", + "\n", + "print(f'Total features: {len(feature_cols_all)}')\n", + "print(f'Numeric ({len(NUMERIC_FEATURES)}): {NUMERIC_FEATURES}')\n", + "print(f'Categorical ({len(CATEGORICAL_FEATURES)}): {CATEGORICAL_FEATURES}')" ] }, { @@ -160,7 +315,24 @@ "metadata": {}, "outputs": [], "source": [ - "numeric_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='median')),\n ('scaler', StandardScaler())\n])\n\ncategorical_transformer = Pipeline(steps=[\n ('imputer', SimpleImputer(strategy='most_frequent')),\n ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n])\n\npreprocessor = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n ],\n remainder='drop'\n)\nprint('Preprocessing pipeline created!')" + "numeric_transformer = Pipeline(steps=[\n", + " ('imputer', SimpleImputer(strategy='median')),\n", + " ('scaler', StandardScaler())\n", + "])\n", + "\n", + 
"categorical_transformer = Pipeline(steps=[\n", + " ('imputer', SimpleImputer(strategy='most_frequent')),\n", + " ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False))\n", + "])\n", + "\n", + "preprocessor = ColumnTransformer(\n", + " transformers=[\n", + " ('num', numeric_transformer, NUMERIC_FEATURES),\n", + " ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n", + " ],\n", + " remainder='drop'\n", + ")\n", + "print('Preprocessing pipeline created!')" ] }, { @@ -170,7 +342,18 @@ "metadata": {}, "outputs": [], "source": [ - "X_train = train_df_clean[feature_cols_all]\ny_train = train_df_clean[TARGET_COL]\nX_val = val_df_clean[feature_cols_all]\ny_val = val_df_clean[TARGET_COL]\nX_test = test_df_clean[feature_cols_all]\n\nle_target = LabelEncoder()\ny_train_enc = le_target.fit_transform(y_train)\ny_val_enc = le_target.transform(y_val)\n\nprint(f'Classes: {le_target.classes_}')\nprint(f'X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape}')" + "X_train = train_df_clean[feature_cols_all]\n", + "y_train = train_df_clean[TARGET_COL]\n", + "X_val = val_df_clean[feature_cols_all]\n", + "y_val = val_df_clean[TARGET_COL]\n", + "X_test = test_df_clean[feature_cols_all]\n", + "\n", + "le_target = LabelEncoder()\n", + "y_train_enc = le_target.fit_transform(y_train)\n", + "y_val_enc = le_target.transform(y_val)\n", + "\n", + "print(f'Classes: {le_target.classes_}')\n", + "print(f'X_train: {X_train.shape} | X_val: {X_val.shape} | X_test: {X_test.shape}')" ] }, { @@ -188,7 +371,32 @@ "metadata": {}, "outputs": [], "source": [ - "def evaluate_model(pipeline, X_tr, y_tr, X_v, y_v, le, model_name='Model'):\n y_tr_pred = pipeline.predict(X_tr)\n y_v_pred = pipeline.predict(X_v)\n results = {\n 'model': model_name,\n 'train_accuracy': accuracy_score(y_tr, y_tr_pred),\n 'val_accuracy': accuracy_score(y_v, y_v_pred),\n 'train_f1_macro': f1_score(y_tr, y_tr_pred, average='macro'),\n 'val_f1_macro': f1_score(y_v, y_v_pred, average='macro'),\n 
}\n f1_per_class = f1_score(y_v, y_v_pred, average=None)\n for i, cls in enumerate(le.classes_):\n results[f'val_f1_{cls}'] = f1_per_class[i]\n return results\n\ndef plot_confusion_matrix(pipeline, X_v, y_v, le, title, save_path):\n y_pred = pipeline.predict(X_v)\n fig, ax = plt.subplots(figsize=(8, 6))\n cm = confusion_matrix(y_v, y_pred)\n disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)\n disp.plot(ax=ax, cmap='Blues', values_format='d')\n ax.set_title(title, fontsize=14)\n plt.tight_layout()\n plt.savefig(save_path, dpi=150)\n plt.show()\n return cm" + "def evaluate_model(pipeline, X_tr, y_tr, X_v, y_v, le, model_name='Model'):\n", + " y_tr_pred = pipeline.predict(X_tr)\n", + " y_v_pred = pipeline.predict(X_v)\n", + " results = {\n", + " 'model': model_name,\n", + " 'train_accuracy': accuracy_score(y_tr, y_tr_pred),\n", + " 'val_accuracy': accuracy_score(y_v, y_v_pred),\n", + " 'train_f1_macro': f1_score(y_tr, y_tr_pred, average='macro'),\n", + " 'val_f1_macro': f1_score(y_v, y_v_pred, average='macro'),\n", + " }\n", + " f1_per_class = f1_score(y_v, y_v_pred, average=None)\n", + " for i, cls in enumerate(le.classes_):\n", + " results[f'val_f1_{cls}'] = f1_per_class[i]\n", + " return results\n", + "\n", + "def plot_confusion_matrix(pipeline, X_v, y_v, le, title, save_path):\n", + " y_pred = pipeline.predict(X_v)\n", + " fig, ax = plt.subplots(figsize=(8, 6))\n", + " cm = confusion_matrix(y_v, y_pred)\n", + " disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)\n", + " disp.plot(ax=ax, cmap='Blues', values_format='d')\n", + " ax.set_title(title, fontsize=14)\n", + " plt.tight_layout()\n", + " plt.savefig(save_path, dpi=150)\n", + " plt.show()\n", + " return cm" ] }, { @@ -198,7 +406,19 @@ "metadata": {}, "outputs": [], "source": [ - "print('Training Baseline: Logistic Regression...')\nbaseline_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', 
LogisticRegression(class_weight='balanced', max_iter=1000, random_state=RANDOM_STATE, n_jobs=-1))\n])\nbaseline_pipeline.fit(X_train, y_train_enc)\n\nbaseline_results = evaluate_model(baseline_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Baseline_LR')\n\nprint('\\n=== BASELINE MODEL RESULTS ===')\nfor k, v in baseline_results.items():\n if k != 'model':\n print(f'{k}: {v:.4f}')" + "print('Training Baseline: Logistic Regression...')\n", + "baseline_pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', LogisticRegression(class_weight='balanced', max_iter=1000, random_state=RANDOM_STATE, n_jobs=-1))\n", + "])\n", + "baseline_pipeline.fit(X_train, y_train_enc)\n", + "\n", + "baseline_results = evaluate_model(baseline_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Baseline_LR')\n", + "\n", + "print('\\n=== BASELINE MODEL RESULTS ===')\n", + "for k, v in baseline_results.items():\n", + " if k != 'model':\n", + " print(f'{k}: {v:.4f}')" ] }, { @@ -208,7 +428,17 @@ "metadata": {}, "outputs": [], "source": [ - "plot_confusion_matrix(baseline_pipeline, X_val, y_val_enc, le_target,\n 'Baseline: Logistic Regression - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'baseline_confusion_matrix.png'))\n\nprint('\\n=== CLASSIFICATION REPORT (VAL) ===')\ny_val_pred = baseline_pipeline.predict(X_val)\nprint(classification_report(y_val_enc, y_val_pred, target_names=le_target.classes_))\n\nall_results = [baseline_results]\npd.DataFrame(all_results).to_csv(\n os.path.join(OUTPUT_DIR, 'tables', 'model_comparison_summary.csv'), index=False)" + "plot_confusion_matrix(baseline_pipeline, X_val, y_val_enc, le_target,\n", + " 'Baseline: Logistic Regression - Confusion Matrix',\n", + " os.path.join(OUTPUT_DIR, 'figures', 'baseline_confusion_matrix.png'))\n", + "\n", + "print('\\n=== CLASSIFICATION REPORT (VAL) ===')\n", + "y_val_pred = baseline_pipeline.predict(X_val)\n", + "print(classification_report(y_val_enc, 
y_val_pred, target_names=le_target.classes_))\n", + "\n", + "all_results = [baseline_results]\n", + "pd.DataFrame(all_results).to_csv(\n", + " os.path.join(OUTPUT_DIR, 'tables', 'model_comparison_summary.csv'), index=False)" ] }, { @@ -225,7 +455,36 @@ "id": "30cd02ce", "metadata": {}, "outputs": [], - "source": "print('Training Random Forest...')\nstart = time.time()\nrf_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1))\n])\nrf_pipeline.fit(X_train, y_train_enc)\nrf_time = time.time() - start\n\nrf_results = evaluate_model(rf_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'RandomForest')\nrf_results['train_time'] = rf_time\n\nprint('Training XGBoost...')\nstart = time.time()\nxgb_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=6,\n objective='multi:softmax', num_class=3,\n tree_method=XGB_TREE_METHOD, device=XGB_DEVICE,\n random_state=RANDOM_STATE, verbosity=0))\n])\nxgb_pipeline.fit(X_train, y_train_enc)\nxgb_time = time.time() - start\n\nxgb_results = evaluate_model(xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost')\nxgb_results['train_time'] = xgb_time\n\nprint(f'RF time: {rf_time:.2f}s | XGB time: {xgb_time:.2f}s')" + "source": [ + "print('Training Random Forest...')\n", + "start = time.time()\n", + "rf_pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1))\n", + "])\n", + "rf_pipeline.fit(X_train, y_train_enc)\n", + "rf_time = time.time() - start\n", + "\n", + "rf_results = evaluate_model(rf_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'RandomForest')\n", + "rf_results['train_time'] = rf_time\n", + "\n", + "print('Training XGBoost...')\n", + 
"start = time.time()\n", + "xgb_pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', xgb.XGBClassifier(n_estimators=200, learning_rate=0.1, max_depth=6,\n", + " objective='multi:softmax', num_class=3,\n", + " tree_method=XGB_TREE_METHOD, device=XGB_DEVICE,\n", + " random_state=RANDOM_STATE, verbosity=0))\n", + "])\n", + "xgb_pipeline.fit(X_train, y_train_enc)\n", + "xgb_time = time.time() - start\n", + "\n", + "xgb_results = evaluate_model(xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost')\n", + "xgb_results['train_time'] = xgb_time\n", + "\n", + "print(f'RF time: {rf_time:.2f}s | XGB time: {xgb_time:.2f}s')" + ] }, { "cell_type": "code", @@ -234,7 +493,17 @@ "metadata": {}, "outputs": [], "source": [ - "all_results.append(rf_results)\nall_results.append(xgb_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== MODEL COMPARISON SUMMARY ===')\ndisplay_cols = ['model', 'train_accuracy', 'val_accuracy', 'train_f1_macro', 'val_f1_macro', 'train_time']\nprint(results_df[display_cols].round(4).to_string(index=False))\n\nprint('\\n=== CLASS-WISE F1 (VAL) ===')\nclass_cols = [c for c in results_df.columns if c.startswith('val_f1_') and c != 'val_f1_macro']\nprint(results_df[['model'] + class_cols].round(4).to_string(index=False))" + "all_results.append(rf_results)\n", + "all_results.append(xgb_results)\n", + "results_df = pd.DataFrame(all_results)\n", + "\n", + "print('\\n=== MODEL COMPARISON SUMMARY ===')\n", + "display_cols = ['model', 'train_accuracy', 'val_accuracy', 'train_f1_macro', 'val_f1_macro', 'train_time']\n", + "print(results_df[display_cols].round(4).to_string(index=False))\n", + "\n", + "print('\\n=== CLASS-WISE F1 (VAL) ===')\n", + "class_cols = [c for c in results_df.columns if c.startswith('val_f1_') and c != 'val_f1_macro']\n", + "print(results_df[['model'] + class_cols].round(4).to_string(index=False))" ] }, { @@ -244,7 +513,28 @@ "metadata": {}, "outputs": [], "source": [ - 
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\nmodels = results_df['model'].tolist()\nval_f1 = results_df['val_f1_macro'].tolist()\nval_acc = results_df['val_accuracy'].tolist()\n\nbars1 = axes[0].bar(models, val_f1, color=['#2196F3', '#4CAF50', '#FF9800'])\naxes[0].set_title('Validation Macro-F1 Comparison', fontsize=13)\naxes[0].set_ylabel('Macro-F1')\naxes[0].set_ylim(0, 1)\nfor bar, val in zip(bars1, val_f1):\n axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n\nbars2 = axes[1].bar(models, val_acc, color=['#2196F3', '#4CAF50', '#FF9800'])\naxes[1].set_title('Validation Accuracy Comparison', fontsize=13)\naxes[1].set_ylabel('Accuracy')\naxes[1].set_ylim(0, 1)\nfor bar, val in zip(bars2, val_acc):\n axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'model_comparison.png'), dpi=150)\nplt.show()" + "fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n", + "models = results_df['model'].tolist()\n", + "val_f1 = results_df['val_f1_macro'].tolist()\n", + "val_acc = results_df['val_accuracy'].tolist()\n", + "\n", + "bars1 = axes[0].bar(models, val_f1, color=['#2196F3', '#4CAF50', '#FF9800'])\n", + "axes[0].set_title('Validation Macro-F1 Comparison', fontsize=13)\n", + "axes[0].set_ylabel('Macro-F1')\n", + "axes[0].set_ylim(0, 1)\n", + "for bar, val in zip(bars1, val_f1):\n", + " axes[0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n", + "\n", + "bars2 = axes[1].bar(models, val_acc, color=['#2196F3', '#4CAF50', '#FF9800'])\n", + "axes[1].set_title('Validation Accuracy Comparison', fontsize=13)\n", + "axes[1].set_ylabel('Accuracy')\n", + "axes[1].set_ylim(0, 1)\n", + "for bar, val in zip(bars2, val_acc):\n", + " axes[1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01, f'{val:.4f}', ha='center')\n", + "\n", + "plt.tight_layout()\n", + 
"plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'model_comparison.png'), dpi=150)\n", + "plt.show()" ] }, { @@ -254,7 +544,13 @@ "metadata": {}, "outputs": [], "source": [ - "plot_confusion_matrix(rf_pipeline, X_val, y_val_enc, le_target,\n 'Random Forest - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'rf_confusion_matrix.png'))\n\nplot_confusion_matrix(xgb_pipeline, X_val, y_val_enc, le_target,\n 'XGBoost - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'xgb_confusion_matrix.png'))" + "plot_confusion_matrix(rf_pipeline, X_val, y_val_enc, le_target,\n", + " 'Random Forest - Confusion Matrix',\n", + " os.path.join(OUTPUT_DIR, 'figures', 'rf_confusion_matrix.png'))\n", + "\n", + "plot_confusion_matrix(xgb_pipeline, X_val, y_val_enc, le_target,\n", + " 'XGBoost - Confusion Matrix',\n", + " os.path.join(OUTPUT_DIR, 'figures', 'xgb_confusion_matrix.png'))" ] }, { @@ -272,7 +568,17 @@ "metadata": {}, "outputs": [], "source": [ - "print('=== BAGGING VS BOOSTING ANALYSIS ===')\nrf_val_f1 = rf_results['val_f1_macro']\nrf_train_f1 = rf_results['train_f1_macro']\nrf_gap = rf_train_f1 - rf_val_f1\n\nxgb_val_f1 = xgb_results['val_f1_macro']\nxgb_train_f1 = xgb_results['train_f1_macro']\nxgb_gap = xgb_train_f1 - xgb_val_f1\n\nprint(f'Random Forest - val_f1_macro: {rf_val_f1:.4f}, overfitting gap: {rf_gap:.4f}')\nprint(f'XGBoost - val_f1_macro: {xgb_val_f1:.4f}, overfitting gap: {xgb_gap:.4f}')" + "print('=== BAGGING VS BOOSTING ANALYSIS ===')\n", + "rf_val_f1 = rf_results['val_f1_macro']\n", + "rf_train_f1 = rf_results['train_f1_macro']\n", + "rf_gap = rf_train_f1 - rf_val_f1\n", + "\n", + "xgb_val_f1 = xgb_results['val_f1_macro']\n", + "xgb_train_f1 = xgb_results['train_f1_macro']\n", + "xgb_gap = xgb_train_f1 - xgb_val_f1\n", + "\n", + "print(f'Random Forest - val_f1_macro: {rf_val_f1:.4f}, overfitting gap: {rf_gap:.4f}')\n", + "print(f'XGBoost - val_f1_macro: {xgb_val_f1:.4f}, overfitting gap: {xgb_gap:.4f}')" ] }, { @@ -289,7 +595,40 @@ "id": 
"e6361576", "metadata": {}, "outputs": [], - "source": "def objective(trial):\n params = {\n 'n_estimators': trial.suggest_int('n_estimators', 100, 500),\n 'max_depth': trial.suggest_int('max_depth', 3, 10),\n 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),\n 'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),\n 'subsample': trial.suggest_float('subsample', 0.5, 1.0),\n 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),\n 'gamma': trial.suggest_float('gamma', 0, 5),\n 'reg_alpha': trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),\n 'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),\n 'objective': 'multi:softmax',\n 'num_class': 3,\n 'random_state': RANDOM_STATE,\n 'tree_method': XGB_TREE_METHOD,\n 'device': XGB_DEVICE,\n 'verbosity': 0\n }\n pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(**params))\n ])\n pipeline.fit(X_train, y_train_enc)\n y_pred = pipeline.predict(X_val)\n score = f1_score(y_val_enc, y_pred, average='macro')\n return score\n\nprint('Starting Optuna hyperparameter optimisation (30 trials)...')\nstudy = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE))\nstudy.optimize(objective, n_trials=30, show_progress_bar=False)\n\nprint(f'Best trial: {study.best_trial.number} | Best macro-F1: {study.best_value:.4f}')" + "source": [ + "def objective(trial):\n", + " params = {\n", + " 'n_estimators': trial.suggest_int('n_estimators', 100, 500),\n", + " 'max_depth': trial.suggest_int('max_depth', 3, 10),\n", + " 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),\n", + " 'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),\n", + " 'subsample': trial.suggest_float('subsample', 0.5, 1.0),\n", + " 'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),\n", + " 'gamma': trial.suggest_float('gamma', 0, 5),\n", + " 'reg_alpha': 
trial.suggest_float('reg_alpha', 1e-4, 10.0, log=True),\n", + " 'reg_lambda': trial.suggest_float('reg_lambda', 1e-4, 10.0, log=True),\n", + " 'objective': 'multi:softmax',\n", + " 'num_class': 3,\n", + " 'random_state': RANDOM_STATE,\n", + " 'tree_method': XGB_TREE_METHOD,\n", + " 'device': XGB_DEVICE,\n", + " 'verbosity': 0\n", + " }\n", + " pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', xgb.XGBClassifier(**params))\n", + " ])\n", + " pipeline.fit(X_train, y_train_enc)\n", + " y_pred = pipeline.predict(X_val)\n", + " score = f1_score(y_val_enc, y_pred, average='macro')\n", + " return score\n", + "\n", + "print('Starting Optuna hyperparameter optimisation (30 trials)...')\n", + "study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE))\n", + "study.optimize(objective, n_trials=30, show_progress_bar=False)\n", + "\n", + "print(f'Best trial: {study.best_trial.number} | Best macro-F1: {study.best_value:.4f}')" + ] }, { "cell_type": "code", @@ -298,7 +637,22 @@ "metadata": {}, "outputs": [], "source": [ - "print('\\n=== BEST HYPERPARAMETERS ===')\nbest_params = study.best_params\nfor k, v in best_params.items():\n print(f' {k}: {v}')\n\nfig = optuna.visualization.matplotlib.plot_optimization_history(study)\nplt.title('Optuna Optimization History')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_optimization_history.png'), dpi=150)\nplt.show()\n\nfig = optuna.visualization.matplotlib.plot_param_importances(study)\nplt.title('Hyperparameter Importance')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_param_importance.png'), dpi=150)\nplt.show()" + "print('\\n=== BEST HYPERPARAMETERS ===')\n", + "best_params = study.best_params\n", + "for k, v in best_params.items():\n", + " print(f' {k}: {v}')\n", + "\n", + "fig = optuna.visualization.matplotlib.plot_optimization_history(study)\n", + "plt.title('Optuna Optimization 
History')\n", + "plt.tight_layout()\n", + "plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_optimization_history.png'), dpi=150)\n", + "plt.show()\n", + "\n", + "fig = optuna.visualization.matplotlib.plot_param_importances(study)\n", + "plt.title('Hyperparameter Importance')\n", + "plt.tight_layout()\n", + "plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'optuna_param_importance.png'), dpi=150)\n", + "plt.show()" ] }, { @@ -307,7 +661,37 @@ "id": "640263ea", "metadata": {}, "outputs": [], - "source": "best_xgb_params = {\n **study.best_params,\n 'objective': 'multi:softmax',\n 'num_class': 3,\n 'random_state': RANDOM_STATE,\n 'tree_method': XGB_TREE_METHOD,\n 'device': XGB_DEVICE,\n 'verbosity': 0\n}\n\nprint('Training tuned XGBoost...')\nimport time\nstart = time.time()\ntuned_xgb_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', xgb.XGBClassifier(**best_xgb_params))\n])\ntuned_xgb_pipeline.fit(X_train, y_train_enc)\ntuned_time = time.time() - start\n\ntuned_results = evaluate_model(tuned_xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost_Tuned')\ntuned_results['train_time'] = tuned_time\n\nprint('\\n=== TUNED XGBOOST RESULTS ===')\nfor k, v in tuned_results.items():\n if k != 'model':\n print(f'{k}: {v:.4f}')\n\nprint(f'\\nTuning improvement (macro-F1): +{tuned_results[\"val_f1_macro\"] - xgb_results[\"val_f1_macro\"]:.4f}')" + "source": [ + "best_xgb_params = {\n", + " **study.best_params,\n", + " 'objective': 'multi:softmax',\n", + " 'num_class': 3,\n", + " 'random_state': RANDOM_STATE,\n", + " 'tree_method': XGB_TREE_METHOD,\n", + " 'device': XGB_DEVICE,\n", + " 'verbosity': 0\n", + "}\n", + "\n", + "print('Training tuned XGBoost...')\n", + "import time\n", + "start = time.time()\n", + "tuned_xgb_pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', xgb.XGBClassifier(**best_xgb_params))\n", + "])\n", + "tuned_xgb_pipeline.fit(X_train, y_train_enc)\n", + "tuned_time 
= time.time() - start\n", + "\n", + "tuned_results = evaluate_model(tuned_xgb_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'XGBoost_Tuned')\n", + "tuned_results['train_time'] = tuned_time\n", + "\n", + "print('\\n=== TUNED XGBOOST RESULTS ===')\n", + "for k, v in tuned_results.items():\n", + " if k != 'model':\n", + " print(f'{k}: {v:.4f}')\n", + "\n", + "print(f'\\nTuning improvement (macro-F1): +{tuned_results[\"val_f1_macro\"] - xgb_results[\"val_f1_macro\"]:.4f}')" + ] }, { "cell_type": "code", @@ -316,7 +700,11 @@ "metadata": {}, "outputs": [], "source": [ - "all_results.append(tuned_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== BEFORE VS AFTER TUNING ===')\nprint(results_df[['model', 'val_f1_macro', 'val_accuracy', 'train_time']].round(4).to_string(index=False))" + "all_results.append(tuned_results)\n", + "results_df = pd.DataFrame(all_results)\n", + "\n", + "print('\\n=== BEFORE VS AFTER TUNING ===')\n", + "print(results_df[['model', 'val_f1_macro', 'val_accuracy', 'train_time']].round(4).to_string(index=False))" ] }, { @@ -339,7 +727,47 @@ "metadata": {}, "outputs": [], "source": [ - "print('=== CATEGORY A: IMPROVED MISSING VALUE HANDLING ===')\n\nMISSING_COLS = ['net_monthly_income_gbp', 'avg_payment_delay_days', 'monthly_investment_gbp',\n 'prior_debt_products', 'account_tenure']\n\nfor col in MISSING_COLS:\n missing_col_name = f'{col}_missing'\n train_df_clean[missing_col_name] = train_df_clean[col].isnull().astype(int)\n val_df_clean[missing_col_name] = val_df_clean[col].isnull().astype(int)\n test_df_clean[missing_col_name] = test_df_clean[col].isnull().astype(int)\n print(f'Added missing indicator: {missing_col_name}')\n\nfeature_cols_catA = feature_cols_all + [f'{c}_missing' for c in MISSING_COLS]\nprint(f'\\nFeature columns after adding indicators: {len(feature_cols_catA)}')\n\nX_train_A = train_df_clean[feature_cols_catA]\nX_val_A = val_df_clean[feature_cols_catA]\nX_test_A = 
test_df_clean[feature_cols_catA]\n\nNUMERIC_FEATURES_A = X_train_A.select_dtypes(include=[np.number]).columns.tolist()\nCATEGORICAL_FEATURES_A = X_train_A.select_dtypes(include=['object']).columns.tolist()\n\npreprocessor_A = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES_A),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES_A)\n ],\n remainder='drop'\n)\n\ncatA_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor_A),\n ('classifier', xgb.XGBClassifier(**best_xgb_params))\n])\ncatA_pipeline.fit(X_train_A, y_train_enc)\n\ncatA_results = evaluate_model(catA_pipeline, X_train_A, y_train_enc, X_val_A, y_val_enc, le_target, 'XGB_CatA_MissingHandling')\n\nprint('\\n=== CATEGORY A RESULTS ===')\nprint(f'val_f1_macro: {catA_results[\"val_f1_macro\"]:.4f}')\nprint(f'val_accuracy: {catA_results[\"val_accuracy\"]:.4f}')" + "print('=== CATEGORY A: IMPROVED MISSING VALUE HANDLING ===')\n", + "\n", + "MISSING_COLS = ['net_monthly_income_gbp', 'avg_payment_delay_days', 'monthly_investment_gbp',\n", + " 'prior_debt_products', 'account_tenure']\n", + "\n", + "for col in MISSING_COLS:\n", + " missing_col_name = f'{col}_missing'\n", + " train_df_clean[missing_col_name] = train_df_clean[col].isnull().astype(int)\n", + " val_df_clean[missing_col_name] = val_df_clean[col].isnull().astype(int)\n", + " test_df_clean[missing_col_name] = test_df_clean[col].isnull().astype(int)\n", + " print(f'Added missing indicator: {missing_col_name}')\n", + "\n", + "feature_cols_catA = feature_cols_all + [f'{c}_missing' for c in MISSING_COLS]\n", + "print(f'\\nFeature columns after adding indicators: {len(feature_cols_catA)}')\n", + "\n", + "X_train_A = train_df_clean[feature_cols_catA]\n", + "X_val_A = val_df_clean[feature_cols_catA]\n", + "X_test_A = test_df_clean[feature_cols_catA]\n", + "\n", + "NUMERIC_FEATURES_A = X_train_A.select_dtypes(include=[np.number]).columns.tolist()\n", + "CATEGORICAL_FEATURES_A = 
X_train_A.select_dtypes(include=['object']).columns.tolist()\n", + "\n", + "preprocessor_A = ColumnTransformer(\n", + " transformers=[\n", + " ('num', numeric_transformer, NUMERIC_FEATURES_A),\n", + " ('cat', categorical_transformer, CATEGORICAL_FEATURES_A)\n", + " ],\n", + " remainder='drop'\n", + ")\n", + "\n", + "catA_pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor_A),\n", + " ('classifier', xgb.XGBClassifier(**best_xgb_params))\n", + "])\n", + "catA_pipeline.fit(X_train_A, y_train_enc)\n", + "\n", + "catA_results = evaluate_model(catA_pipeline, X_train_A, y_train_enc, X_val_A, y_val_enc, le_target, 'XGB_CatA_MissingHandling')\n", + "\n", + "print('\\n=== CATEGORY A RESULTS ===')\n", + "print(f'val_f1_macro: {catA_results[\"val_f1_macro\"]:.4f}')\n", + "print(f'val_accuracy: {catA_results[\"val_accuracy\"]:.4f}')" ] }, { @@ -349,7 +777,31 @@ "metadata": {}, "outputs": [], "source": [ - "print('=== CATEGORY D: SOFT VOTING ENSEMBLE ===')\nprint('Training Soft Voting Ensemble (RF + XGBoost)...')\n\nrf_clf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1)\nxgb_clf = xgb.XGBClassifier(**best_xgb_params)\n\nvoting_clf = VotingClassifier(\n estimators=[\n ('rf', rf_clf),\n ('xgb', xgb_clf)\n ],\n voting='soft',\n n_jobs=-1\n)\n\nensemble_pipeline = Pipeline(steps=[\n ('preprocessor', preprocessor),\n ('classifier', voting_clf)\n])\nensemble_pipeline.fit(X_train, y_train_enc)\n\nensemble_results = evaluate_model(ensemble_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Ensemble_SoftVoting')\n\nprint(f'Ensemble val_f1_macro: {ensemble_results[\"val_f1_macro\"]:.4f}')\nprint(f'Ensemble val_accuracy: {ensemble_results[\"val_accuracy\"]:.4f}')" + "print('=== CATEGORY D: SOFT VOTING ENSEMBLE ===')\n", + "print('Training Soft Voting Ensemble (RF + XGBoost)...')\n", + "\n", + "rf_clf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=RANDOM_STATE, n_jobs=-1)\n", 
+ "xgb_clf = xgb.XGBClassifier(**best_xgb_params)\n", + "\n", + "voting_clf = VotingClassifier(\n", + " estimators=[\n", + " ('rf', rf_clf),\n", + " ('xgb', xgb_clf)\n", + " ],\n", + " voting='soft',\n", + " n_jobs=-1\n", + ")\n", + "\n", + "ensemble_pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('classifier', voting_clf)\n", + "])\n", + "ensemble_pipeline.fit(X_train, y_train_enc)\n", + "\n", + "ensemble_results = evaluate_model(ensemble_pipeline, X_train, y_train_enc, X_val, y_val_enc, le_target, 'Ensemble_SoftVoting')\n", + "\n", + "print(f'Ensemble val_f1_macro: {ensemble_results[\"val_f1_macro\"]:.4f}')\n", + "print(f'Ensemble val_accuracy: {ensemble_results[\"val_accuracy\"]:.4f}')" ] }, { @@ -359,7 +811,20 @@ "metadata": {}, "outputs": [], "source": [ - "all_results.append(catA_results)\nall_results.append(ensemble_results)\nresults_df = pd.DataFrame(all_results)\n\nprint('\\n=== PERSONALISED IMPROVEMENT SUMMARY ===')\nprint(results_df[['model', 'val_f1_macro', 'val_accuracy']].round(4).to_string(index=False))\n\nresults_df.to_csv(\n os.path.join(OUTPUT_DIR, 'tables', 'personalised_improvement_summary.csv'), index=False)\n\nimprove_A = catA_results['val_f1_macro'] - tuned_results['val_f1_macro']\nimprove_D = ensemble_results['val_f1_macro'] - tuned_results['val_f1_macro']\nprint(f'\\nCategory A improvement (vs Tuned): +{improve_A:.4f}')\nprint(f'Category D improvement (vs Tuned): +{improve_D:.4f}')" + "all_results.append(catA_results)\n", + "all_results.append(ensemble_results)\n", + "results_df = pd.DataFrame(all_results)\n", + "\n", + "print('\\n=== PERSONALISED IMPROVEMENT SUMMARY ===')\n", + "print(results_df[['model', 'val_f1_macro', 'val_accuracy']].round(4).to_string(index=False))\n", + "\n", + "results_df.to_csv(\n", + " os.path.join(OUTPUT_DIR, 'tables', 'personalised_improvement_summary.csv'), index=False)\n", + "\n", + "improve_A = catA_results['val_f1_macro'] - tuned_results['val_f1_macro']\n", + "improve_D = 
ensemble_results['val_f1_macro'] - tuned_results['val_f1_macro']\n", + "print(f'\\nCategory A improvement (vs Tuned): +{improve_A:.4f}')\n", + "print(f'Category D improvement (vs Tuned): +{improve_D:.4f}')" ] }, { @@ -377,7 +842,58 @@ "metadata": {}, "outputs": [], "source": [ - "print('=== K-MEANS & GMM CLUSTERING ===')\n\npreprocessor_eval = ColumnTransformer(\n transformers=[\n ('num', numeric_transformer, NUMERIC_FEATURES),\n ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n ],\n remainder='drop'\n)\n\nX_train_scaled = preprocessor_eval.fit_transform(X_train)\nprint(f'Scaled training data shape: {X_train_scaled.shape}')\n\npca = PCA(n_components=2, random_state=RANDOM_STATE)\nX_train_pca = pca.fit_transform(X_train_scaled)\nprint(f'PCA explained variance: {pca.explained_variance_ratio_.sum():.4f}')\n\nk_range = range(2, 9)\nkmeans_results = []\ngmm_results = []\n\nfor k in k_range:\n print(f' Running k={k}...')\n \n km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=10)\n km_labels = km.fit_predict(X_train_scaled)\n sil_km = silhouette_score(X_train_scaled, km_labels)\n \n gmm_model = GaussianMixture(n_components=k, random_state=RANDOM_STATE, n_init=5)\n gmm_labels = gmm_model.fit_predict(X_train_scaled)\n sil_gmm = silhouette_score(X_train_scaled, gmm_labels)\n \n kmeans_results.append({\n 'k': k,\n 'inertia': km.inertia_,\n 'silhouette_x': sil_km\n })\n gmm_results.append({\n 'k': k,\n 'log_likelihood': gmm_model.score(X_train_scaled) * X_train_scaled.shape[0],\n 'bic': gmm_model.bic(X_train_scaled),\n 'aic': gmm_model.aic(X_train_scaled),\n 'silhouette_y': sil_gmm\n })\n\nkm_df = pd.DataFrame(kmeans_results)\ngmm_df = pd.DataFrame(gmm_results)\ncluster_df = km_df.merge(gmm_df, on='k')\nprint('\\n=== CLUSTERING COMPARISON ===')\nprint(cluster_df.round(4).to_string(index=False))\n\ncluster_df.to_csv(os.path.join(OUTPUT_DIR, 'tables', 'clustering_comparison.csv'), index=False)" + "print('=== K-MEANS & GMM CLUSTERING ===')\n", + "\n", + 
"preprocessor_eval = ColumnTransformer(\n", + " transformers=[\n", + " ('num', numeric_transformer, NUMERIC_FEATURES),\n", + " ('cat', categorical_transformer, CATEGORICAL_FEATURES)\n", + " ],\n", + " remainder='drop'\n", + ")\n", + "\n", + "X_train_scaled = preprocessor_eval.fit_transform(X_train)\n", + "print(f'Scaled training data shape: {X_train_scaled.shape}')\n", + "\n", + "pca = PCA(n_components=2, random_state=RANDOM_STATE)\n", + "X_train_pca = pca.fit_transform(X_train_scaled)\n", + "print(f'PCA explained variance: {pca.explained_variance_ratio_.sum():.4f}')\n", + "\n", + "k_range = range(2, 9)\n", + "kmeans_results = []\n", + "gmm_results = []\n", + "\n", + "for k in k_range:\n", + " print(f' Running k={k}...')\n", + " \n", + " km = KMeans(n_clusters=k, random_state=RANDOM_STATE, n_init=10)\n", + " km_labels = km.fit_predict(X_train_scaled)\n", + " sil_km = silhouette_score(X_train_scaled, km_labels)\n", + " \n", + " gmm_model = GaussianMixture(n_components=k, random_state=RANDOM_STATE, n_init=5)\n", + " gmm_labels = gmm_model.fit_predict(X_train_scaled)\n", + " sil_gmm = silhouette_score(X_train_scaled, gmm_labels)\n", + " \n", + " kmeans_results.append({\n", + " 'k': k,\n", + " 'inertia': km.inertia_,\n", + " 'silhouette_x': sil_km\n", + " })\n", + " gmm_results.append({\n", + " 'k': k,\n", + " 'log_likelihood': gmm_model.score(X_train_scaled) * X_train_scaled.shape[0],\n", + " 'bic': gmm_model.bic(X_train_scaled),\n", + " 'aic': gmm_model.aic(X_train_scaled),\n", + " 'silhouette_y': sil_gmm\n", + " })\n", + "\n", + "km_df = pd.DataFrame(kmeans_results)\n", + "gmm_df = pd.DataFrame(gmm_results)\n", + "cluster_df = km_df.merge(gmm_df, on='k')\n", + "print('\\n=== CLUSTERING COMPARISON ===')\n", + "print(cluster_df.round(4).to_string(index=False))\n", + "\n", + "cluster_df.to_csv(os.path.join(OUTPUT_DIR, 'tables', 'clustering_comparison.csv'), index=False)" ] }, { @@ -387,7 +903,33 @@ "metadata": {}, "outputs": [], "source": [ - "fig, axes = 
plt.subplots(1, 3, figsize=(15, 4))\n\naxes[0].plot(cluster_df['k'], cluster_df['inertia'], 'bo-', label='K-Means Inertia', linewidth=2)\naxes[0].set_xlabel('k')\naxes[0].set_ylabel('Inertia')\naxes[0].set_title('K-Means: Elbow Method')\naxes[0].grid(True)\n\naxes[1].plot(cluster_df['k'], cluster_df['bic'], 'g^-', label='BIC', linewidth=2)\naxes[1].plot(cluster_df['k'], cluster_df['aic'], 'rs--', label='AIC', linewidth=2)\naxes[1].set_xlabel('k')\naxes[1].set_ylabel('Score')\naxes[1].set_title('GMM: BIC & AIC (lower is better)')\naxes[1].legend()\naxes[1].grid(True)\n\naxes[2].plot(cluster_df['k'], cluster_df['silhouette_x'], 'bo-', label='K-Means', linewidth=2)\naxes[2].plot(cluster_df['k'], cluster_df['silhouette_y'], 'g^-', label='GMM', linewidth=2)\naxes[2].set_xlabel('k')\naxes[2].set_ylabel('Silhouette Score')\naxes[2].set_title('Silhouette Score Comparison (higher is better)')\naxes[2].legend()\naxes[2].grid(True)\n\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_comparison.png'), dpi=150)\nplt.show()" + "fig, axes = plt.subplots(1, 3, figsize=(15, 4))\n", + "\n", + "axes[0].plot(cluster_df['k'], cluster_df['inertia'], 'bo-', label='K-Means Inertia', linewidth=2)\n", + "axes[0].set_xlabel('k')\n", + "axes[0].set_ylabel('Inertia')\n", + "axes[0].set_title('K-Means: Elbow Method')\n", + "axes[0].grid(True)\n", + "\n", + "axes[1].plot(cluster_df['k'], cluster_df['bic'], 'g^-', label='BIC', linewidth=2)\n", + "axes[1].plot(cluster_df['k'], cluster_df['aic'], 'rs--', label='AIC', linewidth=2)\n", + "axes[1].set_xlabel('k')\n", + "axes[1].set_ylabel('Score')\n", + "axes[1].set_title('GMM: BIC & AIC (lower is better)')\n", + "axes[1].legend()\n", + "axes[1].grid(True)\n", + "\n", + "axes[2].plot(cluster_df['k'], cluster_df['silhouette_x'], 'bo-', label='K-Means', linewidth=2)\n", + "axes[2].plot(cluster_df['k'], cluster_df['silhouette_y'], 'g^-', label='GMM', linewidth=2)\n", + "axes[2].set_xlabel('k')\n", + 
"axes[2].set_ylabel('Silhouette Score')\n", + "axes[2].set_title('Silhouette Score Comparison (higher is better)')\n", + "axes[2].legend()\n", + "axes[2].grid(True)\n", + "\n", + "plt.tight_layout()\n", + "plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_comparison.png'), dpi=150)\n", + "plt.show()" ] }, { @@ -397,7 +939,22 @@ "metadata": {}, "outputs": [], "source": [ - "best_k = cluster_df.loc[cluster_df['silhouette_x'].idxmax(), 'k']\nprint(f'Best K for K-Means (by silhouette): {best_k}')\n\nkm_best = KMeans(n_clusters=int(best_k), random_state=RANDOM_STATE, n_init=10)\nkm_best_labels = km_best.fit_predict(X_train_scaled)\n\nfig, ax = plt.subplots(figsize=(8, 6))\nscatter = ax.scatter(X_train_pca[:, 0], X_train_pca[:, 1],\n c=km_best_labels, cmap='viridis', alpha=0.5, s=10)\nax.set_xlabel('PC1')\nax.set_ylabel('PC2')\nax.set_title(f'K-Means Clustering (k={best_k}) - PCA Visualization')\nplt.colorbar(scatter, ax=ax, label='Cluster')\nplt.tight_layout()\nplt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_visualization.png'), dpi=150)\nplt.show()" + "best_k = cluster_df.loc[cluster_df['silhouette_x'].idxmax(), 'k']\n", + "print(f'Best K for K-Means (by silhouette): {best_k}')\n", + "\n", + "km_best = KMeans(n_clusters=int(best_k), random_state=RANDOM_STATE, n_init=10)\n", + "km_best_labels = km_best.fit_predict(X_train_scaled)\n", + "\n", + "fig, ax = plt.subplots(figsize=(8, 6))\n", + "scatter = ax.scatter(X_train_pca[:, 0], X_train_pca[:, 1],\n", + " c=km_best_labels, cmap='viridis', alpha=0.5, s=10)\n", + "ax.set_xlabel('PC1')\n", + "ax.set_ylabel('PC2')\n", + "ax.set_title(f'K-Means Clustering (k={best_k}) - PCA Visualization')\n", + "plt.colorbar(scatter, ax=ax, label='Cluster')\n", + "plt.tight_layout()\n", + "plt.savefig(os.path.join(OUTPUT_DIR, 'figures', 'clustering_visualization.png'), dpi=150)\n", + "plt.show()" ] }, { @@ -415,7 +972,28 @@ "metadata": {}, "outputs": [], "source": [ - "print('=== FINAL MODEL SELECTION 
===')\nprint('Based on val_f1_macro (primary metric):')\nfinal_model_name = results_df.loc[results_df['val_f1_macro'].idxmax(), 'model']\nprint(f'Selected model: {final_model_name} (val_f1_macro = {results_df[\"val_f1_macro\"].max():.4f})')\n\nif final_model_name == 'XGB_CatA_MissingHandling':\n final_pipeline = catA_pipeline\n X_test_final = X_test_A\nelif final_model_name == 'Ensemble_SoftVoting':\n final_pipeline = ensemble_pipeline\n X_test_final = X_test\nelse:\n final_pipeline = tuned_xgb_pipeline\n X_test_final = X_test\n\ny_val_final_pred = final_pipeline.predict(X_test_final if final_model_name == 'XGBoost_Tuned' else X_test)\ny_val_final_decoded = le_target.inverse_transform(y_val_final_pred)\n\nplot_confusion_matrix(final_pipeline, X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val,\n y_val_enc, le_target,\n f'Final Model: {final_model_name} - Confusion Matrix',\n os.path.join(OUTPUT_DIR, 'figures', 'final_model_confusion_matrix.png'))" + "print('=== FINAL MODEL SELECTION ===')\n", + "print('Based on val_f1_macro (primary metric):')\n", + "final_model_name = results_df.loc[results_df['val_f1_macro'].idxmax(), 'model']\n", + "print(f'Selected model: {final_model_name} (val_f1_macro = {results_df[\"val_f1_macro\"].max():.4f})')\n", + "\n", + "if final_model_name == 'XGB_CatA_MissingHandling':\n", + " final_pipeline = catA_pipeline\n", + " X_test_final = X_test_A\n", + "elif final_model_name == 'Ensemble_SoftVoting':\n", + " final_pipeline = ensemble_pipeline\n", + " X_test_final = X_test\n", + "else:\n", + " final_pipeline = tuned_xgb_pipeline\n", + " X_test_final = X_test\n", + "\n", + "y_val_final_pred = final_pipeline.predict(X_test_final if final_model_name == 'XGBoost_Tuned' else X_test)\n", + "y_val_final_decoded = le_target.inverse_transform(y_val_final_pred)\n", + "\n", + "plot_confusion_matrix(final_pipeline, X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val,\n", + " y_val_enc, le_target,\n", + " f'Final Model: 
{final_model_name} - Confusion Matrix',\n", + " os.path.join(OUTPUT_DIR, 'figures', 'final_model_confusion_matrix.png'))" ] }, { @@ -425,7 +1003,9 @@ "metadata": {}, "outputs": [], "source": [ - "print('\\n=== FINAL CLASSIFICATION REPORT (VAL) ===')\ny_val_pred_final = final_pipeline.predict(X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val)\nprint(classification_report(y_val_enc, y_val_pred_final, target_names=le_target.classes_))" + "print('\\n=== FINAL CLASSIFICATION REPORT (VAL) ===')\n", + "y_val_pred_final = final_pipeline.predict(X_val_A if final_model_name == 'XGB_CatA_MissingHandling' else X_val)\n", + "print(classification_report(y_val_enc, y_val_pred_final, target_names=le_target.classes_))" ] }, { @@ -435,7 +1015,34 @@ "metadata": {}, "outputs": [], "source": [ - "STUDENT_ID = '1234560'\n\nif final_model_name == 'XGB_CatA_MissingHandling':\n y_test_pred = final_pipeline.predict(X_test_A)\nelif final_model_name == 'Ensemble_SoftVoting':\n y_test_pred = final_pipeline.predict(X_test)\nelse:\n y_test_pred = final_pipeline.predict(X_test)\n\ny_test_labels = le_target.inverse_transform(y_test_pred)\n\nsubmission_df = pd.DataFrame({\n 'applicant_id': test_df['applicant_id'],\n 'customer_key': test_df['customer_key'],\n 'premium_risk': y_test_labels\n})\n\nprint('=== SUBMISSION CSV VALIDATION ===')\nprint(f'Shape: {submission_df.shape}')\nprint(f'Columns: {list(submission_df.columns)}')\nprint(submission_df.head())\n\nprint('\\nPrediction counts:')\nprint(submission_df['premium_risk'].value_counts())\n\ncsv_path = os.path.join(OUTPUT_DIR, 'predictions', f'test_result_{STUDENT_ID}.csv')\nsubmission_df.to_csv(csv_path, index=False)\nprint(f'\\n*** CSV saved to: {csv_path} ***')" + "STUDENT_ID = '1234560'\n", + "\n", + "if final_model_name == 'XGB_CatA_MissingHandling':\n", + " y_test_pred = final_pipeline.predict(X_test_A)\n", + "elif final_model_name == 'Ensemble_SoftVoting':\n", + " y_test_pred = final_pipeline.predict(X_test)\n", + 
"else:\n", + " y_test_pred = final_pipeline.predict(X_test)\n", + "\n", + "y_test_labels = le_target.inverse_transform(y_test_pred)\n", + "\n", + "submission_df = pd.DataFrame({\n", + " 'applicant_id': test_df['applicant_id'],\n", + " 'customer_key': test_df['customer_key'],\n", + " 'premium_risk': y_test_labels\n", + "})\n", + "\n", + "print('=== SUBMISSION CSV VALIDATION ===')\n", + "print(f'Shape: {submission_df.shape}')\n", + "print(f'Columns: {list(submission_df.columns)}')\n", + "print(submission_df.head())\n", + "\n", + "print('\\nPrediction counts:')\n", + "print(submission_df['premium_risk'].value_counts())\n", + "\n", + "csv_path = os.path.join(OUTPUT_DIR, 'predictions', f'test_result_{STUDENT_ID}.csv')\n", + "submission_df.to_csv(csv_path, index=False)\n", + "print(f'\\n*** CSV saved to: {csv_path} ***')" ] } ], @@ -452,4 +1059,4 @@ }, "nbformat": 4, "nbformat_minor": 5 -} \ No newline at end of file +} diff --git a/强化学习个人课程作业报告/outputs/figures/baseline_confusion_matrix.png b/强化学习个人课程作业报告/outputs/figures/baseline_confusion_matrix.png index ecc055f..ed93551 100644 Binary files a/强化学习个人课程作业报告/outputs/figures/baseline_confusion_matrix.png and b/强化学习个人课程作业报告/outputs/figures/baseline_confusion_matrix.png differ diff --git a/强化学习个人课程作业报告/outputs/figures/bureau_risk_boxplot.png b/强化学习个人课程作业报告/outputs/figures/bureau_risk_boxplot.png index d3c0c67..2dc28ad 100644 Binary files a/强化学习个人课程作业报告/outputs/figures/bureau_risk_boxplot.png and b/强化学习个人课程作业报告/outputs/figures/bureau_risk_boxplot.png differ diff --git a/强化学习个人课程作业报告/outputs/figures/missing_values.png b/强化学习个人课程作业报告/outputs/figures/missing_values.png index 9489b75..6f1b218 100644 Binary files a/强化学习个人课程作业报告/outputs/figures/missing_values.png and b/强化学习个人课程作业报告/outputs/figures/missing_values.png differ diff --git a/强化学习个人课程作业报告/outputs/figures/target_distribution.png b/强化学习个人课程作业报告/outputs/figures/target_distribution.png index baae2d7..ebb241e 100644 Binary files 
a/强化学习个人课程作业报告/outputs/figures/target_distribution.png and b/强化学习个人课程作业报告/outputs/figures/target_distribution.png differ diff --git a/强化学习个人课程作业报告/outputs/tables/model_comparison_summary.csv b/强化学习个人课程作业报告/outputs/tables/model_comparison_summary.csv index eff42f7..a1d7d6d 100644 --- a/强化学习个人课程作业报告/outputs/tables/model_comparison_summary.csv +++ b/强化学习个人课程作业报告/outputs/tables/model_comparison_summary.csv @@ -1,5 +1,2 @@ -model,train_accuracy,val_accuracy,train_f1_macro,val_f1_macro,val_f1_High,val_f1_Low,val_f1_Standard,train_time -Baseline_LR,0.7593680672268908,0.7341714285714286,0.7492574544185482,0.7237629331592531,0.7665209565440987,0.6489501312335958,0.7558177117000646, -RandomForest,1.0,0.7877333333333333,1.0,0.770789728543472,0.7874554916461244,0.7095334685598377,0.8153802254244543,57.91048526763916 -XGBoost,0.8519529411764706,0.8371047619047619,0.8297116592669606,0.8143842728003406,0.8904623073719283,0.6944039941751612,0.8582865168539325,67.63970804214478 -XGBoost_Tuned,0.9767663865546219,0.8700190476190476,0.9739400525375727,0.8519502714571496,0.9084439578486383,0.7620280474649407,0.8853788090578697,142.65462470054626 +model,train_accuracy,val_accuracy,train_f1_macro,val_f1_macro,val_f1_High,val_f1_Low,val_f1_Standard +Baseline_LR,0.7595294117647059,0.7337904761904762,0.7493991157707756,0.7234383324236036,0.7663239074550129,0.6487372909150542,0.7552537989007436 diff --git a/强化学习个人课程作业报告/run_notebook.py b/强化学习个人课程作业报告/run_notebook.py index 1f249bb..321f413 100644 --- a/强化学习个人课程作业报告/run_notebook.py +++ b/强化学习个人课程作业报告/run_notebook.py @@ -1,16 +1,19 @@ -""" -运行 insurance_premium_risk.ipynb 的脚本 -将 notebook 代码单元格提取出来逐个执行 -""" -import json, sys, os, warnings, traceback, time +import warnings -warnings.filterwarnings('ignore') +warnings.filterwarnings("ignore") import matplotlib -matplotlib.use('Agg') + +matplotlib.use("Agg") import matplotlib.pyplot as _real_mpl_plt + _real_mpl_plt.show = lambda *a, **kw: None +import os +import sys +import time +import json 
+import traceback import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -32,34 +35,18 @@ import xgboost as xgb import optuna optuna.logging.set_verbosity(optuna.logging.WARNING) -RANDOM_STATE = 42 -np.random.seed(RANDOM_STATE) -plt.rcParams['figure.figsize'] = (10, 6) -plt.rcParams['font.size'] = 12 -sns.set_style('whitegrid') +from src.notebook_runner import execute_notebook +from src.runtime_paths import build_paths -# ===== 读取 notebook ===== -nb_path = r'd:\Code\doing_exercises\programs\外教作业外快\强化学习个人课程作业报告\notebooks\insurance_premium_risk.ipynb' -cells = json.load(open(nb_path, encoding='utf-8'))['cells'] -code_cells = [c for c in cells if c['cell_type'] == 'code'] -print(f"Total code cells: {len(code_cells)}") +paths = build_paths() +print(f"Project root : {paths.project_root}") +print(f"Notebook : {paths.notebook}") +print(f"Data dir : {paths.data_dir}") +print(f"Output dir : {paths.output_dir}") -# ===== 执行每个单元格 ===== -# 使用全局 __main__ 命名空间,变量跨单元格持久化 -main_ns = globals().copy() +ns = vars() -for i, cell in enumerate(code_cells): - src = ''.join(cell['source']) - print(f"\n{'='*60}") - print(f"Running cell {i+1}/{len(code_cells)}...") - print(f" Source: {src[:80].replace(chr(10), ' ')}") - try: - exec(compile(src, f'cell_{i+1}', 'exec'), main_ns) - except Exception as e: - print(f"ERROR in cell {i+1}: {e}") - traceback.print_exc() - print("Stopping execution.") - break - -print("\n\nAll cells executed successfully!") -print(f"Results saved to: outputs/figures/ and outputs/tables/") +result = execute_notebook(namespace=ns) +print(f"\nExecution finished: {result['status']}") +print(f"Cells run: {len([c for c in result['cells'] if c['status'] == 'ok'])}/{result['total']}") +print(f"Output dir: {result['outputs']['output_dir']}") \ No newline at end of file diff --git a/强化学习个人课程作业报告/src/__init__.py b/强化学习个人课程作业报告/src/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/强化学习个人课程作业报告/src/notebook_runner.py 
b/强化学习个人课程作业报告/src/notebook_runner.py new file mode 100644 index 0000000..c0f2faa --- /dev/null +++ b/强化学习个人课程作业报告/src/notebook_runner.py @@ -0,0 +1,55 @@ +import json +import traceback +from pathlib import Path + +from .runtime_paths import build_paths + + +def execute_notebook( + start_at: int | None = None, + stop_at: int | None = None, + namespace: dict | None = None, +) -> dict: + paths = build_paths() + paths.ensure_outputs() + + nb_data = json.loads(paths.notebook.read_text(encoding="utf-8")) + code_cells = [c for c in nb_data["cells"] if c["cell_type"] == "code"] + if not code_cells: + return {"status": "skipped", "reason": "no code cells found"} + + ns = (namespace or {}).copy() + ns.update(paths.as_injection()) + ns["RANDOM_STATE"] = 42 + + start = max((start_at or 1) - 1, 0) + stop = stop_at if stop_at is not None else len(code_cells) + cells_to_run = code_cells[start:stop] + + results = [] + for i, cell in enumerate(cells_to_run, start=start + 1): + src = "".join(cell["source"]) + tag = f"cell_{i}" + try: + exec(compile(src, tag, "exec"), ns) + results.append({"cell": i, "status": "ok"}) + except Exception as exc: + results.append({"cell": i, "status": "error", "error": str(exc)}) + traceback.print_exc() + print(f"Stopping at cell {i} due to error.") + break + + results_summary = { + "status": "completed", + "total": len(cells_to_run), + "cells": results, + "outputs": { + "data_dir": str(paths.data_dir), + "output_dir": str(paths.output_dir), + }, + } + return results_summary + + +if __name__ == "__main__": + execute_notebook() \ No newline at end of file diff --git a/强化学习个人课程作业报告/src/run_remaining.py b/强化学习个人课程作业报告/src/run_remaining.py index 661bb22..db440ce 100644 --- a/强化学习个人课程作业报告/src/run_remaining.py +++ b/强化学习个人课程作业报告/src/run_remaining.py @@ -1,32 +1,52 @@ -""" -Part 2: 运行完整的 notebook cells 1-35 -解决中文路径编码问题 -""" -import warnings, time, os, sys, json, traceback -warnings.filterwarnings('ignore') +import warnings + +warnings.filterwarnings("ignore") 
+ import matplotlib -matplotlib.use('Agg') -import matplotlib.pyplot as _p -_p.show = lambda *a, **kw: None -nb = r'D:\Code\doing_exercises\programs\外教作业外快\强化学习个人课程作业报告\notebooks\insurance_premium_risk.ipynb' -cells = json.load(open(nb, encoding='utf-8'))['cells'] -code_cells = [c for c in cells if c['cell_type'] == 'code'] -print(f"Total code cells: {len(code_cells)}") +matplotlib.use("Agg") +import matplotlib.pyplot as _real_mpl_plt -main_ns = globals().copy() -main_ns['RANDOM_STATE'] = 42 +_real_mpl_plt.show = lambda *a, **kw: None -for i, cell in enumerate(code_cells, start=1): - src = ''.join(cell['source']) - print(f"\n{'='*60}") - print(f"Running cell {i}/{len(code_cells)}...") - try: - exec(compile(src, f'cell_{i}', 'exec'), main_ns) - except Exception as e: - print(f"ERROR cell {i}: {e}") - traceback.print_exc() - print("Stopping.") - break +import os +import sys +import time +import json +import traceback +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import seaborn as sns +from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, ConfusionMatrixDisplay +from sklearn.model_selection import cross_val_score +from sklearn.preprocessing import StandardScaler, LabelEncoder +from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import RandomForestClassifier, VotingClassifier +from sklearn.pipeline import Pipeline +from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import OneHotEncoder +from sklearn.impute import SimpleImputer +from sklearn.cluster import KMeans +from sklearn.mixture import GaussianMixture +from sklearn.metrics import silhouette_score +from sklearn.decomposition import PCA +import xgboost as xgb +import optuna +optuna.logging.set_verbosity(optuna.logging.WARNING) -print("\n\nAll cells executed!") +from src.notebook_runner import execute_notebook +from src.runtime_paths import build_paths + +paths = build_paths() +print(f"Project root : 
{paths.project_root}") +print(f"Notebook : {paths.notebook}") +print(f"Data dir : {paths.data_dir}") +print(f"Output dir : {paths.output_dir}") + +ns = vars() + +result = execute_notebook(start_at=1, namespace=ns) +print(f"\nExecution finished: {result['status']}") +print(f"Cells run: {len([c for c in result['cells'] if c['status'] == 'ok'])}/{result['total']}") +print(f"Output dir: {result['outputs']['output_dir']}") \ No newline at end of file diff --git a/强化学习个人课程作业报告/src/runtime_paths.py b/强化学习个人课程作业报告/src/runtime_paths.py new file mode 100644 index 0000000..d3ed533 --- /dev/null +++ b/强化学习个人课程作业报告/src/runtime_paths.py @@ -0,0 +1,31 @@ +from dataclasses import dataclass +from pathlib import Path + + +@dataclass(frozen=True) +class RuntimePaths: + project_root: Path + notebook: Path + data_dir: Path + output_dir: Path + + def ensure_outputs(self) -> None: + (self.output_dir / "figures").mkdir(parents=True, exist_ok=True) + (self.output_dir / "tables").mkdir(parents=True, exist_ok=True) + (self.output_dir / "predictions").mkdir(parents=True, exist_ok=True) + + def as_injection(self) -> dict: + return { + "DATA_DIR": str(self.data_dir), + "OUTPUT_DIR": str(self.output_dir), + } + + +def build_paths() -> RuntimePaths: + root = Path(__file__).resolve().parents[1] + return RuntimePaths( + project_root=root, + notebook=root / "notebooks" / "insurance_premium_risk.ipynb", + data_dir=root / "dataset_final", + output_dir=root / "outputs", + ) diff --git a/强化学习个人项目报告/README.md b/强化学习个人项目报告/README.md new file mode 100644 index 0000000..21c359f --- /dev/null +++ b/强化学习个人项目报告/README.md @@ -0,0 +1,57 @@ +# PPO for CarRacing-v3 + +From-scratch PPO implementation for CarRacing-v3. No Stable-Baselines or other RL libraries used. 
+ +## Setup + +```bash +conda activate my_env +uv pip install -r requirements.txt +``` + +## Train + +```bash +python train.py --steps 500000 +``` + +## Evaluate + +```bash +python src/evaluate.py --model models/ppo_carracing_final.pt --episodes 10 +``` + +## TensorBoard + +```bash +tensorboard --logdir logs/tensorboard +``` + +## Project Structure + +``` +src/ +├── network.py # Actor (Gaussian policy) and Critic (Value) networks +├── replay_buffer.py # Rollout buffer with GAE computation +├── trainer.py # PPO update with clipped surrogate objective +├── utils.py # Environment wrappers (grayscale, resize, frame stack) +└── evaluate.py # Evaluation script +train.py # Main training entry point +models/ # Saved checkpoints +logs/tensorboard/ # TensorBoard logs +``` + +## Hyperparameters + +| Parameter | Value | +|-----------|-------| +| Learning rate | 3e-4 | +| Gamma | 0.99 | +| GAE lambda | 0.95 | +| Clip epsilon | 0.2 | +| PPO epochs | 4 | +| Mini-batch size | 64 | +| Rollout steps | 2048 | +| Entropy coefficient | 0.01 | +| Value coefficient | 0.5 | +| Max gradient norm | 0.5 | \ No newline at end of file diff --git a/强化学习个人项目报告/TASK_PROGRESS.md b/强化学习个人项目报告/TASK_PROGRESS.md new file mode 100644 index 0000000..160836a --- /dev/null +++ b/强化学习个人项目报告/TASK_PROGRESS.md @@ -0,0 +1,136 @@ +# PPO + CarRacing-v3 任务进度追踪 + +> 生成时间:2026/04/30 + +--- + +## 作业要求 + +用 Python 从零实现 PPO 算法,在 CarRacing-v3 环境训练智能体,提交: +- 技术报告(≤3000 词,英文)PDF +- 源代码 + 训练模型 zip 文件 +- 截止:04/May/2026 23:59 +- **禁止使用**:Stable-Baselines 等 RL 专用库 +- **允许使用**:TensorBoard、PyTorch、Gymnasium + +--- + +## 一、已完成 ✅ + +| 步骤 | 内容 | 文件 | +|------|------|------| +| ✅ 项目结构 | src/ 目录、requirements.txt、README.md | [requirements.txt](requirements.txt)、[README.md](README.md) | +| ✅ 策略/价值网络 | Actor(高斯策略输出 μ, σ)+ Critic 实现,CNN 结构 | [src/network.py](src/network.py) | +| ✅ Rollout Buffer | 轨迹存储 + GAE 优势估计 + 返回值计算 | [src/replay_buffer.py](src/replay_buffer.py) | +| ✅ PPO Trainer | PPO 更新(clip 目标函数 + 熵正则 + 价值损失) | 
[src/trainer.py](src/trainer.py) | +| ✅ 环境预处理 | 灰度化 + Resize(84×84) + 帧堆叠(4帧) Wrapper | [src/utils.py](src/utils.py) | +| ✅ 评估脚本 | 渲染测试 + 多回合平均分数评估 | [src/evaluate.py](src/evaluate.py) | +| ✅ 训练入口 | 主训练循环、TensorBoard 记录、模型保存 | [train.py](train.py) | + +**核心算法实现要点**: +- 策略网络:3 层 CNN + FC(512) → μ, σ(高斯策略,tanh 激活) +- 价值网络:3 层 CNN + FC(512) → V(s) +- GAE:λ=0.95,优势归一化 +- PPO clip:ε=0.2,4 epoch 更新,mini-batch 64 + +--- + +## 二、待完成 ⬜ + +| 步骤 | 内容 | 优先级 | +|------|------|--------| +| ⬜ 安装依赖 | `uv pip install --system -r requirements.txt` | **高** | +| ⬜ 环境测试 | 短时间(~10000步)验证代码能跑通 | **高** | +| ⬜ 完整训练 | 运行 500k+ 步,预计 5-8 小时(后台) | **高(耗时)** | +| ⬜ 生成图表 | 从 TensorBoard 提取数据,用 matplotlib 绘图 | 中 | +| ⬜ 撰写报告 | 英文技术报告(≤3000 词),LaTeX 排版 | 中 | +| ⬜ 编译 PDF | XeLaTeX 编译生成 CW1_1234560.pdf | 中 | +| ⬜ 打包 zip | 源代码 + 模型打包 CW1_1234560.zip | 低 | + +--- + +## 三、文件结构 + +``` +强化学习个人项目报告/ +├── src/ +│ ├── __init__.py +│ ├── network.py # Actor + Critic CNN 网络 +│ ├── replay_buffer.py # Rollout buffer + GAE +│ ├── trainer.py # PPO 更新逻辑 +│ ├── utils.py # 环境预处理 wrappers +│ └── evaluate.py # 评估脚本 +├── train.py # 主训练入口 +├── requirements.txt +├── README.md +└── TASK_PROGRESS.md # 本文档 +``` + +--- + +## 四、超参数配置 + +| 参数 | 值 | +|------|-----| +| Learning rate | 3e-4 | +| Gamma | 0.99 | +| GAE lambda | 0.95 | +| Clip epsilon | 0.2 | +| PPO epochs | 4 | +| Mini-batch size | 64 | +| Rollout steps | 2048 | +| Entropy coefficient | 0.01 | +| Value coefficient | 0.5 | +| Max gradient norm | 0.5 | +| State shape | (84, 84, 4) | +| Action dim | 3(连续:steer, gas, brake) | + +--- + +## 五、下一步行动 + +### 立即执行 +```bash +# 1. 安装依赖 +uv pip install --system -r requirements.txt + +# 2. 验证代码能跑(短测试) +python train.py --steps 10000 + +# 3. 
def evaluate(
    actor,
    env,
    num_episodes=10,
    device=torch.device("cpu"),
    max_steps=1000,
    deterministic=False,
):
    """Run the policy for several episodes and summarise the returns.

    Args:
        actor: ``nn.Module`` mapping a ``(1, C, H, W)`` float tensor to
            ``(mu, std)`` of a Gaussian policy.
        env: Environment with the Gymnasium ``reset``/``step`` API whose
            observations are channel-first arrays ``(C, H, W)``.
        num_episodes: Number of episodes to roll out (must be >= 1).
        device: Device the observation tensor is moved to.
        max_steps: Per-episode step cap (the previously hard-coded 1000).
        deterministic: If True act with the policy mean instead of
            sampling; the default keeps the original stochastic behaviour.

    Returns:
        Tuple ``(mean_return, std_return)`` over the evaluated episodes.

    Raises:
        ValueError: If ``num_episodes`` is smaller than 1.
    """
    if num_episodes < 1:
        raise ValueError("num_episodes must be at least 1")

    # Remember the caller's mode so evaluation does not silently flip an
    # eval-mode network back into training mode.
    was_training = actor.training
    actor.eval()
    returns = []

    try:
        for ep in range(num_episodes):
            obs, _ = env.reset()
            total_reward = 0
            done = False
            steps = 0

            while not done and steps < max_steps:
                with torch.no_grad():
                    # The env already yields (C, H, W); only a batch axis is
                    # needed to obtain (1, C, H, W).
                    obs_t = torch.from_numpy(obs).float().unsqueeze(0).to(device)
                    mu, std = actor(obs_t)
                    if deterministic:
                        action = mu
                    else:
                        action = torch.distributions.Normal(mu, std).sample()
                    action = torch.clamp(action, -1, 1).squeeze(0).cpu().numpy()

                obs, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward
                done = terminated or truncated
                steps += 1

            returns.append(total_reward)
            print(f"Episode {ep+1}/{num_episodes}: return={total_reward:.1f}, steps={steps}")
    finally:
        actor.train(was_training)

    return np.mean(returns), np.std(returns)
def _make_conv_trunk(in_channels):
    """Build the three-layer convolutional feature extractor used by both networks."""
    return nn.Sequential(
        nn.Conv2d(in_channels, 32, kernel_size=8, stride=4),
        nn.ReLU(),
        nn.Conv2d(32, 64, kernel_size=4, stride=2),
        nn.ReLU(),
        nn.Conv2d(64, 64, kernel_size=3, stride=1),
        nn.ReLU(),
    )


def _flat_feature_size(conv, in_channels, height, width):
    """Return the flattened output size of ``conv`` for a single input image."""
    with torch.no_grad():
        return conv(torch.zeros(1, in_channels, height, width)).numel()


class Actor(nn.Module):
    """Actor network outputting Gaussian policy parameters (mu, sigma).

    Args:
        state_shape: Observation shape as ``(height, width, channels)``.
        action_dim: Number of continuous action dimensions.
    """

    def __init__(self, state_shape=(84, 84, 4), action_dim=3):
        super().__init__()
        c, h, w = state_shape[2], state_shape[0], state_shape[1]  # channels, height, width

        self.conv = _make_conv_trunk(c)

        # Measure the flattened feature size instead of hard-coding it: for
        # 84x84 input the stack yields 64 x 7 x 7 (20 -> 9 -> 7), not the
        # 64 x 20 x 20 that was assumed before and made forward() fail.
        feat_size = _flat_feature_size(self.conv, c, h, w)

        self.fc = nn.Sequential(
            nn.Linear(feat_size, 512),
            nn.ReLU(),
        )
        self.mu_head = nn.Linear(512, action_dim)
        self.log_std_head = nn.Linear(512, action_dim)

        # Small-gain orthogonal init on both output heads
        nn.init.orthogonal_(self.mu_head.weight, gain=0.01)
        nn.init.orthogonal_(self.log_std_head.weight, gain=0.01)

    def forward(self, x):
        """Return ``(mu, std)`` for a batch of ``(B, C, H, W)`` pixel inputs.

        ``mu`` is squashed by tanh; ``std`` is ``exp`` of the log-std head
        after clamping it to ``[-20, 2]``.
        """
        x = x / 255.0  # Normalize pixel values
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        mu = torch.tanh(self.mu_head(x))
        log_std = self.log_std_head(x)
        log_std = torch.clamp(log_std, -20, 2)
        return mu, log_std.exp()


class Critic(nn.Module):
    """Critic network estimating state value V(s).

    Args:
        state_shape: Observation shape as ``(height, width, channels)``.
    """

    def __init__(self, state_shape=(84, 84, 4)):
        super().__init__()
        c, h, w = state_shape[2], state_shape[0], state_shape[1]

        self.conv = _make_conv_trunk(c)

        # Same measured feature size as the Actor (see the note there).
        feat_size = _flat_feature_size(self.conv, c, h, w)

        self.fc = nn.Sequential(
            nn.Linear(feat_size, 512),
            nn.ReLU(),
            nn.Linear(512, 1),
        )

    def forward(self, x):
        """Return V(s) with shape ``(B, 1)`` for a batch of ``(B, C, H, W)`` inputs."""
        x = x / 255.0
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
class PPOTrainer:
    """Run clipped-surrogate PPO updates for an actor/critic pair.

    Args:
        actor: Policy network returning ``(mu, std)`` for ``(B, C, H, W)`` input.
        critic: Value network returning one value per sample.
        rollout_buffer: Object providing ``get()``, ``compute_returns()``
            and ``reset()``; its states are stored channel-last ``(N, H, W, C)``.
        device: Torch device used for the update tensors.
        clip_eps: PPO ratio clipping range.
        gamma: Discount factor forwarded to ``compute_returns``.
        gae_lambda: GAE lambda forwarded to ``compute_returns``.
        lr: Adam learning rate for both optimizers.
        ent_coef: Weight of the entropy bonus.
        vf_coef: Weight of the value loss.
        max_grad_norm: Gradient-norm clipping threshold.
        ppo_epochs: Passes over the rollout per update.
        mini_batch_size: Mini-batch size inside each pass.
    """

    def __init__(
        self,
        actor,
        critic,
        rollout_buffer,
        device,
        clip_eps=0.2,
        gamma=0.99,
        gae_lambda=0.95,
        lr=3e-4,
        ent_coef=0.01,
        vf_coef=0.5,
        max_grad_norm=0.5,
        ppo_epochs=4,
        mini_batch_size=64,
    ):
        self.actor = actor
        self.critic = critic
        self.buffer = rollout_buffer
        self.device = device
        self.clip_eps = clip_eps
        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.ent_coef = ent_coef
        self.vf_coef = vf_coef
        self.max_grad_norm = max_grad_norm
        self.ppo_epochs = ppo_epochs
        self.mini_batch_size = mini_batch_size

        # Separate optimizers
        self.actor_optim = optim.Adam(actor.parameters(), lr=lr)
        self.critic_optim = optim.Adam(critic.parameters(), lr=lr)

        self.loss_history = {'actor': [], 'critic': [], 'entropy': [], 'total': []}

    def update(self, last_value):
        """Perform one PPO update over the buffered rollout, then reset the buffer.

        Args:
            last_value: Value estimate of the state following the last
                stored transition, used to bootstrap the returns.

        Returns:
            Tuple ``(avg_actor_loss, avg_critic_loss, avg_entropy)`` averaged
            over all mini-batches of all epochs.

        Raises:
            ValueError: If the buffer holds no transitions.
        """
        states, actions, _, _, _, log_probs_old = self.buffer.get()
        num_samples = len(states)
        if num_samples == 0:
            raise ValueError("Cannot run a PPO update on an empty rollout buffer")

        # Compute returns and advantages
        returns, advantages = self.buffer.compute_returns(
            last_value, self.gamma, self.gae_lambda
        )

        # Normalize advantages
        advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)

        # States are stored channel-last; the conv nets need (N, C, H, W).
        states_t = torch.from_numpy(states).float().permute(0, 3, 1, 2).to(self.device)
        actions_t = torch.from_numpy(actions).float().to(self.device)
        # Keep a single column so the old log-prob is (N, 1) like the new one.
        # NOTE(review): assumes every column of a buffer row holds the same
        # joint log-prob (what a keepdim-summed log-prob becomes when written
        # into an action_dim-wide row) -- confirm against the rollout collector.
        log_probs_old_t = (
            torch.from_numpy(log_probs_old).float()
            .reshape(num_samples, -1)[:, :1]
            .to(self.device)
        )
        # Column vectors keep returns/advantages aligned with the (B, 1)
        # ratio and value tensors instead of broadcasting to (B, B).
        returns_t = torch.from_numpy(returns).float().reshape(-1, 1).to(self.device)
        advantages_t = torch.from_numpy(advantages).float().reshape(-1, 1).to(self.device)

        dataset = TensorDataset(states_t, actions_t, log_probs_old_t, returns_t, advantages_t)
        loader = DataLoader(dataset, batch_size=self.mini_batch_size, shuffle=True)

        total_actor_loss = 0.0
        total_critic_loss = 0.0
        total_entropy = 0.0
        count = 0

        for _ in range(self.ppo_epochs):
            for s, a, log_pi_old, ret, adv in loader:
                # Get current policy distribution
                mu, std = self.actor(s)
                dist = torch.distributions.Normal(mu, std)
                log_pi = dist.log_prob(a).sum(dim=-1, keepdim=True)
                entropy = dist.entropy().sum(dim=-1).mean()

                # Probability ratio, shape (B, 1)
                ratio = torch.exp(log_pi - log_pi_old)

                # Clipped surrogate objective
                surr1 = ratio * adv
                surr2 = torch.clamp(ratio, 1 - self.clip_eps, 1 + self.clip_eps) * adv
                actor_loss = -torch.min(surr1, surr2).mean()

                # Value loss; reshape keeps shapes equal even for a batch of 1
                value = self.critic(s).reshape(-1, 1)
                critic_loss = nn.functional.mse_loss(value, ret)

                # Total loss
                loss = actor_loss + self.vf_coef * critic_loss - self.ent_coef * entropy

                # Update
                self.actor_optim.zero_grad()
                self.critic_optim.zero_grad()
                loss.backward()
                nn.utils.clip_grad_norm_(self.actor.parameters(), self.max_grad_norm)
                nn.utils.clip_grad_norm_(self.critic.parameters(), self.max_grad_norm)
                self.actor_optim.step()
                self.critic_optim.step()

                total_actor_loss += actor_loss.item()
                total_critic_loss += critic_loss.item()
                total_entropy += entropy.item()
                count += 1

        avg_actor = total_actor_loss / count
        avg_critic = total_critic_loss / count
        avg_entropy = total_entropy / count

        self.loss_history['actor'].append(avg_actor)
        self.loss_history['critic'].append(avg_critic)
        self.loss_history['entropy'].append(avg_entropy)
        self.loss_history['total'].append(avg_actor + avg_critic)

        self.buffer.reset()
        return avg_actor, avg_critic, avg_entropy
def _to_hwc(obs):
    """Convert a channel-first (C, H, W) observation to channel-last (H, W, C)."""
    return np.transpose(obs, (1, 2, 0))


def _obs_to_tensor(obs, device):
    """Turn a channel-last (H, W, C) observation into a (1, C, H, W) float tensor."""
    return torch.from_numpy(obs).float().unsqueeze(0).permute(0, 3, 1, 2).to(device)


def _save_checkpoint(actor, critic, episode, timesteps, filename):
    """Write actor/critic weights and progress counters to ``models/<filename>``."""
    os.makedirs("models", exist_ok=True)
    torch.save({
        "actor": actor.state_dict(),
        "critic": critic.state_dict(),
        "episode": episode,
        "timesteps": timesteps,
    }, os.path.join("models", filename))


def collect_rollout(actor, critic, env, buffer, device, rollout_steps, obs=None):
    """Collect ``rollout_steps`` transitions into ``buffer``.

    Args:
        actor: Policy network returning ``(mu, std)``.
        critic: Value network.
        env: Environment yielding channel-first ``(C, H, W)`` observations.
        buffer: Object with an ``add(state, action, reward, done, value, log_prob)`` method.
        device: Torch device for the network inputs.
        rollout_steps: Number of environment steps to take.
        obs: Optional channel-last observation to continue from; when
            omitted the environment is reset first (the original behaviour).

    Returns:
        The channel-last observation following the last stored transition,
        so the caller can bootstrap the value estimate and resume from it.
    """
    if obs is None:
        obs, _ = env.reset()
        # Stored channel-last (H, W, C)
        obs = _to_hwc(obs)

    for _ in range(rollout_steps):
        with torch.no_grad():
            obs_t = _obs_to_tensor(obs, device)
            mu, std = actor(obs_t)
            dist = torch.distributions.Normal(mu, std)
            action = dist.sample()
            action = torch.clamp(action, -1, 1)
            log_prob = dist.log_prob(action).sum(dim=-1, keepdim=True)
            value = critic(obs_t).squeeze(0).item()

        action_np = action.squeeze(0).cpu().numpy()
        log_prob_np = log_prob.squeeze(0).cpu().numpy()

        next_obs, reward, terminated, truncated, _ = env.step(action_np)
        done = terminated or truncated

        buffer.add(obs.copy(), action_np, reward, done, value, log_prob_np)

        obs = _to_hwc(next_obs)

        if done:
            obs, _ = env.reset()
            obs = _to_hwc(obs)

    return obs


def train(
    total_steps=500000,
    rollout_steps=2048,
    eval_interval=10,
    save_interval=50,
    device=None,
):
    """Main training loop.

    Alternates rollout collection and PPO updates until ``total_steps``
    environment steps have been taken, logging to TensorBoard, evaluating
    every ``eval_interval`` updates and checkpointing every
    ``save_interval`` updates. A final checkpoint is always written.

    Args:
        total_steps: Total number of environment steps to train for.
        rollout_steps: Steps collected per PPO update (buffer size).
        eval_interval: Number of updates between evaluations.
        save_interval: Number of updates between checkpoints.
        device: Torch device; detected with ``get_device()`` when None.
    """
    if device is None:
        device = get_device()

    env = make_env()
    eval_env = make_env()

    state_shape = (84, 84, 4)
    action_dim = 3

    actor = Actor(state_shape=state_shape, action_dim=action_dim).to(device)
    critic = Critic(state_shape=state_shape).to(device)

    buffer = RolloutBuffer(
        buffer_size=rollout_steps,
        state_shape=state_shape,
        action_dim=action_dim,
    )

    trainer = PPOTrainer(
        actor=actor,
        critic=critic,
        rollout_buffer=buffer,
        device=device,
        clip_eps=0.2,
        gamma=0.99,
        gae_lambda=0.95,
        lr=3e-4,
        ent_coef=0.01,
        vf_coef=0.5,
        max_grad_norm=0.5,
        ppo_epochs=4,
        mini_batch_size=64,
    )

    # TensorBoard
    log_dir = os.path.join("logs", "tensorboard", f"run_{int(time.time())}")
    writer = SummaryWriter(log_dir)

    print(f"Training on {device}")
    print(f"Log directory: {log_dir}")

    episode = 0
    total_timesteps = 0
    recent_rewards = []
    obs = None  # first rollout resets the env, later ones resume from here

    try:
        while total_timesteps < total_steps:
            # Collect rollout and keep the observation that follows it
            obs = collect_rollout(actor, critic, env, buffer, device, rollout_steps, obs=obs)

            # Get last value for GAE
            with torch.no_grad():
                last_value = critic(_obs_to_tensor(obs, device)).squeeze(0).item()

            # Read the rollout reward BEFORE the update, which resets the buffer
            ep_reward = float(buffer.rewards[:buffer.size].sum())

            # PPO update
            actor_loss, critic_loss, entropy = trainer.update(last_value)

            # Logging
            writer.add_scalar("Loss/Actor", actor_loss, total_timesteps)
            writer.add_scalar("Loss/Critic", critic_loss, total_timesteps)
            writer.add_scalar("Loss/Entropy", entropy, total_timesteps)

            total_timesteps += rollout_steps
            episode += 1

            recent_rewards.append(ep_reward)

            # Running average over (up to) the last 10 rollouts
            avg_reward = np.mean(recent_rewards[-10:])
            writer.add_scalar("Reward/Episode", ep_reward, total_timesteps)
            writer.add_scalar("Reward/AvgLast10", avg_reward, total_timesteps)

            print(f"Episode {episode}, steps {total_timesteps}, ep_reward={ep_reward:.1f}, avg_10={avg_reward:.1f}")

            # Evaluation with the policy mean
            if episode % eval_interval == 0:
                eval_returns = []
                for _ in range(5):
                    eval_obs, _ = eval_env.reset()
                    eval_obs = _to_hwc(eval_obs)
                    eval_reward = 0
                    done = False

                    while not done:
                        with torch.no_grad():
                            mu, std = actor(_obs_to_tensor(eval_obs, device))
                            action = torch.clamp(mu, -1, 1).squeeze(0).cpu().numpy()
                        eval_obs, reward, terminated, truncated, _ = eval_env.step(action)
                        eval_obs = _to_hwc(eval_obs)
                        eval_reward += reward
                        done = terminated or truncated

                    eval_returns.append(eval_reward)

                mean_eval = np.mean(eval_returns)
                writer.add_scalar("Eval/MeanReturn", mean_eval, episode)
                print(f"  Eval: mean_return={mean_eval:.2f}")

            # Save model
            if episode % save_interval == 0:
                _save_checkpoint(actor, critic, episode, total_timesteps, f"ppo_carracing_ep{episode}.pt")
                print(f"  Saved model at episode {episode}")

        # Save final model
        _save_checkpoint(actor, critic, episode, total_timesteps, "ppo_carracing_final.pt")
    finally:
        # Release the log writer and environments even if training fails
        writer.close()
        env.close()
        eval_env.close()

    print(f"Training complete! Total episodes: {episode}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--steps", type=int, default=500000, help="Total training steps")
    parser.add_argument("--rollout", type=int, default=2048, help="Rollout buffer size")
    args = parser.parse_args()

    device = get_device()
    train(total_steps=args.steps, rollout_steps=args.rollout, device=device)