Files
rl-atari/CW1_id_name/docs/checkpoint_scan_vec_main_v3.json
T
Serendipity fb09e66d09 feat: 重构项目结构并添加向量化PPO训练与评估脚本
- 将原始单环境训练代码重构为模块化结构,添加向量化环境支持以提高数据采集效率
- 实现完整的PPO训练流水线,包括共享CNN的Actor-Critic网络、向量化经验回放缓冲和GAE优势估计
- 添加训练脚本(train_vec.py)、评估脚本(evaluate.py)和SB3基线对比脚本(train_sb3_baseline.py)
- 提供详细的文档和开发日志,包含问题解决记录和实验分析
- 移除旧版项目文件,统一项目结构到CW1_id_name目录下
2026-05-02 13:44:08 +08:00

155 lines
3.9 KiB
JSON

[
{
"ckpt": "iter_0420.pt",
"stochastic_mean": 772.8404148499792,
"stochastic_std": 134.0469265187322,
"stochastic_min": 550.1901140684258,
"stochastic_returns": [
815.8249158248987,
914.6999999999905,
550.1901140684258,
885.5072463768003,
697.9797979797816
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "iter_0460.pt",
"stochastic_mean": 727.5500057577044,
"stochastic_std": 189.89105860046578,
"stochastic_min": 407.2463768115959,
"stochastic_returns": [
846.1279461279295,
857.4468085106251,
614.8288973383865,
407.2463768115959,
912.099999999985
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "iter_0500.pt",
"stochastic_mean": 773.5455635987219,
"stochastic_std": 163.95429075438219,
"stochastic_min": 489.3536121672852,
"stochastic_returns": [
687.8787878787706,
918.1999999999907,
489.3536121672852,
889.1304347825971,
883.1649831649656
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "iter_0540.pt",
"stochastic_mean": 745.6481816342452,
"stochastic_std": 139.64872388958386,
"stochastic_min": 534.9809885931408,
"stochastic_returns": [
623.905723905707,
825.5319148936034,
534.9809885931408,
867.3913043478165,
876.4309764309588
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "iter_0580.pt",
"stochastic_mean": 884.0969293975589,
"stochastic_std": 24.862095366596368,
"stochastic_min": 846.7680608364823,
"stochastic_returns": [
896.6329966329788,
917.9999999999906,
846.7680608364823,
892.7536231883943,
866.3299663299492
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "iter_0620.pt",
"stochastic_mean": 868.8009948145111,
"stochastic_std": 40.7446677294706,
"stochastic_min": 815.8249158248982,
"stochastic_returns": [
815.8249158248982,
878.7234042553056,
827.7566539923755,
920.1999999999931,
901.4999999999828
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "iter_0660.pt",
"stochastic_mean": 848.5454627389088,
"stochastic_std": 114.82809175856892,
"stochastic_min": 620.5387205387041,
"stochastic_returns": [
620.5387205387041,
918.8999999999909,
880.9885931558726,
918.1999999999929,
904.0999999999834
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "iter_0700.pt",
"stochastic_mean": 879.5099424741011,
"stochastic_std": 14.825654886509525,
"stochastic_min": 864.5390070921853,
"stochastic_returns": [
876.4309764309584,
864.5390070921853,
869.5817490494093,
907.1999999999905,
879.7979797979622
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
},
{
"ckpt": "final.pt",
"stochastic_mean": 845.6652607187065,
"stochastic_std": 107.32097702884839,
"stochastic_min": 634.0067340067171,
"stochastic_returns": [
634.0067340067171,
918.1999999999908,
880.9885931558729,
918.699999999993,
876.4309764309589
],
"deterministic_mean": null,
"deterministic_std": null,
"deterministic_min": null,
"deterministic_returns": []
}
]