fb09e66d09
- 将原始单环境训练代码重构为模块化结构,添加向量化环境支持以提高数据采集效率 - 实现完整的PPO训练流水线,包括共享CNN的Actor-Critic网络、向量化经验回放缓冲和GAE优势估计 - 添加训练脚本(train_vec.py)、评估脚本(evaluate.py)和SB3基线对比脚本(train_sb3_baseline.py) - 提供详细的文档和开发日志,包含问题解决记录和实验分析 - 移除旧版项目文件,统一项目结构到CW1_id_name目录下
155 lines
3.9 KiB
JSON
155 lines
3.9 KiB
JSON
[
|
|
{
|
|
"ckpt": "iter_0420.pt",
|
|
"stochastic_mean": 772.8404148499792,
|
|
"stochastic_std": 134.0469265187322,
|
|
"stochastic_min": 550.1901140684258,
|
|
"stochastic_returns": [
|
|
815.8249158248987,
|
|
914.6999999999905,
|
|
550.1901140684258,
|
|
885.5072463768003,
|
|
697.9797979797816
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "iter_0460.pt",
|
|
"stochastic_mean": 727.5500057577044,
|
|
"stochastic_std": 189.89105860046578,
|
|
"stochastic_min": 407.2463768115959,
|
|
"stochastic_returns": [
|
|
846.1279461279295,
|
|
857.4468085106251,
|
|
614.8288973383865,
|
|
407.2463768115959,
|
|
912.099999999985
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "iter_0500.pt",
|
|
"stochastic_mean": 773.5455635987219,
|
|
"stochastic_std": 163.95429075438219,
|
|
"stochastic_min": 489.3536121672852,
|
|
"stochastic_returns": [
|
|
687.8787878787706,
|
|
918.1999999999907,
|
|
489.3536121672852,
|
|
889.1304347825971,
|
|
883.1649831649656
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "iter_0540.pt",
|
|
"stochastic_mean": 745.6481816342452,
|
|
"stochastic_std": 139.64872388958386,
|
|
"stochastic_min": 534.9809885931408,
|
|
"stochastic_returns": [
|
|
623.905723905707,
|
|
825.5319148936034,
|
|
534.9809885931408,
|
|
867.3913043478165,
|
|
876.4309764309588
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "iter_0580.pt",
|
|
"stochastic_mean": 884.0969293975589,
|
|
"stochastic_std": 24.862095366596368,
|
|
"stochastic_min": 846.7680608364823,
|
|
"stochastic_returns": [
|
|
896.6329966329788,
|
|
917.9999999999906,
|
|
846.7680608364823,
|
|
892.7536231883943,
|
|
866.3299663299492
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "iter_0620.pt",
|
|
"stochastic_mean": 868.8009948145111,
|
|
"stochastic_std": 40.7446677294706,
|
|
"stochastic_min": 815.8249158248982,
|
|
"stochastic_returns": [
|
|
815.8249158248982,
|
|
878.7234042553056,
|
|
827.7566539923755,
|
|
920.1999999999931,
|
|
901.4999999999828
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "iter_0660.pt",
|
|
"stochastic_mean": 848.5454627389088,
|
|
"stochastic_std": 114.82809175856892,
|
|
"stochastic_min": 620.5387205387041,
|
|
"stochastic_returns": [
|
|
620.5387205387041,
|
|
918.8999999999909,
|
|
880.9885931558726,
|
|
918.1999999999929,
|
|
904.0999999999834
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "iter_0700.pt",
|
|
"stochastic_mean": 879.5099424741011,
|
|
"stochastic_std": 14.825654886509525,
|
|
"stochastic_min": 864.5390070921853,
|
|
"stochastic_returns": [
|
|
876.4309764309584,
|
|
864.5390070921853,
|
|
869.5817490494093,
|
|
907.1999999999905,
|
|
879.7979797979622
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
},
|
|
{
|
|
"ckpt": "final.pt",
|
|
"stochastic_mean": 845.6652607187065,
|
|
"stochastic_std": 107.32097702884839,
|
|
"stochastic_min": 634.0067340067171,
|
|
"stochastic_returns": [
|
|
634.0067340067171,
|
|
918.1999999999908,
|
|
880.9885931558729,
|
|
918.699999999993,
|
|
876.4309764309589
|
|
],
|
|
"deterministic_mean": NaN,
|
|
"deterministic_std": NaN,
|
|
"deterministic_min": NaN,
|
|
"deterministic_returns": []
|
|
}
|
|
] |