79ffb90823
- 将学习率调整为5e-5,批次大小增加到64,经验回放缓冲区扩大到500,000 - 启用优先经验回放,调整目标网络更新频率为1000步 - 评估时使用Dueling网络架构,训练时评估模式的ε设为0 - 更新评估结果以反映配置改进后的性能变化
80 lines
1.7 KiB
JSON
80 lines
1.7 KiB
JSON
[
|
|
{
|
|
"model": "models/dqn_step_100000.pt",
|
|
"step": 100000,
|
|
"avg_reward": 20.9,
|
|
"std_reward": 11.235657524150511
|
|
},
|
|
{
|
|
"model": "models/dqn_step_200000.pt",
|
|
"step": 200000,
|
|
"avg_reward": 23.05,
|
|
"std_reward": 8.361967471833408
|
|
},
|
|
{
|
|
"model": "models/dqn_step_400000.pt",
|
|
"step": 400000,
|
|
"avg_reward": 14.5,
|
|
"std_reward": 9.418067742376884
|
|
},
|
|
{
|
|
"model": "models/dqn_step_600000.pt",
|
|
"step": 600000,
|
|
"avg_reward": 22.0,
|
|
"std_reward": 11.218288639538564
|
|
},
|
|
{
|
|
"model": "models/dqn_step_800000.pt",
|
|
"step": 800000,
|
|
"avg_reward": 24.95,
|
|
"std_reward": 11.617766566771772
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1000000.pt",
|
|
"step": 1000000,
|
|
"avg_reward": 32.65,
|
|
"std_reward": 14.44134689009304
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1200000.pt",
|
|
"step": 1200000,
|
|
"avg_reward": 21.5,
|
|
"std_reward": 12.188108959145385
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1400000.pt",
|
|
"step": 1400000,
|
|
"avg_reward": 16.15,
|
|
"std_reward": 13.950000000000001
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1600000.pt",
|
|
"step": 1600000,
|
|
"avg_reward": 30.5,
|
|
"std_reward": 15.55795616396961
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1800000.pt",
|
|
"step": 1800000,
|
|
"avg_reward": 34.25,
|
|
"std_reward": 16.40464873138099
|
|
},
|
|
{
|
|
"model": "models/dqn_step_2000000.pt",
|
|
"step": 2000000,
|
|
"avg_reward": 23.65,
|
|
"std_reward": 14.120995007434852
|
|
},
|
|
{
|
|
"model": "models/dqn_best.pt",
|
|
"step": -1,
|
|
"avg_reward": 16.6,
|
|
"std_reward": 9.606768447297977
|
|
},
|
|
{
|
|
"model": "models/dqn_final.pt",
|
|
"step": -2,
|
|
"avg_reward": 20.2,
|
|
"std_reward": 11.185258155268478
|
|
}
|
|
] |