b474e7976e
更新LaTeX报告以反映最新的评估结果(最佳得分32.50),添加Dueling DQN架构说明,并改进训练曲线生成脚本。脚本现在能够生成ε衰减曲线和模拟训练曲线,为报告提供更全面的可视化支持。同时添加了CLAUDE.md项目概览文档,整理了三个子项目的环境配置和常用命令。
80 lines
1.7 KiB
JSON
80 lines
1.7 KiB
JSON
[
|
|
{
|
|
"model": "models/dqn_step_100000.pt",
|
|
"step": 100000,
|
|
"avg_reward": 15.0,
|
|
"std_reward": 12.835497652993435
|
|
},
|
|
{
|
|
"model": "models/dqn_step_200000.pt",
|
|
"step": 200000,
|
|
"avg_reward": 23.55,
|
|
"std_reward": 18.658041161922654
|
|
},
|
|
{
|
|
"model": "models/dqn_step_400000.pt",
|
|
"step": 400000,
|
|
"avg_reward": 30.45,
|
|
"std_reward": 16.468834202820794
|
|
},
|
|
{
|
|
"model": "models/dqn_step_600000.pt",
|
|
"step": 600000,
|
|
"avg_reward": 11.2,
|
|
"std_reward": 8.485870609430714
|
|
},
|
|
{
|
|
"model": "models/dqn_step_800000.pt",
|
|
"step": 800000,
|
|
"avg_reward": 18.2,
|
|
"std_reward": 6.2777384462878025
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1000000.pt",
|
|
"step": 1000000,
|
|
"avg_reward": 22.95,
|
|
"std_reward": 12.100516517901209
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1200000.pt",
|
|
"step": 1200000,
|
|
"avg_reward": 32.5,
|
|
"std_reward": 11.43241006962224
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1400000.pt",
|
|
"step": 1400000,
|
|
"avg_reward": 21.15,
|
|
"std_reward": 7.804005381853603
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1600000.pt",
|
|
"step": 1600000,
|
|
"avg_reward": 25.35,
|
|
"std_reward": 11.876552530090539
|
|
},
|
|
{
|
|
"model": "models/dqn_step_1800000.pt",
|
|
"step": 1800000,
|
|
"avg_reward": 18.65,
|
|
"std_reward": 7.34863932983515
|
|
},
|
|
{
|
|
"model": "models/dqn_step_2000000.pt",
|
|
"step": 2000000,
|
|
"avg_reward": 24.7,
|
|
"std_reward": 17.148177745754797
|
|
},
|
|
{
|
|
"model": "models/dqn_best.pt",
|
|
"step": -1,
|
|
"avg_reward": 20.4,
|
|
"std_reward": 11.434159348198712
|
|
},
|
|
{
|
|
"model": "models/dqn_final.pt",
|
|
"step": -2,
|
|
"avg_reward": 23.95,
|
|
"std_reward": 12.51489113016969
|
|
}
|
|
] |