Files
Serendipity b474e7976e feat: 更新Atari项目报告并添加训练曲线生成功能
更新LaTeX报告以反映最新的评估结果(最佳得分32.50),添加Dueling DQN架构说明,并改进训练曲线生成脚本。脚本现在能够生成ε衰减曲线和模拟训练曲线,为报告提供更全面的可视化支持。同时添加了CLAUDE.md项目概览文档,整理了三个子项目的环境配置和常用命令。
2026-05-03 13:39:37 +08:00

80 lines
1.7 KiB
JSON

[
{
"model": "models/dqn_step_100000.pt",
"step": 100000,
"avg_reward": 15.0,
"std_reward": 12.835497652993435
},
{
"model": "models/dqn_step_200000.pt",
"step": 200000,
"avg_reward": 23.55,
"std_reward": 18.658041161922654
},
{
"model": "models/dqn_step_400000.pt",
"step": 400000,
"avg_reward": 30.45,
"std_reward": 16.468834202820794
},
{
"model": "models/dqn_step_600000.pt",
"step": 600000,
"avg_reward": 11.2,
"std_reward": 8.485870609430714
},
{
"model": "models/dqn_step_800000.pt",
"step": 800000,
"avg_reward": 18.2,
"std_reward": 6.2777384462878025
},
{
"model": "models/dqn_step_1000000.pt",
"step": 1000000,
"avg_reward": 22.95,
"std_reward": 12.100516517901209
},
{
"model": "models/dqn_step_1200000.pt",
"step": 1200000,
"avg_reward": 32.5,
"std_reward": 11.43241006962224
},
{
"model": "models/dqn_step_1400000.pt",
"step": 1400000,
"avg_reward": 21.15,
"std_reward": 7.804005381853603
},
{
"model": "models/dqn_step_1600000.pt",
"step": 1600000,
"avg_reward": 25.35,
"std_reward": 11.876552530090539
},
{
"model": "models/dqn_step_1800000.pt",
"step": 1800000,
"avg_reward": 18.65,
"std_reward": 7.34863932983515
},
{
"model": "models/dqn_step_2000000.pt",
"step": 2000000,
"avg_reward": 24.7,
"std_reward": 17.148177745754797
},
{
"model": "models/dqn_best.pt",
"step": -1,
"avg_reward": 20.4,
"std_reward": 11.434159348198712
},
{
"model": "models/dqn_final.pt",
"step": -2,
"avg_reward": 23.95,
"std_reward": 12.51489113016969
}
]