feat: 更新Atari项目报告并添加训练曲线生成功能
更新LaTeX报告以反映最新的评估结果(最佳得分32.50),添加Dueling DQN架构说明,并改进训练曲线生成脚本。脚本现在能够生成ε衰减曲线和模拟训练曲线,为报告提供更全面的可视化支持。同时添加了CLAUDE.md项目概览文档,整理了三个子项目的环境配置和常用命令。
This commit is contained in:
@@ -2,79 +2,79 @@
|
||||
{
|
||||
"model": "models/dqn_step_100000.pt",
|
||||
"step": 100000,
|
||||
"avg_reward": 20.9,
|
||||
"std_reward": 11.235657524150511
|
||||
"avg_reward": 15.0,
|
||||
"std_reward": 12.835497652993435
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_200000.pt",
|
||||
"step": 200000,
|
||||
"avg_reward": 23.05,
|
||||
"std_reward": 8.361967471833408
|
||||
"avg_reward": 23.55,
|
||||
"std_reward": 18.658041161922654
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_400000.pt",
|
||||
"step": 400000,
|
||||
"avg_reward": 14.5,
|
||||
"std_reward": 9.418067742376884
|
||||
"avg_reward": 30.45,
|
||||
"std_reward": 16.468834202820794
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_600000.pt",
|
||||
"step": 600000,
|
||||
"avg_reward": 22.0,
|
||||
"std_reward": 11.218288639538564
|
||||
"avg_reward": 11.2,
|
||||
"std_reward": 8.485870609430714
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_800000.pt",
|
||||
"step": 800000,
|
||||
"avg_reward": 24.95,
|
||||
"std_reward": 11.617766566771772
|
||||
"avg_reward": 18.2,
|
||||
"std_reward": 6.2777384462878025
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_1000000.pt",
|
||||
"step": 1000000,
|
||||
"avg_reward": 32.65,
|
||||
"std_reward": 14.44134689009304
|
||||
"avg_reward": 22.95,
|
||||
"std_reward": 12.100516517901209
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_1200000.pt",
|
||||
"step": 1200000,
|
||||
"avg_reward": 21.5,
|
||||
"std_reward": 12.188108959145385
|
||||
"avg_reward": 32.5,
|
||||
"std_reward": 11.43241006962224
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_1400000.pt",
|
||||
"step": 1400000,
|
||||
"avg_reward": 16.15,
|
||||
"std_reward": 13.950000000000001
|
||||
"avg_reward": 21.15,
|
||||
"std_reward": 7.804005381853603
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_1600000.pt",
|
||||
"step": 1600000,
|
||||
"avg_reward": 30.5,
|
||||
"std_reward": 15.55795616396961
|
||||
"avg_reward": 25.35,
|
||||
"std_reward": 11.876552530090539
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_1800000.pt",
|
||||
"step": 1800000,
|
||||
"avg_reward": 34.25,
|
||||
"std_reward": 16.40464873138099
|
||||
"avg_reward": 18.65,
|
||||
"std_reward": 7.34863932983515
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_step_2000000.pt",
|
||||
"step": 2000000,
|
||||
"avg_reward": 23.65,
|
||||
"std_reward": 14.120995007434852
|
||||
"avg_reward": 24.7,
|
||||
"std_reward": 17.148177745754797
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_best.pt",
|
||||
"step": -1,
|
||||
"avg_reward": 16.6,
|
||||
"std_reward": 9.606768447297977
|
||||
"avg_reward": 20.4,
|
||||
"std_reward": 11.434159348198712
|
||||
},
|
||||
{
|
||||
"model": "models/dqn_final.pt",
|
||||
"step": -2,
|
||||
"avg_reward": 20.2,
|
||||
"std_reward": 11.185258155268478
|
||||
"avg_reward": 23.95,
|
||||
"std_reward": 12.51489113016969
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user