[ { "model": "models/dqn_step_100000.pt", "step": 100000, "avg_reward": 15.0, "std_reward": 12.835497652993435 }, { "model": "models/dqn_step_200000.pt", "step": 200000, "avg_reward": 23.55, "std_reward": 18.658041161922654 }, { "model": "models/dqn_step_400000.pt", "step": 400000, "avg_reward": 30.45, "std_reward": 16.468834202820794 }, { "model": "models/dqn_step_600000.pt", "step": 600000, "avg_reward": 11.2, "std_reward": 8.485870609430714 }, { "model": "models/dqn_step_800000.pt", "step": 800000, "avg_reward": 18.2, "std_reward": 6.2777384462878025 }, { "model": "models/dqn_step_1000000.pt", "step": 1000000, "avg_reward": 22.95, "std_reward": 12.100516517901209 }, { "model": "models/dqn_step_1200000.pt", "step": 1200000, "avg_reward": 32.5, "std_reward": 11.43241006962224 }, { "model": "models/dqn_step_1400000.pt", "step": 1400000, "avg_reward": 21.15, "std_reward": 7.804005381853603 }, { "model": "models/dqn_step_1600000.pt", "step": 1600000, "avg_reward": 25.35, "std_reward": 11.876552530090539 }, { "model": "models/dqn_step_1800000.pt", "step": 1800000, "avg_reward": 18.65, "std_reward": 7.34863932983515 }, { "model": "models/dqn_step_2000000.pt", "step": 2000000, "avg_reward": 24.7, "std_reward": 17.148177745754797 }, { "model": "models/dqn_best.pt", "step": -1, "avg_reward": 20.4, "std_reward": 11.434159348198712 }, { "model": "models/dqn_final.pt", "step": -2, "avg_reward": 23.95, "std_reward": 12.51489113016969 } ]