[ { "model": "models/dqn_step_100000.pt", "step": 100000, "avg_reward": 20.9, "std_reward": 11.235657524150511 }, { "model": "models/dqn_step_200000.pt", "step": 200000, "avg_reward": 23.05, "std_reward": 8.361967471833408 }, { "model": "models/dqn_step_400000.pt", "step": 400000, "avg_reward": 14.5, "std_reward": 9.418067742376884 }, { "model": "models/dqn_step_600000.pt", "step": 600000, "avg_reward": 22.0, "std_reward": 11.218288639538564 }, { "model": "models/dqn_step_800000.pt", "step": 800000, "avg_reward": 24.95, "std_reward": 11.617766566771772 }, { "model": "models/dqn_step_1000000.pt", "step": 1000000, "avg_reward": 32.65, "std_reward": 14.44134689009304 }, { "model": "models/dqn_step_1200000.pt", "step": 1200000, "avg_reward": 21.5, "std_reward": 12.188108959145385 }, { "model": "models/dqn_step_1400000.pt", "step": 1400000, "avg_reward": 16.15, "std_reward": 13.950000000000001 }, { "model": "models/dqn_step_1600000.pt", "step": 1600000, "avg_reward": 30.5, "std_reward": 15.55795616396961 }, { "model": "models/dqn_step_1800000.pt", "step": 1800000, "avg_reward": 34.25, "std_reward": 16.40464873138099 }, { "model": "models/dqn_step_2000000.pt", "step": 2000000, "avg_reward": 23.65, "std_reward": 14.120995007434852 }, { "model": "models/dqn_best.pt", "step": -1, "avg_reward": 16.6, "std_reward": 9.606768447297977 }, { "model": "models/dqn_final.pt", "step": -2, "avg_reward": 20.2, "std_reward": 11.185258155268478 } ]