feat: improve DQN training stability and performance

- Replace reward clipping with reward scaling, preserving the reward-magnitude signal
- Add a learning-rate scheduler with warmup and step decay (see the sketch after this list)
- Increase the experience replay buffer size to 200,000
- Enable the Dueling DQN architecture by default
- Clean up code formatting and parameter passing
- Save more intermediate model checkpoints during training
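The scheduler bullet above is the one change not visible in the hunks below. A minimal sketch of warmup followed by step decay using `torch.optim.lr_scheduler.LambdaLR`; the model, base LR, and schedule constants here are hypothetical stand-ins for whatever the repository's config actually uses:

```python
import torch
from torch import nn, optim

# Placeholder network and base LR, for illustration only.
model = nn.Linear(4, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

warmup_steps = 10_000   # assumed value
decay_every = 100_000   # assumed value
decay_factor = 0.5      # assumed value

def lr_lambda(step):
    # Linear warmup from 0 to the base LR, then multiplicative step decay.
    if step < warmup_steps:
        return step / warmup_steps
    return decay_factor ** ((step - warmup_steps) // decay_every)

scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

for step in range(3):
    optimizer.step()   # gradient update would happen here
    scheduler.step()   # advance the schedule by one training step
```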
2026-05-02 02:02:17 +08:00
parent 1c1cccd3f6
commit faf0d5ea42
12 changed files with 122 additions and 77 deletions
@@ -80,14 +80,15 @@ class FrameStackWrapper(gym.ObservationWrapper):
         return np.stack(list(self.frames), axis=0)


-class RewardClipWrapper(gym.RewardWrapper):
-    """Clip rewards to [-1, 1]."""
+class RewardScaleWrapper(gym.RewardWrapper):
+    """Scale rewards to stabilize training while preserving the magnitude signal."""

-    def __init__(self, env):
+    def __init__(self, env, scale=10.0):
         super().__init__(env)
+        self.scale = scale

     def reward(self, reward):
-        return np.clip(reward, -1, 1)
+        return reward / self.scale


 class NoopResetWrapper(gym.Wrapper):
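The practical difference is easiest to see on raw game scores. Clipping maps every positive reward to 1, so a big payoff looks the same as a small one; dividing by a fixed scale keeps the ratios. A quick numeric check (the reward values are illustrative):

```python
import numpy as np

rewards = np.array([5.0, 10.0, 200.0])  # example raw point values
print(np.clip(rewards, -1, 1))  # [1. 1. 1.]   -- old behavior: magnitudes lost
print(rewards / 10.0)           # [ 0.5  1.  20.] -- new behavior: ratios preserved
```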
@@ -174,7 +175,7 @@ def make_env(env_id="ALE/SpaceInvaders-v5", gray_scale=True, resize=True,
         env = GrayScaleWrapper(env)
     if reward_clip:
-        env = RewardClipWrapper(env)
+        env = RewardScaleWrapper(env, scale=10.0)
     if frame_stack > 1:
         env = FrameStackWrapper(env, num_stack=frame_stack)
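Note that the `reward_clip` flag now enables scaling rather than clipping; renaming it (e.g. to `reward_scale`) may be worth a follow-up. A minimal usage sketch, assuming the Gymnasium 5-tuple step API and the `make_env` defaults shown above; the raw-reward value in the comment is illustrative:

```python
env = make_env(env_id="ALE/SpaceInvaders-v5", reward_clip=True)

obs, info = env.reset()
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
# With scale=10.0, a raw in-game reward of 30 reaches the agent as 3.0.
env.close()
```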