feat: 改进DQN训练稳定性和性能
- 将奖励裁剪替换为奖励缩放,保留奖励大小信号
- 添加学习率调度器,支持warmup和步进衰减
- 增加经验回放缓冲区大小至200,000
- 默认启用Dueling DQN架构
- 优化代码格式和参数传递
- 添加更多训练中间模型保存点
This commit is contained in:
@@ -80,14 +80,15 @@ class FrameStackWrapper(gym.ObservationWrapper):
|
||||
return np.stack(list(self.frames), axis=0)
|
||||
|
||||
|
||||
class RewardScaleWrapper(gym.RewardWrapper):
    """Scale rewards to stabilize training while preserving magnitude signal.

    Replaces the former RewardClipWrapper (which clipped rewards to [-1, 1]
    and thus discarded how large a reward was) with a constant divisor, so
    bigger raw rewards still yield proportionally bigger learning signals.
    """

    def __init__(self, env, scale=10.0):
        """Wrap *env*, dividing every reward by *scale*.

        Args:
            env: the gym environment to wrap.
            scale: positive divisor applied to each raw reward; the default
                10.0 matches the value passed by make_env in this file.
        """
        super().__init__(env)
        self.scale = scale

    def reward(self, reward):
        """Return the raw reward divided by the configured scale factor."""
        return reward / self.scale
||||
class NoopResetWrapper(gym.Wrapper):
|
||||
@@ -174,7 +175,7 @@ def make_env(env_id="ALE/SpaceInvaders-v5", gray_scale=True, resize=True,
|
||||
env = GrayScaleWrapper(env)
|
||||
|
||||
if reward_clip:
|
||||
env = RewardClipWrapper(env)
|
||||
env = RewardScaleWrapper(env, scale=10.0)
|
||||
|
||||
if frame_stack > 1:
|
||||
env = FrameStackWrapper(env, num_stack=frame_stack)
|
||||
|
||||
Reference in New Issue
Block a user