feat: 添加DQN强化学习项目框架和核心实现

实现完整的DQN算法框架,用于Atari Space Invaders游戏训练。包括:
- QNetwork和DuelingQNetwork神经网络架构
- 经验回放缓冲区(标准和优先级版本)
- DQN智能体实现ε-greedy策略和Double DQN
- 环境包装器(灰度化、调整大小、帧堆叠等)
- 训练器、评估脚本和图表生成工具
- 详细的项目文档和依赖配置
This commit is contained in:
2026-05-01 10:01:12 +08:00
parent cdec40a7c7
commit e8b51240f9
13 changed files with 1561 additions and 84 deletions
@@ -1,84 +0,0 @@
"""Neural network architectures for Actor and Critic."""
import torch
import torch.nn as nn
import torch.nn.functional as F
class Actor(nn.Module):
    """Actor network outputting Gaussian policy parameters.

    ``forward`` returns ``(mu, std)``: the tanh-squashed mean in ``[-1, 1]``
    and the (positive) standard deviation obtained by exponentiating a
    clamped log-std head.
    """

    def __init__(self, state_shape=(84, 84, 4), action_dim=3):
        """Build the conv torso and Gaussian policy heads.

        Args:
            state_shape: Input frame shape as (height, width, channels).
            action_dim: Number of continuous action dimensions.
        """
        super().__init__()
        c, h, w = (
            state_shape[2],
            state_shape[0],
            state_shape[1],
        )  # channels, height, width
        self.conv = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # Conv output size per spatial dim for the 8/4 -> 4/2 -> 3/1 stack.
        # Height and width are tracked separately so non-square inputs work
        # (the previous code assumed h == w and ignored the width).
        def _conv_out(size):
            size = (size - 8) // 4 + 1
            size = (size - 4) // 2 + 1
            return (size - 3) // 1 + 1

        feat_size = 64 * _conv_out(h) * _conv_out(w)
        self.fc = nn.Sequential(
            nn.Linear(feat_size, 512),
            nn.ReLU(),
        )
        self.mu_head = nn.Linear(512, action_dim)
        self.log_std_head = nn.Linear(512, action_dim)
        # Small-gain orthogonal init keeps the initial policy near-uniform.
        nn.init.orthogonal_(self.mu_head.weight, gain=0.01)
        nn.init.orthogonal_(self.log_std_head.weight, gain=0.01)

    def forward(self, x):
        """Forward pass returning (mu, std).

        Args:
            x: Batch of frames, shape (B, C, H, W), values in [0, 255].

        Returns:
            Tuple ``(mu, std)`` of shape (B, action_dim) each; ``mu`` is in
            [-1, 1] via tanh and ``std`` is strictly positive.
        """
        x = x / 255.0  # normalize uint8 pixel range to [0, 1]
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        mu = torch.tanh(self.mu_head(x))
        log_std = self.log_std_head(x)
        # Clamp for numerical stability before exponentiating.
        log_std = torch.clamp(log_std, -20, 2)
        return mu, log_std.exp()
class Critic(nn.Module):
    """Critic network estimating the state value V(s)."""

    def __init__(self, state_shape=(84, 84, 4)):
        """Build the conv torso and scalar value head.

        Args:
            state_shape: Input frame shape as (height, width, channels).
        """
        super().__init__()
        c, h, w = state_shape[2], state_shape[0], state_shape[1]
        self.conv = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # Conv output size per spatial dim for the 8/4 -> 4/2 -> 3/1 stack.
        # Height and width are tracked separately so non-square inputs work
        # (the previous code assumed h == w and ignored the width).
        def _conv_out(size):
            size = (size - 8) // 4 + 1
            size = (size - 4) // 2 + 1
            return (size - 3) // 1 + 1

        feat_size = 64 * _conv_out(h) * _conv_out(w)
        self.fc = nn.Sequential(nn.Linear(feat_size, 512), nn.ReLU(), nn.Linear(512, 1))

    def forward(self, x):
        """Forward pass returning V(s).

        Args:
            x: Batch of frames, shape (B, C, H, W), values in [0, 255].

        Returns:
            Value estimates of shape (B, 1).
        """
        x = x / 255.0  # normalize uint8 pixel range to [0, 1]
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)