feat: 添加DQN强化学习项目框架和核心实现

实现完整的DQN算法框架,用于Atari Space Invaders游戏训练。包括:
- QNetwork和DuelingQNetwork神经网络架构
- 经验回放缓冲区(标准和优先级版本)
- DQN智能体实现ε-greedy策略和Double DQN
- 环境包装器(灰度化、调整大小、帧堆叠等)
- 训练器、评估脚本和图表生成工具
- 详细的项目文档和依赖配置
This commit is contained in:
2026-05-01 10:01:12 +08:00
parent cdec40a7c7
commit e8b51240f9
13 changed files with 1561 additions and 84 deletions
@@ -1,84 +0,0 @@
"""Neural network architectures for Actor and Critic."""
import torch
import torch.nn as nn
import torch.nn.functional as F
class Actor(nn.Module):
    """Actor network outputting Gaussian policy parameters.

    ``forward`` returns ``(mu, std)``: the tanh-squashed mean in ``[-1, 1]``
    and the (positive) standard deviation obtained by exponentiating a
    clamped log-std head.
    """

    def __init__(self, state_shape=(84, 84, 4), action_dim=3):
        """Build the conv torso and Gaussian policy heads.

        Args:
            state_shape: Input frame shape as (height, width, channels).
            action_dim: Number of continuous action dimensions.
        """
        super().__init__()
        c, h, w = (
            state_shape[2],
            state_shape[0],
            state_shape[1],
        )  # channels, height, width
        self.conv = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # Conv output size per spatial dim for the 8/4 -> 4/2 -> 3/1 stack.
        # Height and width are tracked separately so non-square inputs work
        # (the previous code assumed h == w and ignored the width).
        def _conv_out(size):
            size = (size - 8) // 4 + 1
            size = (size - 4) // 2 + 1
            return (size - 3) // 1 + 1

        feat_size = 64 * _conv_out(h) * _conv_out(w)
        self.fc = nn.Sequential(
            nn.Linear(feat_size, 512),
            nn.ReLU(),
        )
        self.mu_head = nn.Linear(512, action_dim)
        self.log_std_head = nn.Linear(512, action_dim)
        # Small-gain orthogonal init keeps the initial policy near-uniform.
        nn.init.orthogonal_(self.mu_head.weight, gain=0.01)
        nn.init.orthogonal_(self.log_std_head.weight, gain=0.01)

    def forward(self, x):
        """Forward pass returning (mu, std).

        Args:
            x: Batch of frames, shape (B, C, H, W), values in [0, 255].

        Returns:
            Tuple ``(mu, std)`` of shape (B, action_dim) each; ``mu`` is in
            [-1, 1] via tanh and ``std`` is strictly positive.
        """
        x = x / 255.0  # normalize uint8 pixel range to [0, 1]
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        mu = torch.tanh(self.mu_head(x))
        log_std = self.log_std_head(x)
        # Clamp for numerical stability before exponentiating.
        log_std = torch.clamp(log_std, -20, 2)
        return mu, log_std.exp()
class Critic(nn.Module):
    """Critic network estimating the state value V(s)."""

    def __init__(self, state_shape=(84, 84, 4)):
        """Build the conv torso and scalar value head.

        Args:
            state_shape: Input frame shape as (height, width, channels).
        """
        super().__init__()
        c, h, w = state_shape[2], state_shape[0], state_shape[1]
        self.conv = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )
        # Conv output size per spatial dim for the 8/4 -> 4/2 -> 3/1 stack.
        # Height and width are tracked separately so non-square inputs work
        # (the previous code assumed h == w and ignored the width).
        def _conv_out(size):
            size = (size - 8) // 4 + 1
            size = (size - 4) // 2 + 1
            return (size - 3) // 1 + 1

        feat_size = 64 * _conv_out(h) * _conv_out(w)
        self.fc = nn.Sequential(nn.Linear(feat_size, 512), nn.ReLU(), nn.Linear(512, 1))

    def forward(self, x):
        """Forward pass returning V(s).

        Args:
            x: Batch of frames, shape (B, C, H, W), values in [0, 255].

        Returns:
            Value estimates of shape (B, 1).
        """
        x = x / 255.0  # normalize uint8 pixel range to [0, 1]
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)