feat: 添加DQN强化学习项目框架和核心实现
实现完整的DQN算法框架,用于Atari Space Invaders游戏训练。包括: - QNetwork和DuelingQNetwork神经网络架构 - 经验回放缓冲区(标准和优先级版本) - DQN智能体实现ε-greedy策略和Double DQN - 环境包装器(灰度化、调整大小、帧堆叠等) - 训练器、评估脚本和图表生成工具 - 详细的项目文档和依赖配置
This commit is contained in:
@@ -1,84 +0,0 @@
|
||||
"""Neural network architectures for Actor and Critic."""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
class Actor(nn.Module):
    """Actor network producing a diagonal-Gaussian policy over continuous actions.

    Encodes a stacked-frame image observation with a 3-layer CNN (the classic
    DQN/Nature architecture) and outputs the mean (squashed to [-1, 1] via
    tanh) and the standard deviation of the action distribution.
    """

    def __init__(self, state_shape=(84, 84, 4), action_dim=3):
        """
        Args:
            state_shape: observation shape as (height, width, channels).
                Non-square inputs are supported; the 84x84x4 default matches
                standard Atari preprocessing.
            action_dim: number of continuous action dimensions.
        """
        super().__init__()
        h, w, c = state_shape[0], state_shape[1], state_shape[2]

        self.conv = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )

        # Per-layer conv output size: floor((in - kernel) / stride) + 1.
        # Height and width are tracked separately so non-square inputs work
        # (the original assumed h == w by reusing out_h for both dimensions).
        def _conv_out(size):
            size = (size - 8) // 4 + 1
            size = (size - 4) // 2 + 1
            size = (size - 3) // 1 + 1
            return size

        feat_size = 64 * _conv_out(h) * _conv_out(w)

        self.fc = nn.Sequential(
            nn.Linear(feat_size, 512),
            nn.ReLU(),
        )
        self.mu_head = nn.Linear(512, action_dim)
        self.log_std_head = nn.Linear(512, action_dim)

        # Small-gain orthogonal init keeps the initial policy outputs near
        # zero — a common trick for stable early policy-gradient training.
        nn.init.orthogonal_(self.mu_head.weight, gain=0.01)
        nn.init.orthogonal_(self.log_std_head.weight, gain=0.01)

    def forward(self, x):
        """Forward pass.

        Args:
            x: image batch of shape (N, C, H, W) with pixel values in [0, 255].

        Returns:
            (mu, std): action mean in [-1, 1] and strictly positive standard
            deviation, each of shape (N, action_dim).
        """
        x = x / 255.0  # scale raw pixels to [0, 1]
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        mu = torch.tanh(self.mu_head(x))
        log_std = self.log_std_head(x)
        # Clamp before exp to keep std in a numerically safe range.
        log_std = torch.clamp(log_std, -20, 2)
        return mu, log_std.exp()
|
||||
|
||||
|
||||
class Critic(nn.Module):
    """Critic network estimating the state value V(s).

    Uses the same 3-layer CNN encoder as the actor, followed by a two-layer
    MLP head that outputs a single scalar value per state.
    """

    def __init__(self, state_shape=(84, 84, 4)):
        """
        Args:
            state_shape: observation shape as (height, width, channels).
                Non-square inputs are supported.
        """
        super().__init__()
        h, w, c = state_shape[0], state_shape[1], state_shape[2]

        self.conv = nn.Sequential(
            nn.Conv2d(c, 32, kernel_size=8, stride=4),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=1),
            nn.ReLU(),
        )

        # Per-layer conv output size: floor((in - kernel) / stride) + 1.
        # Height and width are tracked separately so non-square inputs work
        # (the original assumed h == w by reusing out_h for both dimensions).
        def _conv_out(size):
            size = (size - 8) // 4 + 1
            size = (size - 4) // 2 + 1
            size = (size - 3) // 1 + 1
            return size

        feat_size = 64 * _conv_out(h) * _conv_out(w)

        self.fc = nn.Sequential(nn.Linear(feat_size, 512), nn.ReLU(), nn.Linear(512, 1))

    def forward(self, x):
        """Forward pass.

        Args:
            x: image batch of shape (N, C, H, W) with pixel values in [0, 255].

        Returns:
            State-value estimates V(s) of shape (N, 1).
        """
        x = x / 255.0  # scale raw pixels to [0, 1]
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)
|
||||
Reference in New Issue
Block a user