Files
rl-atari/强化学习个人项目报告/generate_plots.py
T
Serendipity b32490ae03 fix(ppo): 修正日志概率维度与状态张量格式
修复 replay buffer 中 log_probs 的维度错误,从 (buffer_size, action_dim) 改为 buffer_size
修正训练时状态张量格式,从 (N, H, W, C) 转换为 (N, C, H, W)
更新 collect_rollout 返回观测值并修正 log_prob 计算
添加项目配置文件和训练曲线生成脚本
2026-04-30 20:30:40 +08:00

108 lines
3.8 KiB
Python

"""Generate training plots from TensorBoard logs."""
import os
import numpy as np
from tensorboard.backend.event_processing import event_accumulator
import matplotlib.pyplot as plt
def extract_metrics(log_dir):
    """Load every scalar series from a TensorBoard event directory.

    Args:
        log_dir: Path to a directory containing TensorBoard event files.

    Returns:
        Mapping of scalar tag -> {'steps': [int, ...], 'values': [float, ...]}.
    """
    accumulator = event_accumulator.EventAccumulator(log_dir)
    accumulator.Reload()  # parse event files from disk before querying tags
    extracted = {}
    for tag in accumulator.Tags()['scalars']:
        scalar_events = accumulator.Scalars(tag)
        extracted[tag] = {
            'steps': [ev.step for ev in scalar_events],
            'values': [ev.value for ev in scalar_events],
        }
    return extracted
def smooth(data, weight=0.6):
    """Smooth a series with an exponential moving average (EMA).

    The first output equals the first input (EMA seeded with data[0]);
    each subsequent point is ``last * weight + (1 - weight) * point``.

    Args:
        data: Sequence of numbers to smooth.
        weight: EMA decay factor in [0, 1); higher means smoother.

    Returns:
        List of smoothed values, same length as ``data``.
    """
    # Guard: the original indexed data[0] unconditionally and raised
    # IndexError on an empty series (possible when a tag has no events).
    if not data:
        return []
    smoothed = []
    last = data[0]
    for point in data:
        last = last * weight + (1 - weight) * point
        smoothed.append(last)
    return smoothed
def plot_training_curves(metrics, save_path):
    """Render a 2x2 grid of training curves and save it as an image.

    Panels: episode reward (+ smoothed EMA), evaluation return,
    actor loss, critic loss. Panels with no data are left empty.

    Args:
        metrics: Mapping of tag -> {'steps': [...], 'values': [...]}
            as produced by ``extract_metrics``.
        save_path: Output image path (format inferred from extension).
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Top-left: raw episode reward plus smoothed running average.
    episodes = metrics.get('Reward/Episode', {}).get('steps', [])
    ep_rewards = metrics.get('Reward/Episode', {}).get('values', [])
    # BUG FIX: the smoothed series comes from a different tag
    # ('Reward/AvgLast10') and may have a different length than
    # `episodes`; plotting it against `episodes` raised a matplotlib
    # shape-mismatch error. Use that tag's own steps as the x-axis.
    avg_steps = metrics.get('Reward/AvgLast10', {}).get('steps', [])
    avg_rewards = metrics.get('Reward/AvgLast10', {}).get('values', [])
    if episodes and ep_rewards:
        axes[0, 0].plot(episodes, ep_rewards, alpha=0.3, label='Episode Reward')
        if avg_steps and avg_rewards:
            axes[0, 0].plot(avg_steps, smooth(avg_rewards), 'r-', linewidth=2, label='Smoothed (EMA)')
        axes[0, 0].set_xlabel('Training Steps')
        axes[0, 0].set_ylabel('Episode Reward')
        axes[0, 0].set_title('Training Episode Reward')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

    # Top-right: periodic evaluation performance.
    eval_steps = metrics.get('Eval/MeanReturn', {}).get('steps', [])
    eval_returns = metrics.get('Eval/MeanReturn', {}).get('values', [])
    if eval_steps and eval_returns:
        axes[0, 1].plot(eval_steps, eval_returns, 'g-', linewidth=2, marker='o', markersize=4)
        axes[0, 1].set_xlabel('Episode')
        axes[0, 1].set_ylabel('Mean Evaluation Return')
        axes[0, 1].set_title('Evaluation Performance')
        axes[0, 1].grid(True, alpha=0.3)

    # Bottom-left: actor loss (smoothed for readability).
    actor_loss_steps = metrics.get('Loss/Actor', {}).get('steps', [])
    actor_losses = metrics.get('Loss/Actor', {}).get('values', [])
    if actor_loss_steps and actor_losses:
        axes[1, 0].plot(actor_loss_steps, smooth(actor_losses), 'b-', linewidth=1.5)
        axes[1, 0].set_xlabel('Training Steps')
        axes[1, 0].set_ylabel('Actor Loss')
        axes[1, 0].set_title('Actor Loss Over Training')
        axes[1, 0].grid(True, alpha=0.3)

    # Bottom-right: critic loss (smoothed for readability).
    critic_loss_steps = metrics.get('Loss/Critic', {}).get('steps', [])
    critic_losses = metrics.get('Loss/Critic', {}).get('values', [])
    if critic_loss_steps and critic_losses:
        axes[1, 1].plot(critic_loss_steps, smooth(critic_losses), 'purple', linewidth=1.5)
        axes[1, 1].set_xlabel('Training Steps')
        axes[1, 1].set_ylabel('Critic Loss')
        axes[1, 1].set_title('Critic Loss Over Training')
        axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    plt.close()  # free the figure so repeated calls don't leak memory
    print(f"Plots saved to {save_path}")
def main():
    """Find the most recent TensorBoard run, plot it, and print a summary.

    Looks under ``logs/tensorboard`` for run subdirectories, picks the
    lexicographically latest one, writes ``training_curves.png``, and
    prints min/max/final for every extracted scalar tag.
    """
    log_base = 'logs/tensorboard'
    # Guard: os.listdir on a missing directory raised FileNotFoundError;
    # report it the same way as the empty-run case instead of crashing.
    if not os.path.isdir(log_base):
        print(f"Log directory not found: {log_base}")
        return
    runs = sorted([d for d in os.listdir(log_base) if os.path.isdir(os.path.join(log_base, d))])
    if not runs:
        print("No runs found!")
        return
    latest_run = os.path.join(log_base, runs[-1])
    print(f"Analyzing run: {runs[-1]}")
    metrics = extract_metrics(latest_run)
    plot_training_curves(metrics, 'training_curves.png')
    print("\nExtracted metrics:")
    for tag, data in metrics.items():
        if data['values']:
            values = np.array(data['values'])
            print(f"  {tag}: min={values.min():.2f}, max={values.max():.2f}, final={values[-1]:.2f}")

if __name__ == '__main__':
    main()