Files
rl-atari/强化学习个人项目报告/generate_plots.py
T
Serendipity b32490ae03 fix(ppo): 修正日志概率维度与状态张量格式
修复 replay buffer 中 log_probs 的维度错误,从 (buffer_size, action_dim) 改为 buffer_size
修正训练时状态张量格式,从 (N, H, W, C) 转换为 (N, C, H, W)
更新 collect_rollout 返回观测值并修正 log_prob 计算
添加项目配置文件和训练曲线生成脚本
2026-04-30 20:30:40 +08:00

108 lines
3.8 KiB
Python

"""Generate training plots from TensorBoard logs."""
import os
import numpy as np
from tensorboard.backend.event_processing import event_accumulator
import matplotlib.pyplot as plt
def extract_metrics(log_dir):
    """Load every scalar series from a TensorBoard event directory.

    Args:
        log_dir: Path to a directory containing TensorBoard event files.

    Returns:
        Mapping of scalar tag -> {'steps': [int, ...], 'values': [float, ...]}.
    """
    accumulator = event_accumulator.EventAccumulator(log_dir)
    accumulator.Reload()  # parse event files from disk before querying tags
    extracted = {}
    for tag in accumulator.Tags()['scalars']:
        scalar_events = accumulator.Scalars(tag)
        extracted[tag] = {
            'steps': [ev.step for ev in scalar_events],
            'values': [ev.value for ev in scalar_events],
        }
    return extracted
def smooth(data, weight=0.6):
    """Smooth a series with an exponential moving average (EMA).

    The first output equals the first input (EMA seeded with data[0]);
    each subsequent point is ``last * weight + (1 - weight) * point``.

    Args:
        data: Sequence of numbers to smooth.
        weight: EMA decay factor in [0, 1); higher means smoother.

    Returns:
        List of smoothed values, same length as ``data``.
    """
    # Guard: the original indexed data[0] unconditionally and raised
    # IndexError on an empty series (possible when a tag has no events).
    if not data:
        return []
    smoothed = []
    last = data[0]
    for point in data:
        last = last * weight + (1 - weight) * point
        smoothed.append(last)
    return smoothed
def plot_training_curves(metrics, save_path):
    """Render a 2x2 grid of training curves and save it as an image.

    Panels: episode reward (+ smoothed EMA), evaluation return,
    actor loss, critic loss. Panels with no data are left empty.

    Args:
        metrics: Mapping of tag -> {'steps': [...], 'values': [...]}
            as produced by ``extract_metrics``.
        save_path: Output image path (format inferred from extension).
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Top-left: raw episode reward plus smoothed running average.
    episodes = metrics.get('Reward/Episode', {}).get('steps', [])
    ep_rewards = metrics.get('Reward/Episode', {}).get('values', [])
    # BUG FIX: the smoothed series comes from a different tag
    # ('Reward/AvgLast10') and may have a different length than
    # `episodes`; plotting it against `episodes` raised a matplotlib
    # shape-mismatch error. Use that tag's own steps as the x-axis.
    avg_steps = metrics.get('Reward/AvgLast10', {}).get('steps', [])
    avg_rewards = metrics.get('Reward/AvgLast10', {}).get('values', [])
    if episodes and ep_rewards:
        axes[0, 0].plot(episodes, ep_rewards, alpha=0.3, label='Episode Reward')
        if avg_steps and avg_rewards:
            axes[0, 0].plot(avg_steps, smooth(avg_rewards), 'r-', linewidth=2, label='Smoothed (EMA)')
        axes[0, 0].set_xlabel('Training Steps')
        axes[0, 0].set_ylabel('Episode Reward')
        axes[0, 0].set_title('Training Episode Reward')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

    # Top-right: periodic evaluation performance.
    eval_steps = metrics.get('Eval/MeanReturn', {}).get('steps', [])
    eval_returns = metrics.get('Eval/MeanReturn', {}).get('values', [])
    if eval_steps and eval_returns:
        axes[0, 1].plot(eval_steps, eval_returns, 'g-', linewidth=2, marker='o', markersize=4)
        axes[0, 1].set_xlabel('Episode')
        axes[0, 1].set_ylabel('Mean Evaluation Return')
        axes[0, 1].set_title('Evaluation Performance')
        axes[0, 1].grid(True, alpha=0.3)

    # Bottom-left: actor loss (smoothed for readability).
    actor_loss_steps = metrics.get('Loss/Actor', {}).get('steps', [])
    actor_losses = metrics.get('Loss/Actor', {}).get('values', [])
    if actor_loss_steps and actor_losses:
        axes[1, 0].plot(actor_loss_steps, smooth(actor_losses), 'b-', linewidth=1.5)
        axes[1, 0].set_xlabel('Training Steps')
        axes[1, 0].set_ylabel('Actor Loss')
        axes[1, 0].set_title('Actor Loss Over Training')
        axes[1, 0].grid(True, alpha=0.3)

    # Bottom-right: critic loss (smoothed for readability).
    critic_loss_steps = metrics.get('Loss/Critic', {}).get('steps', [])
    critic_losses = metrics.get('Loss/Critic', {}).get('values', [])
    if critic_loss_steps and critic_losses:
        axes[1, 1].plot(critic_loss_steps, smooth(critic_losses), 'purple', linewidth=1.5)
        axes[1, 1].set_xlabel('Training Steps')
        axes[1, 1].set_ylabel('Critic Loss')
        axes[1, 1].set_title('Critic Loss Over Training')
        axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    plt.close()  # free the figure so repeated calls don't leak memory
    print(f"Plots saved to {save_path}")
def main():
    """Find the most recent TensorBoard run, plot it, and print a summary.

    Looks under ``logs/tensorboard`` for run subdirectories, picks the
    lexicographically latest one, writes ``training_curves.png``, and
    prints min/max/final for every extracted scalar tag.
    """
    log_base = 'logs/tensorboard'
    # Guard: os.listdir on a missing directory raised FileNotFoundError;
    # report it the same way as the empty-run case instead of crashing.
    if not os.path.isdir(log_base):
        print(f"Log directory not found: {log_base}")
        return
    runs = sorted([d for d in os.listdir(log_base) if os.path.isdir(os.path.join(log_base, d))])
    if not runs:
        print("No runs found!")
        return
    latest_run = os.path.join(log_base, runs[-1])
    print(f"Analyzing run: {runs[-1]}")
    metrics = extract_metrics(latest_run)
    plot_training_curves(metrics, 'training_curves.png')
    print("\nExtracted metrics:")
    for tag, data in metrics.items():
        if data['values']:
            values = np.array(data['values'])
            print(f"  {tag}: min={values.min():.2f}, max={values.max():.2f}, final={values[-1]:.2f}")

if __name__ == '__main__':
    main()