b32490ae03
Fix the dimension error of log_probs in the replay buffer, changing its shape from (buffer_size, action_dim) to (buffer_size,). Correct the state tensor format used during training, converting (N, H, W, C) to (N, C, H, W). Update collect_rollout to return observations and fix the log_prob computation. Add a project configuration file and a training-curve plotting script.
108 lines
3.8 KiB
Python
108 lines
3.8 KiB
Python
"""Generate training plots from TensorBoard logs."""
|
|
import os
|
|
import numpy as np
|
|
from tensorboard.backend.event_processing import event_accumulator
|
|
import matplotlib.pyplot as plt
|
|
|
|
def extract_metrics(log_dir):
    """Load every scalar series recorded in a TensorBoard event directory.

    Args:
        log_dir: Path to a single TensorBoard run directory.

    Returns:
        Dict mapping each scalar tag to ``{'steps': [...], 'values': [...]}``,
        where both lists are in event order.
    """
    accumulator = event_accumulator.EventAccumulator(log_dir)
    accumulator.Reload()

    extracted = {}
    for scalar_tag in accumulator.Tags()['scalars']:
        scalar_events = accumulator.Scalars(scalar_tag)
        extracted[scalar_tag] = {
            'steps': [ev.step for ev in scalar_events],
            'values': [ev.value for ev in scalar_events],
        }
    return extracted
|
|
|
|
def smooth(data, weight=0.6):
    """Exponential moving average (EMA) smoothing of a numeric sequence.

    Each output point is ``last * weight + (1 - weight) * point``, seeded with
    the first element (so the first output equals the first input).

    Args:
        data: Sequence of numbers to smooth.
        weight: EMA coefficient in [0, 1); higher means heavier smoothing.

    Returns:
        A list the same length as ``data``; empty list for empty input.
    """
    # BUG FIX: the original indexed data[0] unconditionally and raised
    # IndexError when given an empty sequence (e.g. a tag with no events).
    if not data:
        return []

    smoothed = []
    last = data[0]
    for point in data:
        # Standard EMA recurrence; `last` carries the running average.
        last = last * weight + (1 - weight) * point
        smoothed.append(last)
    return smoothed
|
|
|
|
def plot_training_curves(metrics, save_path):
    """Render a 2x2 grid of training curves and save it as an image.

    Panels: episode reward (+ EMA-smoothed average), evaluation mean return,
    actor loss, and critic loss. Panels whose tags are absent are left blank.

    Args:
        metrics: Dict of tag -> {'steps': [...], 'values': [...]} as produced
            by ``extract_metrics``.
        save_path: Output image path (format inferred from the extension).
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    episodes = metrics.get('Reward/Episode', {}).get('steps', [])
    ep_rewards = metrics.get('Reward/Episode', {}).get('values', [])
    avg_steps = metrics.get('Reward/AvgLast10', {}).get('steps', [])
    avg_rewards = metrics.get('Reward/AvgLast10', {}).get('values', [])

    if episodes and ep_rewards:
        axes[0, 0].plot(episodes, ep_rewards, alpha=0.3, label='Episode Reward')
        if avg_rewards:
            # BUG FIX: the smoothed AvgLast10 series was plotted against the
            # Reward/Episode steps; if the two tags are logged at different
            # cadence the x/y lengths differ and matplotlib raises ValueError.
            # Use the tag's own steps, falling back to a truncated episode axis.
            avg_x = avg_steps if avg_steps else episodes[:len(avg_rewards)]
            axes[0, 0].plot(avg_x, smooth(avg_rewards), 'r-', linewidth=2, label='Smoothed (EMA)')
        axes[0, 0].set_xlabel('Training Steps')
        axes[0, 0].set_ylabel('Episode Reward')
        axes[0, 0].set_title('Training Episode Reward')
        axes[0, 0].legend()
        axes[0, 0].grid(True, alpha=0.3)

    eval_steps = metrics.get('Eval/MeanReturn', {}).get('steps', [])
    eval_returns = metrics.get('Eval/MeanReturn', {}).get('values', [])

    if eval_steps and eval_returns:
        axes[0, 1].plot(eval_steps, eval_returns, 'g-', linewidth=2, marker='o', markersize=4)
        axes[0, 1].set_xlabel('Episode')
        axes[0, 1].set_ylabel('Mean Evaluation Return')
        axes[0, 1].set_title('Evaluation Performance')
        axes[0, 1].grid(True, alpha=0.3)

    actor_loss_steps = metrics.get('Loss/Actor', {}).get('steps', [])
    actor_losses = metrics.get('Loss/Actor', {}).get('values', [])

    if actor_loss_steps and actor_losses:
        axes[1, 0].plot(actor_loss_steps, smooth(actor_losses), 'b-', linewidth=1.5)
        axes[1, 0].set_xlabel('Training Steps')
        axes[1, 0].set_ylabel('Actor Loss')
        axes[1, 0].set_title('Actor Loss Over Training')
        axes[1, 0].grid(True, alpha=0.3)

    critic_loss_steps = metrics.get('Loss/Critic', {}).get('steps', [])
    critic_losses = metrics.get('Loss/Critic', {}).get('values', [])

    if critic_loss_steps and critic_losses:
        axes[1, 1].plot(critic_loss_steps, smooth(critic_losses), 'purple', linewidth=1.5)
        axes[1, 1].set_xlabel('Training Steps')
        axes[1, 1].set_ylabel('Critic Loss')
        axes[1, 1].set_title('Critic Loss Over Training')
        axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(save_path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f"Plots saved to {save_path}")
|
|
|
|
def main():
    """Locate the newest TensorBoard run, plot its curves, and print a summary.

    Picks the lexicographically last subdirectory of ``logs/tensorboard``
    (run names are assumed to sort chronologically — TODO confirm naming
    convention against the training script), writes ``training_curves.png``,
    and prints min/max/final for every extracted scalar tag.
    """
    log_base = 'logs/tensorboard'

    # BUG FIX: os.listdir raised FileNotFoundError when the log directory
    # did not exist yet (e.g. before any training run); report and exit.
    if not os.path.isdir(log_base):
        print("No runs found!")
        return

    runs = sorted(d for d in os.listdir(log_base)
                  if os.path.isdir(os.path.join(log_base, d)))

    if not runs:
        print("No runs found!")
        return

    latest_run = os.path.join(log_base, runs[-1])
    print(f"Analyzing run: {runs[-1]}")

    metrics = extract_metrics(latest_run)

    plot_training_curves(metrics, 'training_curves.png')

    print("\nExtracted metrics:")
    for tag, data in metrics.items():
        if data['values']:
            values = np.array(data['values'])
            print(f" {tag}: min={values.min():.2f}, max={values.max():.2f}, final={values[-1]:.2f}")


if __name__ == '__main__':
    main()
|