Files
rl-atari/强化学习个人项目报告/tex/CW1_1234560.aux
T
Serendipity 6b929e9790 docs: 添加强化学习项目报告及相关文件
添加完整的强化学习个人项目报告,包括PDF文档、LaTeX源文件、训练曲线图、TensorBoard日志以及改进的训练脚本。报告详细记录了从零实现PPO算法解决CarRacing-v3环境的过程,包含算法设计、网络架构、超参数配置和实验结果分析。
2026-04-30 22:59:14 +08:00

36 lines
3.6 KiB
TeX

\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\HyPL@Entry{1<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Background: The CarRacing-v3 Environment}{1}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}State Space}{1}{subsection.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Action Space}{1}{subsection.2.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Reward Mechanism}{2}{subsection.2.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {3}Algorithm: Proximal Policy Optimization}{2}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Policy Gradient Foundation}{2}{subsection.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2} clipped Surrogate Objective}{2}{subsection.3.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Generalized Advantage Estimation}{3}{subsection.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {4}Network Architecture}{3}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Actor Network (Policy)}{3}{subsection.4.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Actor Network Architecture}}{3}{figure.caption.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Critic Network (Value)}{3}{subsection.4.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Critic Network Architecture}}{3}{figure.caption.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Implementation Details}{4}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Hyperparameters}{4}{subsection.5.1}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Hyperparameter Configuration}}{4}{table.caption.3}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{tab:hyperparams}{{1}{4}{Hyperparameter Configuration}{table.caption.3}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Training Pipeline}{4}{subsection.5.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Problems and Solutions}{4}{subsection.5.3}\protected@file@percent }
ine {1}{\ignorespaces Training and Evaluation Curves}}{6}{figure.caption.2}\protected@file@percent }
\newlabel{fig:training_curves}{{1}{6}{Training and Evaluation Curves}{figure.caption.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {6.2}Test Evaluation}{6}{subsection.6.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {6.3}Comparison with Baselines}{6}{subsection.6.3}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Comparison with Stable-Baselines3 PPO}}{6}{table.caption.3}\protected@file@percent }
\newlabel{tab:comparison}{{2}{6}{Comparison with Stable-Baselines3 PPO}{table.caption.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {7}Conclusion}{7}{section.7}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {8}References}{7}{section.8}\protected@file@percent }
\gdef \@abspage@last{8}