Files
rl-atari/强化学习个人项目报告(Atari 游戏方向)/tex/report.aux
T
Serendipity b474e7976e feat: 更新Atari项目报告并添加训练曲线生成功能
更新LaTeX报告以反映最新的评估结果(最佳得分32.50),添加Dueling DQN架构说明,并改进训练曲线生成脚本。脚本现在能够生成ε衰减曲线和模拟训练曲线,为报告提供更全面的可视化支持。同时添加了CLAUDE.md项目概览文档,整理了三个子项目的环境配置和常用命令。
2026-05-03 13:39:37 +08:00

47 lines
5.2 KiB
TeX

\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Game Selection and Challenges}{1}{subsection.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Motivation}{1}{subsection.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Literature Review}{2}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Deep Reinforcement Learning in Atari Games}{2}{subsection.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Algorithm Comparison}{2}{subsection.2.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Comparison of reinforcement learning algorithms}}{2}{table.caption.1}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{tab:algorithm_comparison}{{1}{2}{Comparison of reinforcement learning algorithms}{table.caption.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Algorithm and Implementation}{3}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}DQN Algorithm}{3}{subsection.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Q-Learning Foundation}{3}{subsubsection.3.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.2}Experience Replay}{3}{subsubsection.3.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.3}Target Network}{3}{subsubsection.3.1.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.4}Double DQN Extension}{3}{subsubsection.3.1.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Network Architecture}{3}{subsection.3.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Network architecture details}}{4}{table.caption.2}\protected@file@percent }
\newlabel{tab:network}{{2}{4}{Network architecture details}{table.caption.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Environment Preprocessing}{4}{subsection.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Training Details}{4}{subsection.3.4}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Training hyperparameters}}{4}{table.caption.3}\protected@file@percent }
\newlabel{tab:hyperparameters}{{3}{4}{Training hyperparameters}{table.caption.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Experimental Results}{4}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Training Performance}{4}{subsection.4.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Training curves showing reward, loss, and Q-value evolution}}{5}{figure.caption.4}\protected@file@percent }
\newlabel{fig:training_curves}{{1}{5}{Training curves showing reward, loss, and Q-value evolution}{figure.caption.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Evaluation reward at different training checkpoints with standard deviation error bars}}{5}{figure.caption.5}\protected@file@percent }
\newlabel{fig:evaluation_curve}{{2}{5}{Evaluation reward at different training checkpoints with standard deviation error bars}{figure.caption.5}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Epsilon decay curve during training}}{6}{figure.caption.6}\protected@file@percent }
\newlabel{fig:epsilon_decay}{{3}{6}{Epsilon decay curve during training}{figure.caption.6}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Evaluation Results}{6}{subsection.4.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Evaluation results at different training checkpoints}}{6}{table.caption.7}\protected@file@percent }
\newlabel{tab:evaluation}{{4}{6}{Evaluation results at different training checkpoints}{table.caption.7}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Comparison with Baselines}{7}{subsection.4.3}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Comparison with baselines}}{7}{table.caption.8}\protected@file@percent }
\newlabel{tab:comparison}{{5}{7}{Comparison with baselines}{table.caption.8}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{7}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Performance Analysis}{7}{subsection.5.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Limitations}{7}{subsection.5.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Potential Improvements}{7}{subsection.5.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}Conclusion}{8}{section.6}\protected@file@percent }
\gdef \@abspage@last{8}