Files
rl-atari/强化学习个人项目报告(Atari 游戏方向)/tex/report.aux
T
2026-05-05 15:30:07 +08:00

40 lines
4.5 KiB
TeX

\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Game and Motivation}{1}{subsection.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Related Work}{1}{subsection.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Algorithm}{2}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}DQN Basics}{2}{subsection.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Double DQN}{2}{subsection.2.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Dueling Architecture}{2}{subsection.2.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Prioritized Experience Replay}{2}{subsection.2.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.5}Network and Preprocessing}{3}{subsection.2.5}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Dueling Q-Network architecture}}{3}{table.caption.1}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{tab:network}{{1}{3}{Dueling Q-Network architecture}{table.caption.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.6}Hyperparameters}{3}{subsection.2.6}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Training hyperparameters}}{3}{table.caption.2}\protected@file@percent }
\newlabel{tab:hyperparameters}{{2}{3}{Training hyperparameters}{table.caption.2}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Results}{4}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Training Progress}{4}{subsection.3.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Training reward, loss, and Q-value over 500 episodes}}{4}{figure.caption.3}\protected@file@percent }
\newlabel{fig:training_curves}{{1}{4}{Training reward, loss, and Q-value over 500 episodes}{figure.caption.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces $\epsilon $ decay from 1.0 to 0.01 over 1M steps}}{5}{figure.caption.4}\protected@file@percent }
\newlabel{fig:epsilon_decay}{{2}{5}{$\epsilon $ decay from 1.0 to 0.01 over 1M steps}{figure.caption.4}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Checkpoint Evaluation}{5}{subsection.3.2}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Evaluation scores across checkpoints, mean $\pm $ std over 10 episodes}}{5}{figure.caption.5}\protected@file@percent }
\newlabel{fig:evaluation_curve}{{3}{5}{Evaluation scores across checkpoints, mean $\pm $ std over 10 episodes}{figure.caption.5}{}}
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Evaluation at each checkpoint, 10 episodes each. The live-tracked best model underperformed most saved checkpoints when re-evaluated.}}{6}{table.caption.6}\protected@file@percent }
\newlabel{tab:evaluation}{{3}{6}{Evaluation at each checkpoint, 10 episodes each. The live-tracked best model underperformed most saved checkpoints when re-evaluated}{table.caption.6}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Baseline Comparison}{6}{subsection.3.3}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Score comparison. Note the large difference in training budget.}}{7}{table.caption.7}\protected@file@percent }
\newlabel{tab:comparison}{{4}{7}{Score comparison. Note the large difference in training budget}{table.caption.7}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Discussion}{7}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Why Things Worked (When They Did)}{7}{subsection.4.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Why Things Didn't Converge}{7}{subsection.4.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}What Would Help}{7}{subsection.4.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{8}{section.5}\protected@file@percent }
\gdef \@abspage@last{8}