Files
rl-atari/强化学习个人项目报告(Atari 游戏方向)/tex/report.aux
T
Serendipity 1c1cccd3f6 feat: 添加模型评估脚本并更新实验报告
- 添加 evaluate_checkpoints.py 脚本,用于评估训练过程中的检查点模型
- 更新 generate_plots.py 以支持从真实评估结果生成图表
- 更新实验报告内容,包含具体实验结果数据和分析
- 添加中文支持并更新作者信息
- 生成评估结果JSON文件和相应图表
2026-05-01 18:44:22 +08:00

47 lines
5.2 KiB
TeX

\relax
\providecommand\hyper@newdestlabel[2]{}
\providecommand*\HyPL@Entry[1]{}
\HyPL@Entry{0<</S/D>>}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}{section.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Game Selection and Challenges}{1}{subsection.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Motivation}{1}{subsection.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {2}Literature Review}{2}{section.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Deep Reinforcement Learning in Atari Games}{2}{subsection.2.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Algorithm Comparison}{2}{subsection.2.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Comparison of reinforcement learning algorithms}}{2}{table.caption.1}\protected@file@percent }
\providecommand*\caption@xref[2]{\@setref\relax\@undefined{#1}}
\newlabel{tab:algorithm_comparison}{{1}{2}{Comparison of reinforcement learning algorithms}{table.caption.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {3}Algorithm and Implementation}{3}{section.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}DQN Algorithm}{3}{subsection.3.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Q-Learning Foundation}{3}{subsubsection.3.1.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.2}Experience Replay}{3}{subsubsection.3.1.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.3}Target Network}{3}{subsubsection.3.1.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.4}Double DQN Extension}{3}{subsubsection.3.1.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Network Architecture}{3}{subsection.3.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Network architecture details}}{4}{table.caption.2}\protected@file@percent }
\newlabel{tab:network}{{2}{4}{Network architecture details}{table.caption.2}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Environment Preprocessing}{4}{subsection.3.3}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Training Details}{4}{subsection.3.4}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {3}{\ignorespaces Training hyperparameters}}{4}{table.caption.3}\protected@file@percent }
\newlabel{tab:hyperparameters}{{3}{4}{Training hyperparameters}{table.caption.3}{}}
\@writefile{toc}{\contentsline {section}{\numberline {4}Experimental Results}{4}{section.4}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Training Performance}{4}{subsection.4.1}\protected@file@percent }
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Training curves showing reward, loss, and Q-value evolution}}{5}{figure.caption.4}\protected@file@percent }
\newlabel{fig:training_curves}{{1}{5}{Training curves showing reward, loss, and Q-value evolution}{figure.caption.4}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Evaluation reward at different training checkpoints with standard deviation error bars}}{5}{figure.caption.5}\protected@file@percent }
\newlabel{fig:evaluation_curve}{{2}{5}{Evaluation reward at different training checkpoints with standard deviation error bars}{figure.caption.5}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Epsilon decay curve during training}}{6}{figure.caption.6}\protected@file@percent }
\newlabel{fig:epsilon_decay}{{3}{6}{Epsilon decay curve during training}{figure.caption.6}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Evaluation Results}{6}{subsection.4.2}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {4}{\ignorespaces Evaluation results at different training checkpoints}}{6}{table.caption.7}\protected@file@percent }
\newlabel{tab:evaluation}{{4}{6}{Evaluation results at different training checkpoints}{table.caption.7}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Comparison with Baselines}{6}{subsection.4.3}\protected@file@percent }
\@writefile{lot}{\contentsline {table}{\numberline {5}{\ignorespaces Comparison with baselines}}{6}{table.caption.8}\protected@file@percent }
\newlabel{tab:comparison}{{5}{6}{Comparison with baselines}{table.caption.8}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Discussion}{7}{section.5}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Performance Analysis}{7}{subsection.5.1}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.2}Limitations}{7}{subsection.5.2}\protected@file@percent }
\@writefile{toc}{\contentsline {subsection}{\numberline {5.3}Potential Improvements}{7}{subsection.5.3}\protected@file@percent }
\@writefile{toc}{\contentsline {section}{\numberline {6}Conclusion}{7}{section.6}\protected@file@percent }
\gdef \@abspage@last{8}