From 3a77bbfdbedebe31ecfd6f0de43f1d0c330776cb Mon Sep 17 00:00:00 2001 From: Nathan Lambert Date: Mon, 12 Aug 2024 08:49:04 -0400 Subject: [PATCH] setup --- ...1-introduction.md => 01-1-introduction.md} | 0 chapters/01-2-preferences.md | 0 chapters/01-3-optimization.md | 0 chapters/01-4-related-works.md | 0 .../{02-optimization.md => 02-1-setup.md} | 0 chapters/02-2-preference-data.md | 1 + chapters/02-3-reward-models.md | 1 + chapters/02-4-regularization.md | 1 + chapters/03-1-instructions.md | 1 + ...sampling.md => 03-2-rejection-sampling.md} | 0 chapters/03-3-policy-gradients.md | 1 + chapters/03-4-direct-alignment.md | 1 + chapters/04-1-cai.md | 1 + chapters/04-2-synthetic.md | 1 + chapters/04-3-evaluation.md | 1 + chapters/05-1-over-optimization.md | 1 + metadata.yml | 2 +- templates/chapter.html | 102 +++++++++--------- templates/html.html | 67 ++++++++++-- templates/style.css | 23 +++- 20 files changed, 144 insertions(+), 60 deletions(-) rename chapters/{01-introduction.md => 01-1-introduction.md} (100%) create mode 100644 chapters/01-2-preferences.md create mode 100644 chapters/01-3-optimization.md create mode 100644 chapters/01-4-related-works.md rename chapters/{02-optimization.md => 02-1-setup.md} (100%) create mode 100644 chapters/02-2-preference-data.md create mode 100644 chapters/02-3-reward-models.md create mode 100644 chapters/02-4-regularization.md create mode 100644 chapters/03-1-instructions.md rename chapters/{03-opt-rejection-sampling.md => 03-2-rejection-sampling.md} (100%) create mode 100644 chapters/03-3-policy-gradients.md create mode 100644 chapters/03-4-direct-alignment.md create mode 100644 chapters/04-1-cai.md create mode 100644 chapters/04-2-synthetic.md create mode 100644 chapters/04-3-evaluation.md create mode 100644 chapters/05-1-over-optimization.md diff --git a/chapters/01-introduction.md b/chapters/01-1-introduction.md similarity index 100% rename from chapters/01-introduction.md rename to chapters/01-1-introduction.md diff --git a/chapters/01-2-preferences.md b/chapters/01-2-preferences.md new file mode 100644 index 0000000..e69de29 diff --git a/chapters/01-3-optimization.md b/chapters/01-3-optimization.md new file mode 100644 index 0000000..e69de29 diff --git a/chapters/01-4-related-works.md b/chapters/01-4-related-works.md new file mode 100644 index 0000000..e69de29 diff --git a/chapters/02-optimization.md b/chapters/02-1-setup.md similarity index 100% rename from chapters/02-optimization.md rename to chapters/02-1-setup.md diff --git a/chapters/02-2-preference-data.md b/chapters/02-2-preference-data.md new file mode 100644 index 0000000..7561250 --- /dev/null +++ b/chapters/02-2-preference-data.md @@ -0,0 +1 @@ +# Preference Data \ No newline at end of file diff --git a/chapters/02-3-reward-models.md b/chapters/02-3-reward-models.md new file mode 100644 index 0000000..40a9975 --- /dev/null +++ b/chapters/02-3-reward-models.md @@ -0,0 +1 @@ +# Reward Modeling \ No newline at end of file diff --git a/chapters/02-4-regularization.md b/chapters/02-4-regularization.md new file mode 100644 index 0000000..be47fef --- /dev/null +++ b/chapters/02-4-regularization.md @@ -0,0 +1 @@ +# Regularization \ No newline at end of file diff --git a/chapters/03-1-instructions.md b/chapters/03-1-instructions.md new file mode 100644 index 0000000..0695aa8 --- /dev/null +++ b/chapters/03-1-instructions.md @@ -0,0 +1 @@ +# Instruction Tuning \ No newline at end of file diff --git a/chapters/03-opt-rejection-sampling.md b/chapters/03-2-rejection-sampling.md similarity index 100% rename from chapters/03-opt-rejection-sampling.md rename to chapters/03-2-rejection-sampling.md diff --git a/chapters/03-3-policy-gradients.md b/chapters/03-3-policy-gradients.md new file mode 100644 index 0000000..6f174d7 --- /dev/null +++ b/chapters/03-3-policy-gradients.md @@ -0,0 +1 @@ +# Policy Gradient Algorithms \ No newline at end of file diff --git a/chapters/03-4-direct-alignment.md b/chapters/03-4-direct-alignment.md new file mode 100644 index 0000000..89d7a15 --- /dev/null +++ b/chapters/03-4-direct-alignment.md @@ -0,0 +1 @@ +# Direct Alignment Algorithms \ No newline at end of file diff --git a/chapters/04-1-cai.md b/chapters/04-1-cai.md new file mode 100644 index 0000000..e42f35a --- /dev/null +++ b/chapters/04-1-cai.md @@ -0,0 +1 @@ +# Constitutional AI \ No newline at end of file diff --git a/chapters/04-2-synthetic.md b/chapters/04-2-synthetic.md new file mode 100644 index 0000000..5958f43 --- /dev/null +++ b/chapters/04-2-synthetic.md @@ -0,0 +1 @@ +# Synthetic Data \ No newline at end of file diff --git a/chapters/04-3-evaluation.md b/chapters/04-3-evaluation.md new file mode 100644 index 0000000..5f5eb37 --- /dev/null +++ b/chapters/04-3-evaluation.md @@ -0,0 +1 @@ +# Evaluation \ No newline at end of file diff --git a/chapters/05-1-over-optimization.md b/chapters/05-1-over-optimization.md new file mode 100644 index 0000000..710a3f2 --- /dev/null +++ b/chapters/05-1-over-optimization.md @@ -0,0 +1 @@ +# Over Optimization \ No newline at end of file diff --git a/metadata.yml b/metadata.yml index 1a7e649..81a3ea1 100644 --- a/metadata.yml +++ b/metadata.yml @@ -1,5 +1,5 @@ --- -title: Reinforcement Learning from Human Feedback +title: Reinforcement Learning from Human Feedback Basics biblio-title: Bibliography reference-section-title: Bibliography author: Nathan Lambert diff --git a/templates/chapter.html b/templates/chapter.html index 21d4b81..fa41e48 100644 --- a/templates/chapter.html +++ b/templates/chapter.html @@ -43,61 +43,63 @@

$title$

$subtitle$

$endif$ - $for(author)$ diff --git a/templates/html.html b/templates/html.html index 8c75bde..105dfe4 100644 --- a/templates/html.html +++ b/templates/html.html @@ -42,6 +42,65 @@

$title$

$if(subtitle)$

$subtitle$

$endif$ + + + $for(author)$

$author$

$endfor$ @@ -52,14 +111,6 @@

$title$

Abstract

$abstract$ -

Contents

-
    -
  1. Introduction
  2. -
  3. TODO
  4. -
  5. TODO
  6. -
  7. TODO
  8. - -
$endif$ diff --git a/templates/style.css b/templates/style.css index bb37e58..30c4347 100644 --- a/templates/style.css +++ b/templates/style.css @@ -342,31 +342,36 @@ table td { } /* Header Nav Block */ -.chapter-nav { + .chapter-nav { display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; max-width: 1200px; margin: 0 auto; + text-align: left; } .section { background-color: #f8f8f8; padding: 10px; border-radius: 5px; + text-align: left; } .section p { margin: 0 0 5px 0; font-size: 14px; font-weight: bold; + text-align: left; } .section ol, .section ul { margin: 0; padding-left: 20px; + text-align: left; } .section li { font-size: 12px; line-height: 1.3; margin-bottom: 3px; + text-align: left; } .section a { color: #0066cc; @@ -374,4 +379,20 @@ table td { } .section a:hover { text-decoration: underline; + } + + /* Mobile Responsiveness */ + @media screen and (max-width: 768px) { + .chapter-nav { + grid-template-columns: 1fr; + } + .section { + margin-bottom: 10px; + } + .section p { + font-size: 16px; + } + .section li { + font-size: 14px; + } } \ No newline at end of file