setup

natolambert · Aug 12, 2024 · 3a77bbf · 3a77bbf
1 parent afdf126
commit 3a77bbf
Show file tree

Hide file tree

Showing 20 changed files with 144 additions and 60 deletions.
diff --git a/chapters/01-introduction.md b/chapters/01-1-introduction.md
diff --git a/chapters/01-2-preferences.md b/chapters/01-2-preferences.md
diff --git a/chapters/01-3-optimization.md b/chapters/01-3-optimization.md
diff --git a/chapters/01-4-related-works.md b/chapters/01-4-related-works.md
diff --git a/chapters/02-optimization.md b/chapters/02-1-setup.md
diff --git a/chapters/02-2-preference-data.md b/chapters/02-2-preference-data.md
@@ -0,0 +1 @@
+# Preference Data
diff --git a/chapters/02-3-reward-models.md b/chapters/02-3-reward-models.md
@@ -0,0 +1 @@
+# Reward Modeling
diff --git a/chapters/02-4-regularization.md b/chapters/02-4-regularization.md
@@ -0,0 +1 @@
+# Regularization
diff --git a/chapters/03-1-instructions.md b/chapters/03-1-instructions.md
@@ -0,0 +1 @@
+# Instruction Tuning
diff --git a/chapters/03-opt-rejection-sampling.md b/chapters/03-2-rejection-sampling.md
diff --git a/chapters/03-3-policy-gradients.md b/chapters/03-3-policy-gradients.md
@@ -0,0 +1 @@
+# Policy Gradient Algorithms
diff --git a/chapters/03-4-direct-alignment.md b/chapters/03-4-direct-alignment.md
@@ -0,0 +1 @@
+# Direct Alignment Algorithms
diff --git a/chapters/04-1-cai.md b/chapters/04-1-cai.md
@@ -0,0 +1 @@
+# Constitutional AI
diff --git a/chapters/04-2-synthetic.md b/chapters/04-2-synthetic.md
@@ -0,0 +1 @@
+# Synthetic Data
diff --git a/chapters/04-3-evaluation.md b/chapters/04-3-evaluation.md
@@ -0,0 +1 @@
+# Evaluation
diff --git a/chapters/05-1-over-optimization.md b/chapters/05-1-over-optimization.md
@@ -0,0 +1 @@
+# Over Optimization
diff --git a/metadata.yml b/metadata.yml
@@ -1,5 +1,5 @@
 ---
-title: Reinforcement Learning from Human Feedback
+title: Reinforcement Learning from Human Feedback Basics
 biblio-title: Bibliography
 reference-section-title: Bibliography
 author: Nathan Lambert

diff --git a/templates/chapter.html b/templates/chapter.html
@@ -43,61 +43,63 @@ <h1 class="title">$title$</h1>
 <p class="subtitle">$subtitle$</p>
 $endif$
 
-  <nav class="chapter-nav">
-    <div class="section">
-      <p><strong>Home</strong></p>
-      <ul>
-        <li><a href="https://github.com/natolambert/rlhf-book">GitHub Repository</a></li>
-      </ul>
-    </div>
+<nav class="chapter-nav">
+  <div class="section">
+    <ul>
+      <li><a href="https://www.rlhfbook.com">Home</a></li>
+      <li><a href="https://github.com/natolambert/rlhf-book">GitHub Repository</a></li>
+      <li>PDF (Soon)</li>
+      <li>Order a copy (Soon)</li>
+    </ul>
+  </div>
 
-    <div class="section">
-      <p><strong>Introductions &amp; History</strong></p>
-      <ol>
-        <li>Introduction</li>
-        <li>Economics, Psychology, Philosophy of preference, etc.: VNM Theory, Bradley Terry, Impossibility theorems, social choice, etc</li>
-        <li>Optimal Control, Deep RL, ML etc.</li>
-        <li>RLHF for LLM lit (pre chatgpt stuff), maybe summarize instrugpt</li>
-      </ol>
-    </div>
+  <div class="section">
+    <p><strong>Introductions</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/01-1-introduction.html">Introduction</a></li>
+      <li><a href="https://www.rlhfbook.com/c/01-2-preferences.html">What are preferences?</a></li>
+      <li><a href="https://www.rlhfbook.com/c/01-3-optimization.html">Optimization and RL</a></li>
+      <li><a href="https://www.rlhfbook.com/c/01-4-related-works.html">Seminal (Recent) Works</a></li>
+    </ol>
+  </div>
 
-    <div class="section">
-      <p><strong>Problem Specification</strong></p>
-      <ol>
-        <li>Definitions, basic stuff, math</li>
-        <li>Preference data collection</li>
-        <li>Preference model training</li>
-        <li>KL constraints and other penalties</li>
-      </ol>
-    </div>
+  <div class="section">
+    <p><strong>Problem Setup</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/02-1-setup.html">Definitions</a></li>
+      <li><a href="https://www.rlhfbook.com/c/02-2-preference-data.html">Preference Data</a></li>
+      <li><a href="https://www.rlhfbook.com/c/02-3-reward-models.html">Reward Modeling</a></li>
+      <li><a href="https://www.rlhfbook.com/c/02-4-regularization.html">Regularization</a></li>
+    </ol>
+  </div>
 
-    <div class="section">
-      <p><strong>Policy Optimization</strong></p>
-      <ol>
-        <li>IFT / SFT / Chat Templates</li>
-        <li>Rejection Sampling / Best of N</li>
-        <li>PPO, REINFORCE, Policy Gradient</li>
-        <li>DPO (Eric, Archit, Rafael)</li>
-        <li>Other variants (short)</li>
-      </ol>
-    </div>
+  <div class="section">
+    <p><strong>Optimization</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/03-1-instructions.html">Instruction Tuning</a></li>
+      <li><a href="https://www.rlhfbook.com/c/03-2-rejection-sampling.html">Rejection Sampling</a></li>
+      <li><a href="https://www.rlhfbook.com/c/03-3-policy-gradients.html">Policy Gradients</a></li>
+      <li><a href="https://www.rlhfbook.com/c/03-4-direct-alignment.html">Direct Alignment Algorithms</a></li>
+    </ol>
+  </div>
 
-    <div class="section">
-      <p><strong>Advanced (optional)</strong></p>
-      <ol>
-        <li>CAI</li>
-        <li>Synthetic vs human data</li>
-        <li>Evaluation</li>
-      </ol>
-    </div>
+  <div class="section">
+    <p><strong>Advanced (TBD)</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/04-1-cai.html">Constitutional AI</a></li>
+      <li><a href="https://www.rlhfbook.com/c/04-2-synthetic.html">Synthetic Data</a></li>
+      <li><a href="https://www.rlhfbook.com/c/04-3-evaluation.html">Evaluation</a></li>
+    </ol>
+  </div>
 
-    <div class="section">
-      <p><strong>Open Questions (TBD / optional)</strong></p>
-      <ol>
-        <li>Reward model over-optimization</li>
-      </ol>
-    </div>
-  </nav>
+  <div class="section">
+    <p><strong>Open Questions (TBD)</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/05-1-over-optimization.html">Over-optimization</a></li>
+      <li>Style</li>
+    </ol>
+  </div>
+</nav>
 
 
 $for(author)$

diff --git a/templates/html.html b/templates/html.html
@@ -42,6 +42,65 @@ <h1 class="title">$title$</h1>
 $if(subtitle)$
 <p class="subtitle">$subtitle$</p>
 $endif$
+
+<nav class="chapter-nav">
+  <div class="section">
+    <ul>
+      <li><a href="https://www.rlhfbook.com">Home</a></li>
+      <li><a href="https://github.com/natolambert/rlhf-book">GitHub Repository</a></li>
+      <li>PDF (Soon)</li>
+      <li>Order a copy (Soon)</li>
+    </ul>
+  </div>
+
+  <div class="section">
+    <p><strong>Introductions</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/01-1-introduction.html">Introduction</a></li>
+      <li><a href="https://www.rlhfbook.com/c/01-2-preferences.html">What are preferences?</a></li>
+      <li><a href="https://www.rlhfbook.com/c/01-3-optimization.html">Optimization and RL</a></li>
+      <li><a href="https://www.rlhfbook.com/c/01-4-related-works.html">Seminal (Recent) Works</a></li>
+    </ol>
+  </div>
+
+  <div class="section">
+    <p><strong>Problem Setup</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/02-1-setup.html">Definitions</a></li>
+      <li><a href="https://www.rlhfbook.com/c/02-2-preference-data.html">Preference Data</a></li>
+      <li><a href="https://www.rlhfbook.com/c/02-3-reward-models.html">Reward Modeling</a></li>
+      <li><a href="https://www.rlhfbook.com/c/02-4-regularization.html">Regularization</a></li>
+    </ol>
+  </div>
+
+  <div class="section">
+    <p><strong>Optimization</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/03-1-instructions.html">Instruction Tuning</a></li>
+      <li><a href="https://www.rlhfbook.com/c/03-2-rejection-sampling.html">Rejection Sampling</a></li>
+      <li><a href="https://www.rlhfbook.com/c/03-3-policy-gradients.html">Policy Gradients</a></li>
+      <li><a href="https://www.rlhfbook.com/c/03-4-direct-alignment.html">Direct Alignment Algorithms</a></li>
+    </ol>
+  </div>
+
+  <div class="section">
+    <p><strong>Advanced (TBD)</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/04-1-cai.html">Constitutional AI</a></li>
+      <li><a href="https://www.rlhfbook.com/c/04-2-synthetic.html">Synthetic Data</a></li>
+      <li><a href="https://www.rlhfbook.com/c/04-3-evaluation.html">Evaluation</a></li>
+    </ol>
+  </div>
+
+  <div class="section">
+    <p><strong>Open Questions (TBD)</strong></p>
+    <ol>
+      <li><a href="https://www.rlhfbook.com/c/05-1-over-optimization.html">Over-optimization</a></li>
+      <li>Style</li>
+    </ol>
+  </div>
+</nav>
+
 $for(author)$
 <p class="author">$author$</p>
 $endfor$
@@ -52,14 +111,6 @@ <h1 class="title">$title$</h1>
 <div class="abstract">
   <h2>Abstract</h2>
   $abstract$
-  <h2>Contents</h2>
-  <ol>
-      <li><a href="c/01-introduction.html">Introduction</a></li>
-      <li><a href="c/02-installation.html">TODO</a></li>
-      <li><a href="c/03-usage.html">TODO</a></li>
-      <li><a href="c/04-references.html">TODO</a></li>
-      <!-- Add more chapters as needed -->
-  </ol>
 </div>
 $endif$
 </header>

diff --git a/templates/style.css b/templates/style.css
@@ -342,36 +342,57 @@ table td {
 }
 
 /* Header Nav Block */
-.chapter-nav {
+  .chapter-nav {
     display: grid;
     grid-template-columns: repeat(3, 1fr);
     gap: 10px;
     max-width: 1200px;
     margin: 0 auto;
+    text-align: left;
   }
   .section {
     background-color: #f8f8f8;
     padding: 10px;
     border-radius: 5px;
+    text-align: left;
   }
   .section p {
     margin: 0 0 5px 0;
     font-size: 14px;
     font-weight: bold;
+    text-align: left;
   }
   .section ol, .section ul {
     margin: 0;
     padding-left: 20px;
+    text-align: left;
   }
   .section li {
     font-size: 12px;
     line-height: 1.3;
     margin-bottom: 3px;
+    text-align: left;
   }
   .section a {
     color: #0066cc;
     text-decoration: none;
   }
   .section a:hover {
     text-decoration: underline;
+  }
+
+  /* Mobile Responsiveness */
+  @media screen and (max-width: 768px) {
+    .chapter-nav {
+      grid-template-columns: 1fr;
+    }
+    .section {
+      margin-bottom: 10px;
+    }
+    .section p {
+      font-size: 16px;
+    }
+    .section li {
+      font-size: 14px;
+    }
   }