HumanCompatibleAI · AdamGleave · Dec 11, 2023 · Dec 7, 2023 · Dec 7, 2023 · Dec 7, 2023
diff --git a/readthedocs.yml b/.readthedocs.yml
rename from readthedocs.yml
rename to .readthedocs.yml
diff --git a/docs/conf.py b/docs/conf.py
@@ -55,7 +55,7 @@
 autosummary_generate = True
 
 nb_execution_mode = os.getenv("NB_EXECUTION_MODE", "cache")
-nb_execution_timeout = 120
+nb_execution_timeout = 200
 nb_merge_streams = True
 nb_output_stderr = "remove"
 nb_execution_raise_on_error = True

diff --git a/docs/tutorials/3_train_gail.ipynb b/docs/tutorials/3_train_gail.ipynb
@@ -78,7 +78,9 @@
    "source": [
     "Now we are ready to set up our GAIL trainer.\n",
     "Note, that the `reward_net` is actually the network of the discriminator.\n",
-    "We evaluate the learner before and after training so we can see if it made any progress."
+    "We evaluate the learner before and after training so we can see if it made any progress.\n",
+    "\n",
+    "First we construct a GAIL trainer ..."
    ]
   },
   {
@@ -117,16 +119,57 @@
     "    venv=env,\n",
     "    gen_algo=learner,\n",
     "    reward_net=reward_net,\n",
-    ")\n",
-    "\n",
-    "# evaluate the learner before training\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "... then we evaluate the learner before training ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "env.seed(SEED)\n",
     "learner_rewards_before_training, _ = evaluate_policy(\n",
     "    learner, env, 100, return_episode_rewards=True\n",
-    ")\n",
-    "\n",
-    "# train the learner and evaluate again\n",
-    "gail_trainer.train(200_000)\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "... and train it ..."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "gail_trainer.train(200_000)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "... and finally evaluate the learner again."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "env.seed(SEED)\n",
     "learner_rewards_after_training, _ = evaluate_policy(\n",
     "    learner, env, 100, return_episode_rewards=True\n",

diff --git a/setup.py b/setup.py
@@ -19,6 +19,8 @@
 ]
 PYTYPE = ["pytype==2023.9.27"] if IS_NOT_WINDOWS else []
 
+HYPOTHESIS = ["hypothesis~=6.54.1"]
+
 # Note: the versions of the test and doc requirements should be tightly pinned to known
 #   working versions to make our CI/CD pipeline as stable as possible.
 TESTS_REQUIRE = (
@@ -36,7 +38,6 @@
         "flake8-debugger~=4.1.2",
         "flake8-docstrings~=1.6.0",
         "flake8-isort~=4.1.2",
-        "hypothesis~=6.54.1",
         "ipykernel~=6.15.1",
         "jupyter~=1.0.0",
         # TODO: upgrade jupyter-client once
@@ -58,18 +59,24 @@
     + PARALLEL_REQUIRE
     + ATARI_REQUIRE
     + PYTYPE
+    + HYPOTHESIS
+)
+DOCS_REQUIRE = (
+    [
+        "sphinx~=5.1.1",
+        "sphinx-autodoc-typehints~=1.19.1",
+        "sphinx-rtd-theme~=1.0.0",
+        "sphinxcontrib-napoleon==0.7",
+        "furo==2022.6.21",
+        "sphinx-copybutton==0.5.0",
+        "sphinx-github-changelog~=1.2.0",
+        "myst-nb==0.17.2",
+        "ipykernel~=6.15.2",
+    ]
+    + ATARI_REQUIRE
+    + PARALLEL_REQUIRE
+    + HYPOTHESIS
 )
-DOCS_REQUIRE = [
-    "sphinx~=5.1.1",
-    "sphinx-autodoc-typehints~=1.19.1",
-    "sphinx-rtd-theme~=1.0.0",
-    "sphinxcontrib-napoleon==0.7",
-    "furo==2022.6.21",
-    "sphinx-copybutton==0.5.0",
-    "sphinx-github-changelog~=1.2.0",
-    "myst-nb==0.17.2",
-    "ipykernel~=6.15.2",
-] + ATARI_REQUIRE
 
 
 def get_readme() -> str: