Divide into reinforcement learning and transformers to separate repos…

…itories for efficiency and fewer bugs as the size increases.
hallvardnmbu · Mar 12, 2024 · 4cae69d · 4cae69d
1 parent 0ded3f3
commit 4cae69d
Show file tree

Hide file tree

Showing 36 changed files with 38 additions and 53 deletions.
diff --git a/README.txt b/README.txt
@@ -1,12 +1,7 @@
-Modern applied deep learning with reinforcement and Transformer model methodology
+Modern applied deep learning with reinforcement methodology
 
----
-
-Special syllabus at NMBU (Norwegian University of Life Sciences)
-Spring 2024
-
-* Hallvard H. Lavik
-* Leo Q. T. Bækholt
+Special syllabus Spring 2024
+Norwegian University of Life Sciences (NMBU)
 
 ---
 
@@ -15,33 +10,21 @@ used.
 
 ---
 
-Syllabus:
-
-Reinforcement Learning:
-- "Human-level control through deep reinforcement learning" (doi:10.1038/nature14236)
-- "Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm" (arXiv:1712.01815v1)
-
-Transformer:
-- "Geometry of deep learning" (ISBN 978-981-16-6046-7)
-  - Chapter 9.3 ("Attention")
-  - Chapter 9.4.5 ("Transformer")
-  - Chapter 9.4.7 ("Generative Pre-trained Transformer (GPT)")
-- "Attention Is All You Need" (arXiv:1706.03762v7)
-- "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding" (arXiv:1810.04805v2)
-- "An image is worth 16x16 words: Transformers for image recognition at scale" (arXiv:2010.11929v2)
+Reinforcement learning:
+- "Human-level control through deep reinforcement learning"
+                                    doi:10.1038/nature14236
+- "Mastering Chess and Shogi by Self-Play with a General Reinforcement Learning Algorithm"
+                                                                        arXiv:1712.01815v1
 
 ---
 
 Learning goals:
 
 - Understand and know how to build, use and deploy reinforcement learning algorithms
-  * Experiment with reinforcement agent(s) (for instance playing chess)
-- Understand and know how to build, use and deploy Transformer architectures
-  * Experiment with architectures and applications (for instance, a language translator)
-
----
+  * Experiment with reinforcement agent(s) (for instance, playing video games)
 
 Learning outcomes:
 
 - Be competent in modern deep learning situations
-  * Understand (and to some extent be able to reproduce) cutting-edge “artificial intelligence” models
+  * Understand (and to some extent be able to reproduce) cutting-edge “artificial intelligence”
+    models
diff --git a/reinforcement-learning/breakout/DQN.py → breakout/DQN.py b/reinforcement-learning/breakout/DQN.py → breakout/DQN.py
diff --git a/...forcement-learning/breakout/example.ipynb → breakout/example.ipynb b/...forcement-learning/breakout/example.ipynb → breakout/example.ipynb
@@ -16,8 +16,8 @@
    "cell_type": "code",
    "outputs": [],
    "source": [
-    "WEIGHTS = './weights-10000'  # NB: without '.pth'\n",
-    "METRICS = './metrics.csv'"
+    "WEIGHTS = './_output/weights-15000.pth'\n",
+    "METRICS = './_output/metrics.csv'"
    ],
    "metadata": {
     "collapsed": false
@@ -37,8 +37,8 @@
     "from DQN import VisionDeepQ\n",
     "\n",
     "sys.path.append(\"../\")\n",
-    "from utilities.visualisation.plot import visualise_csv_grouped_rewards  # noqa\n",
-    "from utilities.visualisation.gif import gif_stacked  # noqa"
+    "from utilities.visualisation.plot import graph                                              # noqa\n",
+    "from utilities.visualisation.movie import movie                                             # noqa"
    ],
    "metadata": {
     "collapsed": false
@@ -66,15 +66,17 @@
     "    \"kernels\": [8, 4, 3],\n",
     "    \"padding\": [\"valid\", \"valid\", \"valid\"],\n",
     "    \"strides\": [4, 2, 1],\n",
-    "    \"nodes\": [128],\n",
+    "    \"nodes\": [],\n",
     "}\n",
     "optimizer = {\n",
     "    \"optimizer\": torch.optim.Adam,\n",
-    "    \"lr\": 0.0000625,\n",
-    "    \"hyperparameters\": {\"eps\": 1.5e-4}\n",
+    "    \"lr\": 1e-5,\n",
+    "    \"hyperparameters\": {}\n",
     "}\n",
     "shape = {\n",
     "    \"original\": (1, 1, 210, 160),\n",
+    "    \"width\": slice(7, -7),\n",
+    "    \"height\": slice(31, -17),\n",
     "    \"max_pooling\": 2,\n",
     "}\n",
     "skip = 4"
@@ -101,10 +103,10 @@
    "source": [
     "value_agent = VisionDeepQ(\n",
     "    network=network, optimizer=optimizer, shape=shape,\n",
-    "    exploration_rate=1.0,\n",
+    "    exploration_rate=0.002,\n",
     ")\n",
     "\n",
-    "weights = torch.load(f'{WEIGHTS}.pth', map_location=torch.device('cpu'))\n",
+    "weights = torch.load(WEIGHTS, map_location=torch.device('cpu'))\n",
     "value_agent.load_state_dict(weights)\n",
     "\n",
     "environment = gym.make('ALE/Breakout-v5', render_mode=\"rgb_array\",\n",
@@ -141,7 +143,7 @@
    "cell_type": "code",
    "outputs": [],
    "source": [
-    "visualise_csv_grouped_rewards(METRICS, title=\"Training history\", window=20) if METRICS else None\n",
+    "graph(METRICS, title=\"Training history\", window=20) if METRICS else None\n",
     "plt.show() if METRICS else None"
    ],
    "metadata": {
@@ -164,7 +166,7 @@
    "cell_type": "code",
    "outputs": [],
    "source": [
-    "gif_stacked(environment, value_agent, f'./{WEIGHTS}.gif', skip)"
+    "movie(environment, value_agent, './_output/breakout.avi', fps=60)"
    ],
    "metadata": {
     "collapsed": false

diff --git a/reinforcement-learning/breakout/train.py → breakout/train.py b/reinforcement-learning/breakout/train.py → breakout/train.py
diff --git a/reinforcement-learning/cart-pole/DQN.ipynb → cart-pole/DQN.ipynb b/reinforcement-learning/cart-pole/DQN.ipynb → cart-pole/DQN.ipynb
diff --git a/reinforcement-learning/cart-pole/DQN.py → cart-pole/DQN.py b/reinforcement-learning/cart-pole/DQN.py → cart-pole/DQN.py
diff --git a/...cement-learning/cart-pole/REINFORCE.ipynb → cart-pole/REINFORCE.ipynb b/...cement-learning/cart-pole/REINFORCE.ipynb → cart-pole/REINFORCE.ipynb
diff --git a/...forcement-learning/cart-pole/REINFORCE.py → cart-pole/REINFORCE.py b/...forcement-learning/cart-pole/REINFORCE.py → cart-pole/REINFORCE.py
diff --git a/...orcement-learning/cart-pole/mlx/DQN.ipynb → cart-pole/mlx/DQN.ipynb b/...orcement-learning/cart-pole/mlx/DQN.ipynb → cart-pole/mlx/DQN.ipynb
diff --git a/reinforcement-learning/cart-pole/mlx/DQN.py → cart-pole/mlx/DQN.py b/reinforcement-learning/cart-pole/mlx/DQN.py → cart-pole/mlx/DQN.py
diff --git a/...nt-learning/cart-pole/mlx/REINFORCE.ipynb → cart-pole/mlx/REINFORCE.ipynb b/...nt-learning/cart-pole/mlx/REINFORCE.ipynb → cart-pole/mlx/REINFORCE.ipynb
diff --git a/...ement-learning/cart-pole/mlx/REINFORCE.py → cart-pole/mlx/REINFORCE.py b/...ement-learning/cart-pole/mlx/REINFORCE.py → cart-pole/mlx/REINFORCE.py
diff --git a/reinforcement-learning/enduro/DQN.py → enduro/DQN.py b/reinforcement-learning/enduro/DQN.py → enduro/DQN.py
diff --git a/reinforcement-learning/enduro/example.ipynb → enduro/example.ipynb b/reinforcement-learning/enduro/example.ipynb → enduro/example.ipynb
@@ -16,8 +16,8 @@
    "cell_type": "code",
    "outputs": [],
    "source": [
-    "WEIGHTS = './weights-10000'  # NB: without '.pth'\n",
-    "METRICS = './metrics.csv'"
+    "WEIGHTS = './_output/weights-0.pth'\n",
+    "METRICS = None #'./_output/metrics.csv'"
    ],
    "metadata": {
     "collapsed": false
@@ -37,8 +37,8 @@
     "from DQN import VisionDeepQ\n",
     "\n",
     "sys.path.append(\"../\")\n",
-    "from utilities.visualisation.plot import visualise_csv  # noqa\n",
-    "from utilities.visualisation.gif import gif_stacked  # noqa"
+    "from utilities.visualisation.plot import visualise_csv                                       # noqa\n",
+    "from utilities.visualisation.gif import gif                                                  # noqa"
    ],
    "metadata": {
     "collapsed": false
@@ -61,16 +61,16 @@
    "outputs": [],
    "source": [
     "network = {\n",
-    "    \"input_channels\": 10, \"outputs\": 8,\n",
+    "    \"input_channels\": 2, \"outputs\": 9,\n",
     "    \"channels\": [32, 64, 64],\n",
     "    \"kernels\": [8, 4, 3],\n",
     "    \"padding\": [\"valid\", \"valid\", \"valid\"],\n",
     "    \"strides\": [4, 2, 1],\n",
     "    \"nodes\": [512],\n",
     "}\n",
     "optimizer = {\n",
-    "    \"optimizer\": torch.optim.Adam,\n",
-    "    \"lr\": 0.00025,\n",
+    "    \"optimizer\": torch.optim.RMSprop,\n",
+    "    \"lr\": 0.0001,\n",
     "    \"hyperparameters\": {}\n",
     "}\n",
     "shape = {\n",
@@ -105,7 +105,7 @@
     "    exploration_rate=0.01,\n",
     ")\n",
     "\n",
-    "weights = torch.load(f'{WEIGHTS}.pth', map_location=torch.device('cpu'))\n",
+    "weights = torch.load(WEIGHTS, map_location=torch.device('cpu'))\n",
     "value_agent.load_state_dict(weights)\n",
     "\n",
     "environment = gym.make('ALE/Enduro-v5', render_mode=\"rgb_array\",\n",
@@ -165,7 +165,7 @@
    "cell_type": "code",
    "outputs": [],
    "source": [
-    "gif_stacked(environment, value_agent, f'./{WEIGHTS}.gif', skip)"
+    "gif(environment, value_agent, './_output/enduro-0.gif', skip, 25)"
    ],
    "metadata": {
     "collapsed": false

diff --git a/reinforcement-learning/enduro/train.py → enduro/train.py b/reinforcement-learning/enduro/train.py → enduro/train.py
diff --git a/reinforcement-learning/frozen-lake/Q.ipynb → frozen-lake/Q.ipynb b/reinforcement-learning/frozen-lake/Q.ipynb → frozen-lake/Q.ipynb
diff --git a/reinforcement-learning/frozen-lake/Q.py → frozen-lake/Q.py b/reinforcement-learning/frozen-lake/Q.py → frozen-lake/Q.py
diff --git a/reinforcement-learning/tetris/_output/tetris.gif b/reinforcement-learning/tetris/_output/tetris.gif
diff --git a/reinforcement-learning/report.pdf → report.pdf b/reinforcement-learning/report.pdf → report.pdf
diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,6 @@
 # Might need: brew install swig libjpeg libpng
 
-gymnasium[all]
+gymnasium[atari]
 autorom[accept-rom-license]
 
 torch

diff --git a/reinforcement-learning/tetris/DQN.py → tetris/DQN.py b/reinforcement-learning/tetris/DQN.py → tetris/DQN.py
diff --git a/...ement-learning/tetris/_output/weights.pth → tetris/_output/weights.pth b/...ement-learning/tetris/_output/weights.pth → tetris/_output/weights.pth
diff --git a/reinforcement-learning/tetris/example.ipynb → tetris/example.ipynb b/reinforcement-learning/tetris/example.ipynb → tetris/example.ipynb
@@ -41,8 +41,8 @@
     "from DQN import DeepQ\n",
     "\n",
     "sys.path.append(\"../\")\n",
-    "from utilities.visualisation.plot import visualise_csv_grouped_rewards  # noqa\n",
-    "from utilities.visualisation.gif import gif_stacked  # noqa"
+    "from utilities.visualisation.plot import graph                                              # noqa\n",
+    "from utilities.visualisation.gif import gif                                                 # noqa"
    ],
    "metadata": {
     "collapsed": false,
@@ -160,7 +160,7 @@
     }
    ],
    "source": [
-    "visualise_csv_grouped_rewards(METRICS, title=\"Tetris (RAM)\", window=50) if METRICS else None\n",
+    "graph(METRICS, title=\"Tetris (RAM)\", window=50) if METRICS else None\n",
     "plt.savefig('./_output/metrics.png') if METRICS else None\n",
     "plt.show() if METRICS else None"
    ],
@@ -188,7 +188,7 @@
    "cell_type": "code",
    "outputs": [],
    "source": [
-    "gif_stacked(environment, value_agent, './_output/tetris.gif', skip)"
+    "gif(environment, value_agent, './_output/tetris.gif', skip)"
    ],
    "metadata": {
     "collapsed": false,

diff --git a/reinforcement-learning/tetris/mlx/DQN.ipynb → tetris/mlx/DQN.ipynb b/reinforcement-learning/tetris/mlx/DQN.ipynb → tetris/mlx/DQN.ipynb
diff --git a/reinforcement-learning/tetris/mlx/DQN.py → tetris/mlx/DQN.py b/reinforcement-learning/tetris/mlx/DQN.py → tetris/mlx/DQN.py
diff --git a/reinforcement-learning/tetris/train.py → tetris/train.py b/reinforcement-learning/tetris/train.py → tetris/train.py
diff --git a/...tris/transfer-learning/DQN-ResNet18.ipynb → tetris/transfer-learning/DQN-ResNet18.ipynb b/...tris/transfer-learning/DQN-ResNet18.ipynb → tetris/transfer-learning/DQN-ResNet18.ipynb
diff --git a/...-learning/tetris/transfer-learning/DQN.py → tetris/transfer-learning/DQN.py b/...-learning/tetris/transfer-learning/DQN.py → tetris/transfer-learning/DQN.py
diff --git a/transformer/empty b/transformer/empty
diff --git a/...nt-learning/utilities/orion-hpc/README.md → utilities/orion-hpc/README.md b/...nt-learning/utilities/orion-hpc/README.md → utilities/orion-hpc/README.md
diff --git a/...ies/orion-hpc/singularity/singularity.def → ...ies/orion-hpc/singularity/singularity.def b/...ies/orion-hpc/singularity/singularity.def → ...ies/orion-hpc/singularity/singularity.def
diff --git a/...ties/orion-hpc/singularity/singularity.sh → ...ties/orion-hpc/singularity/singularity.sh b/...ties/orion-hpc/singularity/singularity.sh → ...ties/orion-hpc/singularity/singularity.sh
diff --git a/...ent-learning/utilities/orion-hpc/train.sh → utilities/orion-hpc/train.sh b/...ent-learning/utilities/orion-hpc/train.sh → utilities/orion-hpc/train.sh
diff --git a/...t-learning/utilities/visualisation/gif.py → utilities/visualisation/gif.py b/...t-learning/utilities/visualisation/gif.py → utilities/visualisation/gif.py
diff --git a/...learning/utilities/visualisation/movie.py → utilities/visualisation/movie.py b/...learning/utilities/visualisation/movie.py → utilities/visualisation/movie.py
diff --git a/...-learning/utilities/visualisation/plot.py → utilities/visualisation/plot.py b/...-learning/utilities/visualisation/plot.py → utilities/visualisation/plot.py