From b3f7d70dfdefbd867b9a076ec05e66847b9e8816 Mon Sep 17 00:00:00 2001
From: Matteo Bettini
Date: Mon, 22 Apr 2024 16:17:18 +0100
Subject: [PATCH] link

---
 tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
index eadadcd97ae..2b1f4f3bbbf 100644
--- a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
+++ b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
@@ -59,7 +59,7 @@
 # This kind of algorithm is typically trained off-policy. For more info on off-policy learning see
 # *Sutton, Richard S., and Andrew G. Barto. Reinforcement learning: An introduction. MIT press, 2018*.
 #
-# .. figure:: https://github.com/matteobettini/vmas-media/blob/main/media/off-policy-vmas-loop.png?raw=true
+# .. figure:: https://pytorch.s3.amazonaws.com/torchrl/github-artifacts/img/off-policy-vmas-loop-min.png
 #    :alt: Off-policy learning
 #
 #    Off-policy learning
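
Note: the figure whose link this patch fixes illustrates the off-policy collection/training loop that the tutorial describes at that point. For orientation only, below is a minimal, self-contained sketch of that pattern using TorchRL's ReplayBuffer; the random stand-in batch, the buffer capacity, and the loop count are illustrative assumptions, not code taken from the tutorial.

import torch
from tensordict import TensorDict
from torchrl.data import LazyTensorStorage, ReplayBuffer

# Off-policy pattern: store collected transitions in a buffer, then
# train on samples that may have been produced by an older policy.
buffer = ReplayBuffer(storage=LazyTensorStorage(10_000))  # assumed capacity

for _ in range(10):  # stand-in for the collector iterations
    # Stand-in for a collected batch of transitions (random data here).
    batch = TensorDict(
        {"observation": torch.randn(64, 4), "reward": torch.randn(64, 1)},
        batch_size=[64],
    )
    buffer.extend(batch)        # add the freshly collected frames
    sample = buffer.sample(32)  # sampled data may predate the current policy
    # ... compute the DDPG losses on `sample` and step the optimizers ...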