From b3f7d70dfdefbd867b9a076ec05e66847b9e8816 Mon Sep 17 00:00:00 2001
From: Matteo Bettini
Date: Mon, 22 Apr 2024 16:17:18 +0100
Subject: [PATCH] link

---
 tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
index eadadcd97ae..2b1f4f3bbbf 100644
--- a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
+++ b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
@@ -59,7 +59,7 @@
 # This kind of algorithm is typically trained off-policy. For more info on off-policy learning see
 # *Sutton, Richard S., and Andrew G. Barto. Reinforcement learning: An introduction. MIT press, 2018*.
 #
-# .. figure:: https://github.com/matteobettini/vmas-media/blob/main/media/off-policy-vmas-loop.png?raw=true
+# .. figure:: https://pytorch.s3.amazonaws.com/torchrl/github-artifacts/img/off-policy-vmas-loop-min.png
 #    :alt: Off-policy learning
 #
 #    Off-policy learning
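
Note: the figure whose link this patch fixes illustrates the off-policy collection/training loop that the tutorial describes at that point. For orientation only, below is a minimal, self-contained sketch of that pattern using TorchRL's ReplayBuffer; the random stand-in batch, the buffer capacity, and the loop count are illustrative assumptions, not code taken from the tutorial.

import torch
from tensordict import TensorDict
from torchrl.data import LazyTensorStorage, ReplayBuffer

# Off-policy pattern: store collected transitions in a buffer, then
# train on samples that may have been produced by an older policy.
buffer = ReplayBuffer(storage=LazyTensorStorage(10_000))  # assumed capacity

for _ in range(10):  # stand-in for the collector iterations
    # Stand-in for a collected batch of transitions (random data here).
    batch = TensorDict(
        {"observation": torch.randn(64, 4), "reward": torch.randn(64, 1)},
        batch_size=[64],
    )
    buffer.extend(batch)        # add the freshly collected frames
    sample = buffer.sample(32)  # sampled data may predate the current policy
    # ... compute the DDPG losses on `sample` and step the optimizers ...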