thu-ml · Trinkle23897 · Oct 4, 2022 · Sep 25, 2022 · Sep 26, 2022 · Oct 2, 2022
diff --git a/docs/tutorials/tictactoe.rst b/docs/tutorials/tictactoe.rst
@@ -428,6 +428,7 @@ With the above preparation, we are close to the first learned agent. The followi
         agent_opponent: Optional[BasePolicy] = None,
     ) -> None:
         env = get_env()
+        env = DummyVectorEnv([lambda: env])
 if isinstance(env, gym.Env) and not hasattr(env, "__len__"): 
     warnings.warn("Single environment detected, wrap to DummyVectorEnv.") 
     self.env = DummyVectorEnv([lambda: env])  # type: ignore 
 if isinstance(env, gym.Env) and not hasattr(env, "__len__"): 
     warnings.warn("Single environment detected, wrap to DummyVectorEnv.") 
     self.env = DummyVectorEnv([lambda: env])  # type: ignore 
         policy, optim, agents = get_agents(
             args, agent_learn=agent_learn, agent_opponent=agent_opponent
         )