diff --git a/docs/misc/changelog.rst b/docs/misc/changelog.rst
index a44a03fe3..6a5691be7 100644
--- a/docs/misc/changelog.rst
+++ b/docs/misc/changelog.rst
@@ -10,10 +10,12 @@ Pre-Release 0.4.0a0 (WIP)
 Breaking Changes:
 ^^^^^^^^^^^^^^^^^
 - Removed CEMRL
+- Models saved with previous versions cannot be loaded (because of the pre-processing)

 New Features:
 ^^^^^^^^^^^^^
 - Add support for Discrete observation spaces
+- Add saving/loading for policy weights, so the policy can be used without the model

 Bug Fixes:
 ^^^^^^^^^^
@@ -26,6 +28,8 @@ Others:
 ^^^^^^^
 - Refactor handling of observation and action spaces
 - Refactored features extraction to have proper preprocessing
+- Refactored action distributions
+

 Documentation:
 ^^^^^^^^^^^^^^
diff --git a/tests/test_distributions.py b/tests/test_distributions.py
index eb9fbaf07..4a1294c47 100644
--- a/tests/test_distributions.py
+++ b/tests/test_distributions.py
@@ -38,7 +38,8 @@ def test_squashed_gaussian(model_class):
     gaussian_mean = th.rand(N_SAMPLES, N_ACTIONS)
     dist = SquashedDiagGaussianDistribution(N_ACTIONS)
     _, log_std = dist.proba_distribution_net(N_FEATURES)
-    actions, _ = dist.proba_distribution(gaussian_mean, log_std)
+    dist = dist.proba_distribution(gaussian_mean, log_std)
+    actions = dist.get_action()
     assert th.max(th.abs(actions)) <= 1.0

 def test_sde_distribution():
@@ -51,7 +52,8 @@ def test_sde_distribution():
     _, log_std = dist.proba_distribution_net(N_FEATURES)
     dist.sample_weights(log_std, batch_size=N_SAMPLES)

-    actions, _ = dist.proba_distribution(deterministic_actions, log_std, state)
+    dist = dist.proba_distribution(deterministic_actions, log_std, state)
+    actions = dist.get_action()
     assert th.allclose(actions.mean(), dist.distribution.mean.mean(), rtol=1e-3)
     assert th.allclose(actions.std(), dist.distribution.scale.mean(), rtol=1e-3)
@@ -71,11 +73,12 @@ def test_entropy(dist):
     _, log_std = dist.proba_distribution_net(N_FEATURES, log_std_init=th.log(th.tensor(0.2)))

     if isinstance(dist, DiagGaussianDistribution):
-        actions, dist = dist.proba_distribution(deterministic_actions, log_std)
+        dist = dist.proba_distribution(deterministic_actions, log_std)
     else:
         dist.sample_weights(log_std, batch_size=N_SAMPLES)
-        actions, dist = dist.proba_distribution(deterministic_actions, log_std, state)
+        dist = dist.proba_distribution(deterministic_actions, log_std, state)
+    actions = dist.get_action()
     entropy = dist.entropy()
     log_prob = dist.log_prob(actions)
     assert th.allclose(entropy.mean(), -log_prob.mean(), rtol=5e-3)
@@ -88,8 +91,9 @@ def test_categorical():
     set_random_seed(1)
     state = th.rand(N_SAMPLES, N_FEATURES)
     action_logits = th.rand(N_SAMPLES, N_ACTIONS)
-    actions, dist = dist.proba_distribution(action_logits)
+    dist = dist.proba_distribution(action_logits)
+    actions = dist.get_action()
     entropy = dist.entropy()
     log_prob = dist.log_prob(actions)
     assert th.allclose(entropy.mean(), -log_prob.mean(), rtol=1e-4)
diff --git a/tests/test_identity.py b/tests/test_identity.py
index 87dc6acbd..b717c0a56 100644
--- a/tests/test_identity.py
+++ b/tests/test_identity.py
@@ -20,7 +20,7 @@ def test_continuous(model_class):
     env = IdentityEnvBox(eps=0.5)

     n_steps = {
-        A2C: 3000,
+        A2C: 3500,
         PPO: 3000,
         SAC: 700,
         TD3: 500
diff --git a/tests/test_save_load.py b/tests/test_save_load.py
index 076ec924f..64fe2481b 100644
--- a/tests/test_save_load.py
+++ b/tests/test_save_load.py
@@ -16,7 +16,7 @@
     SAC,
 ]

-
+#
 @pytest.mark.parametrize("model_class", MODEL_LIST)
 def test_save_load(model_class):
     """
@@ -160,3 +160,61 @@ def 
test_save_load_replay_buffer(model_class): # clear file from os os.remove(replay_path) + + +@pytest.mark.parametrize("model_class", MODEL_LIST) +def test_save_load_policy(model_class): + """ + Test saving and loading policy only. + + :param model_class: (BaseRLModel) A RL model + """ + env = DummyVecEnv([lambda: IdentityEnvBox(10)]) + + # create model + model = model_class('MlpPolicy', env, policy_kwargs=dict(net_arch=[16]), verbose=1, create_eval_env=True) + model.learn(total_timesteps=500, eval_freq=250) + + env.reset() + observations = np.array([env.step(env.action_space.sample())[0] for _ in range(10)]) + observations = observations.reshape(10, -1) + + policy = model.policy + + # Get dictionary of current parameters + params = deepcopy(policy.state_dict()) + + # Modify all parameters to be random values + random_params = dict((param_name, th.rand_like(param)) for param_name, param in params.items()) + + # Update model parameters with the new random values + policy.load_state_dict(random_params) + + new_params = policy.state_dict() + # Check that all params are different now + for k in params: + assert not th.allclose(params[k], new_params[k]), "Parameters did not change as expected." + + params = new_params + + # get selected actions + selected_actions, _ = policy.predict(observations, deterministic=True) + + # Save and load policy + policy.save("./logs/policy_weights.pkl") + # del policy + policy.load("./logs/policy_weights.pkl") + + # check if params are still the same after load + new_params = policy.state_dict() + + # Check that all params are the same as before save load procedure now + for key in params: + assert th.allclose(params[key], new_params[key]), "Policy parameters not the same after save and load." + + # check if model still selects the same actions + new_selected_actions, _ = policy.predict(observations, deterministic=True) + assert np.allclose(selected_actions, new_selected_actions, 1e-4) + + # clear file from os + os.remove("./logs/policy_weights.pkl") diff --git a/torchy_baselines/common/base_class.py b/torchy_baselines/common/base_class.py index c171a4cc7..3c36d49cc 100644 --- a/torchy_baselines/common/base_class.py +++ b/torchy_baselines/common/base_class.py @@ -158,27 +158,6 @@ def _get_eval_env(self, eval_env: Optional[GymEnv]) -> Optional[GymEnv]: assert eval_env.num_envs == 1 return eval_env - def scale_action(self, action: np.ndarray) -> np.ndarray: - """ - Rescale the action from [low, high] to [-1, 1] - (no need for symmetric action space) - - :param action: (np.ndarray) Action to scale - :return: (np.ndarray) Scaled action - """ - low, high = self.action_space.low, self.action_space.high - return 2.0 * ((action - low) / (high - low)) - 1.0 - - def unscale_action(self, scaled_action: np.ndarray) -> np.ndarray: - """ - Rescale the action from [-1, 1] to [low, high] - (no need for symmetric action space) - - :param scaled_action: Action to un-scale - """ - low, high = self.action_space.low, self.action_space.high - return low + (0.5 * (scaled_action + 1.0) * (high - low)) - def _setup_lr_schedule(self) -> None: """Transform to callable if needed.""" self.lr_schedule = get_schedule_fn(self.learning_rate) @@ -318,57 +297,6 @@ def learn(self, total_timesteps: int, """ raise NotImplementedError() - @staticmethod - def _is_vectorized_observation(observation: np.ndarray, observation_space: gym.spaces.Space) -> bool: - """ - For every observation type, detects and validates the shape, - then returns whether or not the observation is vectorized. 
- - :param observation: (np.ndarray) the input observation to validate - :param observation_space: (gym.spaces) the observation space - :return: (bool) whether the given observation is vectorized or not - """ - if isinstance(observation_space, gym.spaces.Box): - if observation.shape == observation_space.shape: - return False - elif observation.shape[1:] == observation_space.shape: - return True - else: - raise ValueError("Error: Unexpected observation shape {} for ".format(observation.shape) + - "Box environment, please use {} ".format(observation_space.shape) + - "or (n_env, {}) for the observation shape." - .format(", ".join(map(str, observation_space.shape)))) - elif isinstance(observation_space, gym.spaces.Discrete): - if observation.shape == (): # A numpy array of a number, has shape empty tuple '()' - return False - elif len(observation.shape) == 1: - return True - else: - raise ValueError("Error: Unexpected observation shape {} for ".format(observation.shape) + - "Discrete environment, please use (1,) or (n_env, 1) for the observation shape.") - # TODO: add support for MultiDiscrete and MultiBinary observation spaces - # elif isinstance(observation_space, gym.spaces.MultiDiscrete): - # if observation.shape == (len(observation_space.nvec),): - # return False - # elif len(observation.shape) == 2 and observation.shape[1] == len(observation_space.nvec): - # return True - # else: - # raise ValueError("Error: Unexpected observation shape {} for MultiDiscrete ".format(observation.shape) + - # "environment, please use ({},) or ".format(len(observation_space.nvec)) + - # "(n_env, {}) for the observation shape.".format(len(observation_space.nvec))) - # elif isinstance(observation_space, gym.spaces.MultiBinary): - # if observation.shape == (observation_space.n,): - # return False - # elif len(observation.shape) == 2 and observation.shape[1] == observation_space.n: - # return True - # else: - # raise ValueError("Error: Unexpected observation shape {} for MultiBinary ".format(observation.shape) + - # "environment, please use ({},) or ".format(observation_space.n) + - # "(n_env, {}) for the observation shape.".format(observation_space.n)) - else: - raise ValueError("Error: Cannot determine if the observation is vectorized with the space type {}." 
- .format(observation_space)) - def predict(self, observation: np.ndarray, state: Optional[np.ndarray] = None, mask: Optional[np.ndarray] = None, @@ -383,36 +311,7 @@ def predict(self, observation: np.ndarray, :return: (Tuple[np.ndarray, Optional[np.ndarray]]) the model's action and the next state (used in recurrent policies) """ - # TODO: move this block to BasePolicy - # if state is None: - # state = self.initial_state - # if mask is None: - # mask = [False for _ in range(self.n_envs)] - observation = np.array(observation) - vectorized_env = self._is_vectorized_observation(observation, self.observation_space) - - observation = observation.reshape((-1,) + self.observation_space.shape) - observation = th.as_tensor(observation).to(self.device) - with th.no_grad(): - actions = self.policy.predict(observation, deterministic=deterministic) - # Convert to numpy - actions = actions.cpu().numpy() - - # Rescale to proper domain when using squashing - if isinstance(self.action_space, gym.spaces.Box) and self.policy.squash_output: - actions = self.unscale_action(actions) - - clipped_actions = actions - # Clip the actions to avoid out of bound error when using gaussian distribution - if isinstance(self.action_space, gym.spaces.Box) and not self.policy.squash_output: - clipped_actions = np.clip(actions, self.action_space.low, self.action_space.high) - - if not vectorized_env: - if state is not None: - raise ValueError("Error: The environment must be vectorized when using recurrent policies.") - clipped_actions = clipped_actions[0] - - return clipped_actions, state + return self.policy.predict(observation, state, mask, deterministic) @classmethod def load(cls, load_path: str, env: Optional[GymEnv] = None, **kwargs): @@ -484,10 +383,7 @@ def _load_from_file(load_path: str, load_data: bool = True) -> (Tuple[Optional[D raise ValueError(f"Error: the file {load_path} could not be found") # set device to cpu if cuda is not available - if th.cuda.is_available(): - device = th.device('cuda') - else: - device = th.device('cpu') + device = th.device('cuda') if th.cuda.is_available() else th.device('cpu') # Open the zip archive and load data try: @@ -534,20 +430,6 @@ def _load_from_file(load_path: str, load_data: bool = True) -> (Tuple[Optional[D # load the parameters with the right `map_location` params[os.path.splitext(file_path)[0]] = th.load(file_content, map_location=device) - # for backward compatibility - if params.get('params') is not None: - params_copy = {} - for name in params: - if name == 'params': - params_copy['policy'] = params[name] - elif name == 'opt': - params_copy['policy.optimizer'] = params[name] - # Special case for SAC - elif name == 'ent_coef_optimizer': - params_copy[name] = params[name] - else: - params_copy[name + '.optimizer'] = params[name] - params = params_copy except zipfile.BadZipFile: # load_path wasn't a zip file raise ValueError(f"Error: the file {load_path} wasn't a zip-file") @@ -925,7 +807,7 @@ def collect_rollouts(self, unscaled_action, _ = self.predict(obs, deterministic=False) # Rescale the action from [low, high] to [-1, 1] - scaled_action = self.scale_action(unscaled_action) + scaled_action = self.policy.scale_action(unscaled_action) if self.use_sde: # When using SDE, the action can be out of bounds @@ -941,7 +823,7 @@ def collect_rollouts(self, clipped_action = np.clip(clipped_action + action_noise(), -1, 1) # Rescale and perform action - new_obs, reward, done, infos = env.step(self.unscale_action(clipped_action)) + new_obs, reward, done, infos = 
env.step(self.policy.unscale_action(clipped_action))

             # Only stop training if return value is False, not when it is None.
             if callback.on_step() is False:
diff --git a/torchy_baselines/common/distributions.py b/torchy_baselines/common/distributions.py
index baf54aaa3..62d14b099 100644
--- a/torchy_baselines/common/distributions.py
+++ b/torchy_baselines/common/distributions.py
@@ -33,12 +33,52 @@ def entropy(self) -> Optional[th.Tensor]:

     def sample(self) -> th.Tensor:
         """
-        returns a sample from the probabilty distribution
+        Returns a sample from the probability distribution

         :return: (th.Tensor) the stochastic action
         """
         raise NotImplementedError

+    def mode(self) -> th.Tensor:
+        """
+        Returns the most likely action (deterministic output)
+        from the probability distribution
+
+        :return: (th.Tensor) the deterministic action
+        """
+        raise NotImplementedError
+
+    def get_action(self, deterministic: bool = False) -> th.Tensor:
+        """
+        Return an action according to the probability distribution.
+
+        :param deterministic: (bool)
+        :return: (th.Tensor)
+        """
+        if deterministic:
+            return self.mode()
+        else:
+            return self.sample()
+
+    def action_from_params(self, *args, **kwargs) -> th.Tensor:
+        """
+        Returns a sample from the probability distribution
+        given its parameters.
+
+        :return: (th.Tensor) the action
+        """
+        raise NotImplementedError
+
+    def log_prob_from_params(self, *args, **kwargs) -> Tuple[th.Tensor, th.Tensor]:
+        """
+        Returns a sample and the associated log probability
+        from the probability distribution
+        given its parameters.
+
+        :return: (Tuple[th.Tensor, th.Tensor]) action and log prob
+        """
+        raise NotImplementedError
+


 def sum_independent_dims(tensor: th.Tensor) -> th.Tensor:
     """
@@ -88,23 +128,17 @@ def proba_distribution_net(self, latent_dim: int,
         return mean_actions, log_std

     def proba_distribution(self, mean_actions: th.Tensor,
-                           log_std: th.Tensor,
-                           deterministic: bool = False) -> Tuple[th.Tensor, 'DiagGaussianDistribution']:
+                           log_std: th.Tensor) -> 'DiagGaussianDistribution':
         """
-        Create and sample for the distribution given its parameters (mean, std)
+        Create the distribution given its parameters (mean, std)

         :param mean_actions: (th.Tensor)
         :param log_std: (th.Tensor)
-        :param deterministic: (bool)
-        :return: (th.Tensor)
+        :return: (DiagGaussianDistribution)
         """
         action_std = th.ones_like(mean_actions) * log_std.exp()
         self.distribution = Normal(mean_actions, action_std)
-        if deterministic:
-            action = self.mode()
-        else:
-            action = self.sample()
-        return action, self
+        return self

     def mode(self) -> th.Tensor:
         return self.distribution.mean
@@ -115,7 +149,15 @@ def sample(self) -> th.Tensor:
     def entropy(self) -> th.Tensor:
         return sum_independent_dims(self.distribution.entropy())

-    def log_prob_from_params(self, mean_actions: th.Tensor, log_std: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
+    def action_from_params(self, mean_actions: th.Tensor,
+                           log_std: th.Tensor,
+                           deterministic: bool = False) -> th.Tensor:
+        # Update the proba distribution
+        self.proba_distribution(mean_actions, log_std)
+        return self.get_action(deterministic=deterministic)
+
+    def log_prob_from_params(self, mean_actions: th.Tensor,
+                             log_std: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
         """
         Compute the log probabilty of taking an action
         given the distribution parameters.
@@ -124,7 +166,7 @@ def log_prob_from_params(self, mean_actions: th.Tensor, log_std: th.Tensor) -> T :param log_std: (th.Tensor) :return: (Tuple[th.Tensor, th.Tensor]) """ - action, _ = self.proba_distribution(mean_actions, log_std) + action = self.action_from_params(mean_actions, log_std) log_prob = self.log_prob(action) return action, log_prob @@ -156,10 +198,10 @@ def __init__(self, action_dim: int, epsilon: float = 1e-6): self.epsilon = epsilon self.gaussian_action = None - def proba_distribution(self, mean_actions, log_std, deterministic=False): - action, _ = super(SquashedDiagGaussianDistribution, self).proba_distribution(mean_actions, log_std, - deterministic) - return action, self + def proba_distribution(self, mean_actions: th.Tensor, + log_std: th.Tensor) -> 'SquashedDiagGaussianDistribution': + super(SquashedDiagGaussianDistribution, self).proba_distribution(mean_actions, log_std) + return self def mode(self) -> th.Tensor: self.gaussian_action = self.distribution.mean @@ -175,12 +217,14 @@ def sample(self) -> th.Tensor: self.gaussian_action = self.distribution.rsample() return th.tanh(self.gaussian_action) - def log_prob_from_params(self, mean_actions, log_std) -> Tuple[th.Tensor, th.Tensor]: - action, _ = self.proba_distribution(mean_actions, log_std) + def log_prob_from_params(self, mean_actions: th.Tensor, + log_std: th.Tensor) -> Tuple[th.Tensor, th.Tensor]: + action = self.action_from_params(mean_actions, log_std) log_prob = self.log_prob(action, self.gaussian_action) return action, log_prob - def log_prob(self, action: th.Tensor, gaussian_action: Optional[th.Tensor] = None) -> th.Tensor: + def log_prob(self, action: th.Tensor, + gaussian_action: Optional[th.Tensor] = None) -> th.Tensor: # Inverse tanh # Naive implementation (not stable): 0.5 * torch.log((1 + x) / (1 - x)) # We use numpy to avoid numerical instability @@ -220,14 +264,9 @@ def proba_distribution_net(self, latent_dim: int) -> nn.Module: action_logits = nn.Linear(latent_dim, self.action_dim) return action_logits - def proba_distribution(self, action_logits: th.Tensor, - deterministic: bool = False) -> Tuple[th.Tensor, 'CategoricalDistribution']: + def proba_distribution(self, action_logits: th.Tensor) -> 'CategoricalDistribution': self.distribution = Categorical(logits=action_logits) - if deterministic: - action = self.mode() - else: - action = self.sample() - return action, self + return self def mode(self) -> th.Tensor: return th.argmax(self.distribution.probs, dim=1) @@ -238,8 +277,14 @@ def sample(self) -> th.Tensor: def entropy(self) -> th.Tensor: return self.distribution.entropy() + def action_from_params(self, action_logits: th.Tensor, + deterministic: bool = False) -> th.Tensor: + # Update the proba distribution + self.proba_distribution(action_logits) + return self.get_action(deterministic=deterministic) + def log_prob_from_params(self, action_logits: th.Tensor) -> Tuple[th.Tensor, th.Tensor]: - action, _ = self.proba_distribution(action_logits) + action = self.action_from_params(action_logits) log_prob = self.log_prob(action) return action, log_prob @@ -283,6 +328,7 @@ def __init__(self, action_dim: int, self.weights_dist = None self.exploration_mat = None self.exploration_matrices = None + self._latent_sde = None self.use_expln = use_expln self.full_std = full_std self.epsilon = epsilon @@ -358,27 +404,26 @@ def proba_distribution_net(self, latent_dim: int, log_std_init: float = -2.0, def proba_distribution(self, mean_actions: th.Tensor, log_std: th.Tensor, - latent_sde: th.Tensor, - deterministic: 
bool = False) -> Tuple[th.Tensor, 'StateDependentNoiseDistribution']:
+                           latent_sde: th.Tensor) -> 'StateDependentNoiseDistribution':
         """
         Create and sample for the distribution given its parameters (mean, std)

         :param mean_actions: (th.Tensor)
         :param log_std: (th.Tensor)
         :param latent_sde: (th.Tensor)
-        :param deterministic: (bool)
-        :return: (Tuple[th.Tensor, Distribution])
+        :return: (StateDependentNoiseDistribution)
         """
         # Stop gradient if we don't want to influence the features
-        latent_sde = latent_sde if self.learn_features else latent_sde.detach()
-        variance = th.mm(latent_sde ** 2, self.get_std(log_std) ** 2)
+        self._latent_sde = latent_sde if self.learn_features else latent_sde.detach()
+        variance = th.mm(self._latent_sde ** 2, self.get_std(log_std) ** 2)
         self.distribution = Normal(mean_actions, th.sqrt(variance + self.epsilon))
+        return self
+
+    def get_action(self, deterministic: bool = False) -> th.Tensor:
         if deterministic:
-            action = self.mode()
+            return self.mode()
         else:
-            action = self.sample(latent_sde)
-        return action, self
+            return self.sample(self._latent_sde)

     def mode(self) -> th.Tensor:
         action = self.distribution.mean
@@ -412,10 +457,18 @@ def entropy(self) -> Optional[th.Tensor]:
             return None
         return sum_independent_dims(self.distribution.entropy())

+    def action_from_params(self, mean_actions: th.Tensor,
+                           log_std: th.Tensor,
+                           latent_sde: th.Tensor,
+                           deterministic: bool = False) -> th.Tensor:
+        # Update the proba distribution
+        self.proba_distribution(mean_actions, log_std, latent_sde)
+        return self.get_action(deterministic=deterministic)
+
     def log_prob_from_params(self, mean_actions: th.Tensor,
                              log_std: th.Tensor,
                              latent_sde: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
-        action, _ = self.proba_distribution(mean_actions, log_std, latent_sde)
+        action = self.action_from_params(mean_actions, log_std, latent_sde)
         log_prob = self.log_prob(action)
         return action, log_prob

diff --git a/torchy_baselines/common/policies.py b/torchy_baselines/common/policies.py
index 85f0b18f5..d6677d6c3 100644
--- a/torchy_baselines/common/policies.py
+++ b/torchy_baselines/common/policies.py
@@ -63,7 +63,7 @@ def init_weights(module: nn.Module, gain: float = 1):
     def forward(self, *_args, **kwargs):
         raise NotImplementedError()

-    def predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor:
+    def _predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor:
         """
         Get the action according to the policy for a given observation.

@@ -73,21 +73,145 @@ def predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Ten
         """
         raise NotImplementedError()

+    def predict(self, observation: np.ndarray,
+                state: Optional[np.ndarray] = None,
+                mask: Optional[np.ndarray] = None,
+                deterministic: bool = False) -> Tuple[np.ndarray, Optional[np.ndarray]]:
+        """
+        Get the policy action and state from an observation (and optional state).
+
+        :param observation: (np.ndarray) the input observation
+        :param state: (Optional[np.ndarray]) The last states (can be None, used in recurrent policies)
+        :param mask: (Optional[np.ndarray]) The last masks (can be None, used in recurrent policies)
+        :param deterministic: (bool) Whether or not to return deterministic actions.
+ :return: (Tuple[np.ndarray, Optional[np.ndarray]]) the model's action and the next state + (used in recurrent policies) + """ + # if state is None: + # state = self.initial_state + # if mask is None: + # mask = [False for _ in range(self.n_envs)] + observation = np.array(observation) + vectorized_env = self._is_vectorized_observation(observation, self.observation_space) + + observation = observation.reshape((-1,) + self.observation_space.shape) + observation = th.as_tensor(observation).to(self.device) + with th.no_grad(): + actions = self._predict(observation, deterministic=deterministic) + # Convert to numpy + actions = actions.cpu().numpy() + + # Rescale to proper domain when using squashing + if isinstance(self.action_space, gym.spaces.Box) and self.squash_output: + actions = self.unscale_action(actions) + + clipped_actions = actions + # Clip the actions to avoid out of bound error when using gaussian distribution + if isinstance(self.action_space, gym.spaces.Box) and not self.squash_output: + clipped_actions = np.clip(actions, self.action_space.low, self.action_space.high) + + if not vectorized_env: + if state is not None: + raise ValueError("Error: The environment must be vectorized when using recurrent policies.") + clipped_actions = clipped_actions[0] + + return clipped_actions, state + + def scale_action(self, action: np.ndarray) -> np.ndarray: + """ + Rescale the action from [low, high] to [-1, 1] + (no need for symmetric action space) + + :param action: (np.ndarray) Action to scale + :return: (np.ndarray) Scaled action + """ + low, high = self.action_space.low, self.action_space.high + return 2.0 * ((action - low) / (high - low)) - 1.0 + + def unscale_action(self, scaled_action: np.ndarray) -> np.ndarray: + """ + Rescale the action from [-1, 1] to [low, high] + (no need for symmetric action space) + + :param scaled_action: Action to un-scale + """ + low, high = self.action_space.low, self.action_space.high + return low + (0.5 * (scaled_action + 1.0) * (high - low)) + + @staticmethod + def _is_vectorized_observation(observation: np.ndarray, observation_space: gym.spaces.Space) -> bool: + """ + For every observation type, detects and validates the shape, + then returns whether or not the observation is vectorized. + + :param observation: (np.ndarray) the input observation to validate + :param observation_space: (gym.spaces) the observation space + :return: (bool) whether the given observation is vectorized or not + """ + if isinstance(observation_space, gym.spaces.Box): + if observation.shape == observation_space.shape: + return False + elif observation.shape[1:] == observation_space.shape: + return True + else: + raise ValueError("Error: Unexpected observation shape {} for ".format(observation.shape) + + "Box environment, please use {} ".format(observation_space.shape) + + "or (n_env, {}) for the observation shape." 
+ .format(", ".join(map(str, observation_space.shape)))) + elif isinstance(observation_space, gym.spaces.Discrete): + if observation.shape == (): # A numpy array of a number, has shape empty tuple '()' + return False + elif len(observation.shape) == 1: + return True + else: + raise ValueError("Error: Unexpected observation shape {} for ".format(observation.shape) + + "Discrete environment, please use (1,) or (n_env, 1) for the observation shape.") + # TODO: add support for MultiDiscrete and MultiBinary observation spaces + # elif isinstance(observation_space, gym.spaces.MultiDiscrete): + # if observation.shape == (len(observation_space.nvec),): + # return False + # elif len(observation.shape) == 2 and observation.shape[1] == len(observation_space.nvec): + # return True + # else: + # raise ValueError("Error: Unexpected observation shape {} for MultiDiscrete ".format(observation.shape) + + # "environment, please use ({},) or ".format(len(observation_space.nvec)) + + # "(n_env, {}) for the observation shape.".format(len(observation_space.nvec))) + # elif isinstance(observation_space, gym.spaces.MultiBinary): + # if observation.shape == (observation_space.n,): + # return False + # elif len(observation.shape) == 2 and observation.shape[1] == observation_space.n: + # return True + # else: + # raise ValueError("Error: Unexpected observation shape {} for MultiBinary ".format(observation.shape) + + # "environment, please use ({},) or ".format(observation_space.n) + + # "(n_env, {}) for the observation shape.".format(observation_space.n)) + else: + raise ValueError("Error: Cannot determine if the observation is vectorized with the space type {}." + .format(observation_space)) + + def save(self, path: str) -> None: """ - Save model to a given location. + Save policy weights to a given location. + NOTE: we don't save policy parameters :param path: (str) """ + previous_device = self.device + # Convert to cpu before saving + self = self.to('cpu') th.save(self.state_dict(), path) + self = self.to(previous_device) def load(self, path: str) -> None: """ - Load saved model from path. + Load policy weights from path. 
+        NOTE: only the policy weights (state_dict) are loaded, not the optimizer state or hyperparameters.

         :param path: (str)
         """
         self.load_state_dict(th.load(path))
+        self = self.to(self.device)

     def load_from_vector(self, vector: np.ndarray):
         """
diff --git a/torchy_baselines/ppo/policies.py b/torchy_baselines/ppo/policies.py
index 216790ccb..1589247db 100644
--- a/torchy_baselines/ppo/policies.py
+++ b/torchy_baselines/ppo/policies.py
@@ -156,9 +156,9 @@ def forward(self, obs: th.Tensor,
         latent_pi, latent_vf, latent_sde = self._get_latent(obs)
         # Evaluate the values for the given observations
         value = self.value_net(latent_vf)
-        action, action_distribution = self._get_action_dist_from_latent(latent_pi, latent_sde=latent_sde,
-                                                                        deterministic=deterministic)
-        log_prob = action_distribution.log_prob(action)
+        distribution = self._get_action_dist_from_latent(latent_pi, latent_sde=latent_sde)
+        action = distribution.get_action(deterministic=deterministic)
+        log_prob = distribution.log_prob(action)
         return action, value, log_prob

     def _get_latent(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
@@ -180,33 +180,29 @@ def _get_latent(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor, th.Tensor]:
         return latent_pi, latent_vf, latent_sde

     def _get_action_dist_from_latent(self, latent_pi: th.Tensor,
-                                     latent_sde: Optional[th.Tensor] = None,
-                                     deterministic: bool = False) -> Tuple[th.Tensor, Distribution]:
+                                     latent_sde: Optional[th.Tensor] = None) -> Distribution:
         """
-        Retrieve action and associated action distribution
-        given the latent codes.
+        Retrieve action distribution given the latent codes.

         :param latent_pi: (th.Tensor) Latent code for the actor
         :param latent_sde: (Optional[th.Tensor]) Latent code for the SDE exploration function
-        :param deterministic: (bool) Whether to sample or use deterministic actions
-        :return: (Tuple[th.Tensor, Distribution]) Action and action distribution
+        :return: (Distribution) Action distribution
         """
         mean_actions = self.action_net(latent_pi)

         if isinstance(self.action_dist, DiagGaussianDistribution):
-            return self.action_dist.proba_distribution(mean_actions, self.log_std, deterministic=deterministic)
+            return self.action_dist.proba_distribution(mean_actions, self.log_std)
         elif isinstance(self.action_dist, CategoricalDistribution):
             # Here mean_actions are the logits before the softmax
-            return self.action_dist.proba_distribution(mean_actions, deterministic=deterministic)
+            return self.action_dist.proba_distribution(action_logits=mean_actions)
         elif isinstance(self.action_dist, StateDependentNoiseDistribution):
-            return self.action_dist.proba_distribution(mean_actions, self.log_std, latent_sde,
-                                                       deterministic=deterministic)
+            return self.action_dist.proba_distribution(mean_actions, self.log_std, latent_sde)
         else:
             raise ValueError('Invalid action distribution')

-    def predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor:
+    def _predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor:
         """
         Get the action according to the policy for a given observation.
@@ -215,27 +211,25 @@ def predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Ten :return: (th.Tensor) Taken action according to the policy """ latent_pi, _, latent_sde = self._get_latent(observation) - action, _ = self._get_action_dist_from_latent(latent_pi, latent_sde, deterministic=deterministic) - return action + distribution = self._get_action_dist_from_latent(latent_pi, latent_sde) + return distribution.get_action(deterministic=deterministic) def evaluate_actions(self, obs: th.Tensor, - actions: th.Tensor, - deterministic: bool = False) -> Tuple[th.Tensor, th.Tensor, th.Tensor]: + actions: th.Tensor) -> Tuple[th.Tensor, th.Tensor, th.Tensor]: """ Evaluate actions according to the current policy, given the observations. :param obs: (th.Tensor) :param actions: (th.Tensor) - :param deterministic: (bool) :return: (th.Tensor, th.Tensor, th.Tensor) estimated value, log likelihood of taking those actions and entropy of the action distribution. """ latent_pi, latent_vf, latent_sde = self._get_latent(obs) - _, action_distribution = self._get_action_dist_from_latent(latent_pi, latent_sde, deterministic=deterministic) - log_prob = action_distribution.log_prob(actions) + distribution = self._get_action_dist_from_latent(latent_pi, latent_sde) + log_prob = distribution.log_prob(actions) values = self.value_net(latent_vf) - return values, log_prob, action_distribution.entropy() + return values, log_prob, distribution.entropy() MlpPolicy = PPOPolicy diff --git a/torchy_baselines/sac/policies.py b/torchy_baselines/sac/policies.py index db0f1a9aa..f561c6e6b 100644 --- a/torchy_baselines/sac/policies.py +++ b/torchy_baselines/sac/policies.py @@ -108,14 +108,11 @@ def reset_noise(self, batch_size: int = 1) -> None: 'reset_noise() is only available when using SDE' self.action_dist.sample_weights(self.log_std, batch_size=batch_size) - def _get_latent(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]: + def get_action_dist_params(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor, th.Tensor]: features = self.extract_features(obs) latent_pi = self.latent_pi(features) latent_sde = self.sde_features_extractor(features) if self.sde_features_extractor is not None else latent_pi - return latent_pi, latent_sde - def get_action_dist_params(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor, th.Tensor]: - latent_pi, latent_sde = self._get_latent(obs) mean_actions = self.mu(latent_pi) if self.use_sde: @@ -130,9 +127,8 @@ def forward(self, obs: th.Tensor, deterministic: bool = False) -> th.Tensor: mean_actions, log_std, latent_sde = self.get_action_dist_params(obs) kwargs = dict(latent_sde=latent_sde) if self.use_sde else {} # Note: the action is squashed - action, _ = self.action_dist.proba_distribution(mean_actions, log_std, - deterministic=deterministic, **kwargs) - return action + return self.action_dist.action_from_params(mean_actions, log_std, + deterministic=deterministic, **kwargs) def action_log_prob(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]: mean_actions, log_std, latent_sde = self.get_action_dist_params(obs) @@ -268,7 +264,7 @@ def make_critic(self) -> Critic: def forward(self, obs: th.Tensor) -> th.Tensor: return self.predict(obs, deterministic=False) - def predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor: + def _predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor: return self.actor(observation, deterministic) diff --git a/torchy_baselines/td3/policies.py b/torchy_baselines/td3/policies.py index 
8b24832c5..c10e4f2fc 100644 --- a/torchy_baselines/td3/policies.py +++ b/torchy_baselines/td3/policies.py @@ -104,11 +104,6 @@ def get_std(self) -> th.Tensor: """ return self.action_dist.get_std(self.log_std) - def _get_action_dist_from_latent(self, latent_pi: th.Tensor, - latent_sde: th.Tensor) -> Tuple[th.Tensor, Distribution]: - mean_actions = self.mu(latent_pi) - return self.action_dist.proba_distribution(mean_actions, self.log_std, latent_sde) - def _get_latent(self, obs: th.Tensor) -> Tuple[th.Tensor, th.Tensor]: features = self.extract_features(obs) latent_pi = self.latent_pi(features) @@ -126,9 +121,9 @@ def evaluate_actions(self, obs: th.Tensor, action: th.Tensor) -> Tuple[th.Tensor and entropy of the action distribution. """ latent_pi, latent_sde = self._get_latent(obs) - _, distribution = self._get_action_dist_from_latent(latent_pi, latent_sde) + mean_actions = self.mu(latent_pi) + distribution = self.action_dist.proba_distribution(mean_actions, self.log_std, latent_sde) log_prob = distribution.log_prob(action) - # value = self.value_net(latent_vf) return log_prob, distribution.entropy() def reset_noise(self) -> None: @@ -150,8 +145,6 @@ def forward(self, obs: th.Tensor, deterministic: bool = True) -> th.Tensor: # -> set squash_output=True in the action_dist? # NOTE: the clipping is done in the rollout for now return self.mu(latent_pi) + noise - # action, _ = self._get_action_dist_from_latent(latent_pi) - # return action else: features = self.extract_features(obs) return self.mu(features) @@ -338,9 +331,9 @@ def make_critic(self) -> Critic: return Critic(**self.net_args).to(self.device) def forward(self, observation: th.Tensor, deterministic: bool = False): - return self.predict(observation, deterministic=deterministic) + return self._predict(observation, deterministic=deterministic) - def predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor: + def _predict(self, observation: th.Tensor, deterministic: bool = False) -> th.Tensor: return self.actor(observation, deterministic=deterministic) diff --git a/torchy_baselines/version.txt b/torchy_baselines/version.txt index db79cdc82..892c7ae8c 100644 --- a/torchy_baselines/version.txt +++ b/torchy_baselines/version.txt @@ -1 +1 @@ -0.4.0a2 +0.4.0a3
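
Usage note for reviewers: the sketch below shows the two APIs this patch introduces, policy-only save/load/predict and the two-step distribution API. It is a minimal illustration, not part of the patch: the choice of `TD3`, `Pendulum-v0` and the file name are assumptions, while `model.policy`, `policy.save()`, `policy.load()`, `policy.predict()`, `proba_distribution()` and `get_action()` are taken from the diff above.

```python
# Illustrative sketch only, not part of the patch.
# Assumptions: TD3 with an MlpPolicy on Pendulum-v0 and the file name are arbitrary choices.
import gym
import torch as th

from torchy_baselines import TD3
from torchy_baselines.common.distributions import DiagGaussianDistribution

# --- Policy-only saving/loading ---
env = gym.make('Pendulum-v0')
model = TD3('MlpPolicy', env)
model.learn(total_timesteps=1000)

model.policy.save('./policy_weights.pkl')   # saves only the policy state_dict

# Re-create a model with the same architecture, then load the weights into its policy
new_model = TD3('MlpPolicy', gym.make('Pendulum-v0'))
new_model.policy.load('./policy_weights.pkl')

obs = env.reset()
# predict() now lives on the policy, so no model object is needed at inference time
action, _ = new_model.policy.predict(obs, deterministic=True)

# --- Two-step distribution API ---
# proba_distribution() now only parametrizes the distribution and returns it;
# sampling and log-prob are explicit follow-up calls.
dist = DiagGaussianDistribution(2)
mean_actions, log_std = th.zeros(8, 2), th.zeros(2)
dist = dist.proba_distribution(mean_actions, log_std)
actions = dist.get_action(deterministic=False)
log_prob = dist.log_prob(actions)
```

Note that `policy.load()` is an in-place instance method: it only fills an existing policy's `state_dict`, so the target policy must already be built with the same architecture.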