[gym/common] Refactor quantity management to dramatically improve its performance. #821

Merged: 5 commits, Jul 8, 2024
1 change: 1 addition & 0 deletions core/src/io/serialization.cc
@@ -1008,6 +1008,7 @@ namespace boost::serialization

// Restore extended simulation model
model.pinocchioModel_ = pinocchioModel;
model.pinocchioData_ = pinocchio::Data(pinocchioModel);
}

template<class Archive>
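The one-line addition above keeps `pinocchioData_` consistent with the freshly restored `pinocchioModel_`: in Pinocchio, a `Data` object is allocated for one specific `Model`, so it must be re-created whenever the model is replaced. A minimal editorial sketch of the same rule through the Python bindings (the sample humanoid model is only used here for illustration):

```python
import pinocchio as pin

# Stand-in for a model that was just restored from an archive
model = pin.buildSampleModelHumanoid()

# Whenever the model changes, the matching Data must be rebuilt, since its
# internal buffers are sized and indexed for one specific model layout.
data = pin.Data(model)

# Algorithms always consume a consistent (model, data) pair
q = pin.neutral(model)
pin.forwardKinematics(model, data, q)
```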
4 changes: 2 additions & 2 deletions core/src/solver/constraint_solvers.cc
@@ -265,11 +265,11 @@ namespace jiminy
they can grow arbitrarily large for constraints whose bounds are active. It follows
that stagnation of the residuals is the only viable criterion.
The PGS algorithm has been modified for solving second-order cone LCP, which means
that only the L2-norm of the tangential forces can be expected to converge. Because
that only the L^2-norm of the tangential forces can be expected to converge. Because
of this, it is too restrictive to check the element-wise variation of the residuals
over iterations. It makes more sense to look at the Linf-norm instead, but this
criterion is very lax. A good compromise may be to look at the constraint-block-wise
L2-norm, which is similar to what Drake simulator is doing. For reference, see:
L^2-norm, which is similar to what Drake simulator is doing. For reference, see:
https://github.com/RobotLocomotion/drake/blob/master/multibody/contact_solvers/pgs_solver.cc
*/
const double tol = tolAbs_ + tolRel_ * y_.lpNorm<Eigen::Infinity>() + EPS;
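The comment above argues for checking stagnation of the residuals per constraint block (an L2-norm within each block, as in Drake's PGS solver) rather than element-wise or via a single Linf-norm. Below is a rough numpy sketch of such a criterion, not Jiminy's actual implementation; the block layout and tolerance names are assumptions made for illustration:

```python
import numpy as np

def has_converged(y, y_prev, block_sizes, tol_abs=1e-8, tol_rel=1e-6):
    """Check stagnation of the residual per constraint block.

    Each block (e.g. one contact point: normal plus tangential components)
    is reduced to its L2-norm, and convergence is declared when the change
    of every block falls below an absolute-plus-relative tolerance.
    """
    start = 0
    for size in block_sizes:
        block = y[start:start + size]
        block_prev = y_prev[start:start + size]
        tol = tol_abs + tol_rel * np.linalg.norm(block)
        if np.linalg.norm(block - block_prev) > tol:
            return False
        start += size
    return True
```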
12 changes: 6 additions & 6 deletions python/gym_jiminy/common/gym_jiminy/common/bases/compositions.py
@@ -7,7 +7,7 @@
greatly reduces code duplication and bugs.
"""
from abc import ABC, abstractmethod
from enum import Enum
from enum import IntEnum
from typing import Tuple, Sequence, Callable, Union, Optional, Generic, TypeVar

import numpy as np
@@ -234,7 +234,7 @@ def __init__(self,
self.env.quantities[self.name] = quantity

# Keep track of the underlying quantity
self.quantity = self.env.quantities.registry[self.name]
self.data = self.env.quantities.registry[self.name]

def __del__(self) -> None:
try:
@@ -266,7 +266,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]:
return None

# Evaluate raw quantity
value = self.quantity.get()
value = self.data.get()

# Early return if quantity is None
if value is None:
@@ -383,7 +383,7 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]:
return reward_total


class EpisodeState(Enum):
class EpisodeState(IntEnum):
"""Specify the current state of the ongoing episode.
"""

@@ -595,7 +595,7 @@ def __init__(self,
self.env.quantities[self.name] = quantity

# Keep track of the underlying quantity
self.quantity = self.env.quantities.registry[self.name]
self.data = self.env.quantities.registry[self.name]

def __del__(self) -> None:
try:
@@ -615,7 +615,7 @@ def compute(self, info: InfoType) -> bool:
This method is not meant to be overloaded.
"""
# Evaluate the quantity
value = self.quantity.get()
value = self.data.get()

# Check if the quantity is out-of-bounds.
# Note that it may be `None` if the quantity is ill-defined for the
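Two changes recur in this file: the cached handle returned by `env.quantities.registry[self.name]` is renamed from `self.quantity` to `self.data`, and `EpisodeState` switches from `Enum` to `IntEnum` so that episode states compare and cast as plain integers (handy for numeric buffers and logging) without going through `.value`. A small editorial sketch of that behavioral difference; the member names and values are illustrative, since the actual members are not shown in this hunk:

```python
from enum import Enum, IntEnum

class PlainState(Enum):
    TERMINATED = 1

class IntState(IntEnum):
    TERMINATED = 1

# A plain Enum member never equals an int; an IntEnum member does.
assert PlainState.TERMINATED != 1
assert IntState.TERMINATED == 1

# IntEnum values can be used directly where integers are expected,
# e.g. as indices into numeric buffers.
flags = [0, 0, 0]
flags[IntState.TERMINATED] = 1
assert flags == [0, 1, 0]
```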
@@ -274,7 +274,7 @@ def _observer_handle(self,
# they are always updated before the controller gets called, no matter
# if either one or the other is time-continuous. Hacking the internal
# dynamics to clear quantities does not address this issue either.
self.quantities.clear()
# self.quantities.clear()

# Refresh the observation if not already done but only if a simulation
# is already running. It would be pointless to refresh the observation
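This hunk disables clearing the quantity cache inside the observer handle: as the comment explains, cached quantities must stay valid until the controller update that follows within the same step. Where the refactor actually resets the cache is not visible in this hunk; the sketch below only illustrates the general pattern of a single, well-defined invalidation point per step, with hypothetical class and method names:

```python
class StepLoopSketch:
    """Toy illustration of one cache-invalidation point per step."""

    def __init__(self, quantities):
        # `quantities` stands for the shared quantity manager; it is only
        # assumed to expose `clear()` and dict-like access to cached values.
        self.quantities = quantities

    def _observer_handle(self, measurement):
        # No cache clearing here: cached quantities must survive until the
        # controller update that runs later in the same step.
        return self.quantities.get("observation_features")

    def step(self, action):
        # Invalidate cached quantities exactly once, at a known point.
        self.quantities.clear()
```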
36 changes: 30 additions & 6 deletions python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py
@@ -448,9 +448,10 @@ def refresh_observation(self, measurement: EngineObsType) -> None:
self.env.refresh_observation(measurement)

def has_terminated(self, info: InfoType) -> Tuple[bool, bool]:
"""Determine whether the episode is over, because a terminal state of
the underlying MDP has been reached or an aborting condition outside
the scope of the MDP has been triggered.
"""Determine whether the practitioner is instructed to stop the ongoing
episode on the spot because a termination condition has been triggered,
either coming from the based environment or from the ad-hoc termination
conditions that has been plugged on top of it.

At each step of the wrapped environment, all its termination conditions
will be evaluated sequentially until one of them eventually gets
@@ -465,6 +466,9 @@ def has_terminated(self, info: InfoType) -> Tuple[bool, bool]:
This method is called after `refresh_observation`, so that the
internal buffer 'observation' is up-to-date.

.. seealso::
See `InterfaceJiminyEnv.has_terminated` documentation for details.

:param info: Dictionary of extra information for monitoring.

:returns: terminated and truncated flags.
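The docstring above describes evaluating the wrapped environment's own termination first and then each plugged-in condition in turn, stopping at the first one that triggers. A rough editorial sketch of that aggregation logic (not part of the diff), assuming each plugged-in condition is a callable returning a (terminated, truncated) pair; the actual conversion from the boolean `compute` result in `compositions.py` may differ:

```python
from typing import Any, Callable, Dict, Sequence, Tuple

InfoType = Dict[str, Any]
TerminationFn = Callable[[InfoType], Tuple[bool, bool]]

def aggregate_terminations(env_has_terminated: TerminationFn,
                           terminations: Sequence[TerminationFn],
                           info: InfoType) -> Tuple[bool, bool]:
    """Evaluate the base environment first, then every ad-hoc termination
    condition sequentially, returning as soon as one of them triggers."""
    terminated, truncated = env_has_terminated(info)
    if terminated or truncated:
        return terminated, truncated
    for termination in terminations:
        terminated, truncated = termination(info)
        if terminated or truncated:
            return terminated, truncated
    return False, False
```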
@@ -492,9 +496,29 @@ def compute_command(self, action: ActT, command: np.ndarray) -> None:
self.env.compute_command(action, command)

def compute_reward(self, terminated: bool, info: InfoType) -> float:
if self.reward is None:
return 0.0
return self.reward(terminated, info)
"""Compute the total reward, ie the sum of the original reward from the
wrapped environment with the ad-hoc reward components that has been
plugged on top of it.

.. seealso::
See `InterfaceController.compute_reward` documentation for details.

:param terminated: Whether the episode has reached the terminal state
of the MDP at the current step. This flag can be
used to compute a specific terminal reward.
:param info: Dictionary of extra information for monitoring.

:returns: Aggregated reward for the current step.
"""
# Compute base reward
reward = self.env.compute_reward(terminated, info)

# Add composed reward if any
if self.reward is not None:
reward += self.reward(terminated, info)

# Return total reward
return reward


class ObservedJiminyEnv(
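On the reward side, the net effect of the new `compute_reward` is that the composed reward is now added to the wrapped environment's own reward instead of replacing it (the previous code returned only the composed reward, or 0.0 when none was defined). A tiny self-contained illustration with stub reward callables and made-up numbers:

```python
def compute_total_reward(base_reward, composed_reward, terminated, info):
    # Base reward from the wrapped environment
    reward = base_reward(terminated, info)
    # Add the composed reward component, if any
    if composed_reward is not None:
        reward += composed_reward(terminated, info)
    return reward

# Hypothetical reward components, for illustration only
base = lambda terminated, info: 1.0
tracking = lambda terminated, info: 0.25

assert compute_total_reward(base, tracking, False, {}) == 1.25
assert compute_total_reward(base, None, False, {}) == 1.0
```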