From 1988254445c150d4aed093e4d75814c59c5f1616 Mon Sep 17 00:00:00 2001
From: Alexis Duburcq <alexis.duburcq@gmail.com>
Date: Fri, 3 May 2024 10:54:59 +0200
Subject: [PATCH] [gym/common] Add average odometry velocity quantity.

---
 .../gym_jiminy/common/bases/quantity.py       |  2 +-
 .../common/gym_jiminy/common/bases/reward.py  |  3 +
 .../gym_jiminy/common/quantities/__init__.py  |  5 +-
 .../gym_jiminy/common/quantities/generic.py   | 81 ++++++++++++-------
 .../common/quantities/locomotion.py           | 38 +++++++++
 .../gym_jiminy/common/rewards/locomotion.py   | 46 ++++++++---
 .../gym_jiminy/common/utils/__init__.py       |  2 +
 .../common/gym_jiminy/common/utils/math.py    | 34 ++++++++
 python/gym_jiminy/unit_py/test_quantities.py  | 10 +++
 python/gym_jiminy/unit_py/test_reward.py      | 47 +++++++++++
 10 files changed, 228 insertions(+), 40 deletions(-)
 create mode 100644 python/gym_jiminy/unit_py/test_reward.py

diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/quantity.py b/python/gym_jiminy/common/gym_jiminy/common/bases/quantity.py
index cabd196d8..58f1e5dce 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/bases/quantity.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/bases/quantity.py
@@ -224,7 +224,7 @@ def __init__(self,
                  env: InterfaceJiminyEnv,
                  parent: Optional["AbstractQuantity"],
                  requirements: Dict[str, "QuantityCreator"],
-                 auto_refresh: bool) -> None:
+                 auto_refresh: bool = False) -> None:
         """
         :param env: Base or wrapped jiminy environment.
         :param parent: Higher-level quantity from which this quantity is a
diff --git a/python/gym_jiminy/common/gym_jiminy/common/bases/reward.py b/python/gym_jiminy/common/gym_jiminy/common/bases/reward.py
index 3939cc54e..e1e62e21b 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/bases/reward.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/bases/reward.py
@@ -128,6 +128,9 @@ def __init__(self,
         # Add quantity to the set of quantities managed by the environment
         self.env.quantities[self.name] = quantity
 
+        # Keep track of the underlying quantity
+        self.quantity = self.env.quantities.registry[self.name]
+
     @property
     def name(self) -> str:
         """Name uniquely identifying every reward. It will be used to add the
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py
index c659c766a..b8db7baba 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/__init__.py
@@ -6,7 +6,9 @@
                       StackedQuantity,
                       AverageFrameSpatialVelocity,
                       MaskedQuantity)
-from .locomotion import CenterOfMass, ZeroMomentPoint
+from .locomotion import (AverageOdometryVelocity,
+                         CenterOfMass,
+                         ZeroMomentPoint)
 
 
 __all__ = [
@@ -16,6 +18,7 @@
     'StackedQuantity',
     'AverageFrameSpatialVelocity',
     'MaskedQuantity',
+    'AverageOdometryVelocity',
     'CenterOfMass',
     'ZeroMomentPoint',
 ]
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
index 08b12f4b1..d9bfab009 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/generic.py
@@ -7,8 +7,9 @@
 from functools import partial
 from dataclasses import dataclass
 from typing import (
-    List, Dict, Set, Optional, Protocol, Sequence, Tuple, TypeVar, Union,
+    List, Dict, Set, Any, Optional, Protocol, Sequence, Tuple, TypeVar, Union,
     runtime_checkable)
+from typing_extensions import TypeAlias
 
 import numpy as np
 
@@ -17,9 +18,13 @@
 import pinocchio as pin
 
 from ..bases import InterfaceJiminyEnv, AbstractQuantity, QuantityCreator
-from ..utils import fill, matrix_to_rpy, matrix_to_quat
+from ..utils import (
+    fill, matrix_to_rpy, matrix_to_quat, quat_to_matrix,
+    quat_interpolate_middle)
 
 
+EllipsisType: TypeAlias = Any  # TODO: `EllipsisType` introduced in Python 3.10
+
 ValueT = TypeVar('ValueT')
 
 
@@ -515,12 +520,19 @@ class AverageFrameSpatialVelocity(AbstractQuantity[np.ndarray]):
 
     The average spatial velocity is obtained by finite difference. More
     precisely, it is defined here as the ratio of the geodesic distance in SE3
-    Lie  group between the pose of the frame at the end of previous and current
+    Lie Group between the pose of the frame at the end of previous and current
     step over the time difference between them. Notably, under this definition,
     the linear average velocity jointly depends on rate of change of the
     translation and rotation of the frame, which may be undesirable in some
     cases. Alternatively, the double geodesic distance could be used instead to
     completely decouple the translation from the rotation.
+
+    .. note::
+        The local frame for which the velocity is expressed is defined as the
+        midpoint interpolation between the previous and current frame pose.
+        This definition is arbitrary, in a sense that any other point for an
+        interpolation ratio going from 0.0 (previous pose) to 1.0 (current
+        pose) would be equally valid.
     """
 
     frame_name: str
@@ -574,8 +586,9 @@ def __init__(self,
         # Inverse step size
         self._inv_step_dt = 0.0
 
-        # Define proxy to the current frame pose (translation, rotation matrix)
-        self._rot_mat = np.eye(3)
+        # Allocate memory for the average quaternion and rotation matrix
+        self._quat_mean = np.zeros(4)
+        self._rot_mat_mean = np.eye(3)
 
         # Pre-allocate memory for the spatial velocity
         self._v_spatial: np.ndarray = np.zeros(6)
@@ -590,11 +603,6 @@ def initialize(self) -> None:
         # Compute inverse step size
         self._inv_step_dt = 1.0 / self.env.step_dt
 
-        # Refresh proxy to current frame pose
-        frame_index = self.pinocchio_model.getFrameId(self.frame_name)
-        transform = self.pinocchio_data.oMf[frame_index]
-        self._rot_mat = transform.rotation
-
         # Re-initialize pre-allocated buffers
         fill(self._v_spatial, 0)
 
@@ -612,8 +620,17 @@ def refresh(self) -> np.ndarray:
 
         # Translate local velocity to world frame
         if self.reference_frame == pin.LOCAL_WORLD_ALIGNED:
+            # Define world frame as the "middle" between prev and next pose.
+            # The orientation difference has an effect on the translation
+            # difference, but not the other way around. Here, we only care
+            # about the middle rotation, so we can consider SO3 Lie Group
+            # algebra instead of SE3.
+            quat_interpolate_middle(
+                xyzquat_prev[-4:], xyzquat[-4:], self._quat_mean)
+            quat_to_matrix(self._quat_mean, self._rot_mat_mean)
+
             # TODO: x2 speedup can be expected using `np.dot` with  `nb.jit`
-            self._v_lin_ang[:] = self._rot_mat @ self._v_lin_ang
+            self._v_lin_ang[:] = self._rot_mat_mean @ self._v_lin_ang
 
         return self._v_spatial
 
@@ -624,14 +641,16 @@ class MaskedQuantity(AbstractQuantity[np.ndarray]):
     array along an axis.
 
     Elements will be extract by copy unless the indices of the elements to
-    extract to be written equivalently by a slice, ie they are evenly spaced.
+    extract to be written equivalently by a slice (ie they are evenly spaced),
+    and the array can be flattened while preserving memory contiguity if 'axis'
+    is `None`.
     """
 
     quantity: AbstractQuantity
     """Base quantity whose elements must be extracted.
     """
 
-    indices: Tuple[int]
+    indices: Tuple[int, ...]
     """Indices of the elements to extract.
     """
 
@@ -643,7 +662,7 @@ def __init__(self,
                  env: InterfaceJiminyEnv,
                  parent: Optional[AbstractQuantity],
                  quantity: QuantityCreator[np.ndarray],
-                 key: Union[Sequence[int], Sequence[bool], slice],
+                 key: Union[Sequence[int], Sequence[bool]],
                  axis: Optional[int] = None
                  ) -> None:
         """
@@ -653,17 +672,15 @@ def __init__(self,
         :param quantity: Tuple gathering the class of the quantity whose values
                          must be extracted, plus all its constructor keyword-
                          arguments except environment 'env' and parent 'parent.
-        :param key: Sequence of indices, boolean mask, or slice that will be
-                    used to extract elements from the quantity along one axis.
+        :param key: Sequence of indices or boolean mask that will be used to
+                    extract elements from the quantity along one axis.
         :param axis: Axis over which to extract elements. `None` to consider
                      flattened array.
                      Optional: `None` by default.
         """
-        # Check if a slice, indices or a mask has been provided
-        if key is slice:
-            pass
-        elif all(isinstance(e, bool) for e in key):
-            key, _ = np.nonzero(key)
+        # Check if indices or boolean mask has been provided
+        if all(isinstance(e, bool) for e in key):
+            key = tuple(np.flatnonzero(key))
         elif not all(isinstance(e, int) for e in key):
             raise ValueError(
                 "Argument 'key' invalid. It must either be a "
@@ -679,19 +696,23 @@ def __init__(self,
                 "No indices to extract from quantity. Data would be empty.")
 
         # Check if the indices are evenly spaced
-        self._slices: Optional[slice] = None
+        self._slices: Tuple[Union[slice, EllipsisType], ...] = ()
+        stride: Optional[int] = None
         if len(self.indices) == 1:
             stride = 1
-        if len(self.indices) > 1:
+        if len(self.indices) > 1 and all(e >= 0 for e in self.indices):
             spacing = np.unique(np.diff(self.indices))
-            stride = stride[0] if spacing.size == 1 else None
+            if spacing.size == 1:
+                stride = spacing[0]
         if stride is not None:
             slice_ = slice(self.indices[0], self.indices[-1] + 1, stride)
-            if axis > 0:
-                self._slices = (slice(None),) * axis + (slice_,)
+            if axis is None:
+                self._slices = (slice_,)
+            elif axis > 0:
+                self._slices = (*((slice(None),) * axis), slice_)
             else:
                 self._slices = (
-                    Ellipsis, slice_) + (slice(None),) * (- axis - 1)
+                    Ellipsis, slice_, *((slice(None),) * (- axis - 1)))
 
         # Call base implementation
         super(). __init__(env,
@@ -708,9 +729,13 @@ def initialize(self) -> None:
 
     def refresh(self) -> np.ndarray:
         # Extract elements from quantity
-        if self._slices is None:
+        if not self._slices:
             # Note that `take` is faster than classical advanced indexing via
             # `operator[]` (`__getitem__`) because the latter is more generic.
             # Notably, `operator[]` supports boolean mask but `take` does not.
             return self.data.take(self.indices, axis=self.axis)
+        if self.axis is None:
+            # `reshape` must be used instead of `flat` to get a view that can
+            # be sliced without copy.
+            return self.data.reshape((-1,))[self._slices]
         return self.data[self._slices]
diff --git a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py
index e349fabc3..11915c68c 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/quantities/locomotion.py
@@ -9,6 +9,44 @@
 from ..bases import InterfaceJiminyEnv, AbstractQuantity
 from ..utils import fill
 
+from ..quantities import MaskedQuantity, AverageFrameSpatialVelocity
+
+
+@dataclass(unsafe_hash=True)
+class AverageOdometryVelocity(AbstractQuantity[np.ndarray]):
+    """Average odometry velocity in local-world-aligned frame at the end of the
+    agent step.
+
+    The odometry pose fully specifies the position of the robot in 2D world
+    plane. As such, it comprises the linear translation (X, Y) and the angular
+    velocity around Z axis (namely rate of change of Yaw Euler angle). The
+    average spatial velocity is obtained by finite difference. See
+    `AverageFrameSpatialVelocity` documentation for details.
+    """
+
+    def __init__(self,
+                 env: InterfaceJiminyEnv,
+                 parent: Optional[AbstractQuantity]) -> None:
+        """
+        :param env: Base or wrapped jiminy environment.
+        :param parent: Higher-level quantity from which this quantity is a
+                       requirement if any, `None` otherwise.
+        """
+        # Call base implementation
+        super().__init__(
+            env,
+            parent,
+            requirements=dict(
+                data=(MaskedQuantity, dict(
+                    quantity=(AverageFrameSpatialVelocity, dict(
+                        frame_name="root_joint",
+                        reference_frame=pin.LOCAL_WORLD_ALIGNED)),
+                    key=(0, 1, 5)))),
+            auto_refresh=False)
+
+    def refresh(self) -> np.ndarray:
+        return self.data
+
 
 @dataclass(unsafe_hash=True)
 class CenterOfMass(AbstractQuantity[np.ndarray]):
diff --git a/python/gym_jiminy/common/gym_jiminy/common/rewards/locomotion.py b/python/gym_jiminy/common/gym_jiminy/common/rewards/locomotion.py
index 9c41296d5..b546d6641 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/rewards/locomotion.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/rewards/locomotion.py
@@ -1,39 +1,65 @@
-
+"""Rewards mainly relevant for locomotion tasks on floating-base robots.
+"""
 from typing import Sequence
 
 import numpy as np
 
 from ..bases import InterfaceJiminyEnv, BaseQuantityReward
-from ..quantities import MaskedQuantity, AverageFrameSpatialVelocity
+from ..quantities import AverageOdometryVelocity
 
 from .generic import radial_basis_function
 
 
 class OdometryVelocityReward(BaseQuantityReward):
-    """ TODO: Write documentation.
+    """Reward the agent for tracking a non-stationary target odometry velocity.
+
+    The error transform in a normalized reward to maximize by applying RBF
+    kernel on the error. The reward will be 0.0 if the error cancels out
+    completely and less than 0.01 above the user-specified cutoff threshold.
     """
     def __init__(self,
                  env: InterfaceJiminyEnv,
                  target: Sequence[float],
                  cutoff: float) -> None:
-        """ TODO: Write documentation.
+        """
+        :param target: Initial target average odometry velocity (vX, vY, vYaw).
+                       The target can be updated in necessary by calling
+                       `set_target`.
+        :param cutoff: Cutoff threshold for the RBF kernel transform.
         """
         # Backup some user argument(s)
-        self.target = target
+        self._target = np.asarray(target)
         self.cutoff = cutoff
 
         # Call base implementation
         super().__init__(
             env,
             "reward_odometry_velocity",
-            (MaskedQuantity, dict(
-                quantity=(AverageFrameSpatialVelocity, dict(frame_name="root_joint")),
-                key=(0, 1, 5))),
+            (AverageOdometryVelocity, {}),
             self._transform,
             is_normalized=True,
             is_terminal=False)
 
-    def _transform(self, value: np.ndarray) -> np.ndarray:
-        """ TODO: Write documentation.
+    @property
+    def target(self) -> np.ndarray:
+        """Get current target odometry velocity.
+        """
+        return self._target
+
+    @target.setter
+    def target(self, target: Sequence[float]) -> None:
+        """Set current target odometry velocity.
+        """
+        self._target = np.asarray(target)
+
+    def _transform(self, value: np.ndarray) -> float:
+        """Apply Radial Base Function transform to the residual error between
+        the current and target average odometry velocity.
+
+        .. note::
+            The user must call `set_target` method before `compute_reward` to
+            update the target odometry velocity if non-stationary.
+
+        :param value: Current average odometry velocity.
         """
         return radial_basis_function(value - self.target, self.cutoff)
diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/__init__.py b/python/gym_jiminy/common/gym_jiminy/common/utils/__init__.py
index 32b108c84..f2b082a2c 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/utils/__init__.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/utils/__init__.py
@@ -11,6 +11,7 @@
                    quat_to_yaw_cos_sin,
                    quat_multiply,
                    quat_average,
+                   quat_interpolate_middle,
                    rpy_to_matrix,
                    rpy_to_quat,
                    transforms_to_vector,
@@ -55,6 +56,7 @@
     'quat_to_yaw_cos_sin',
     'quat_multiply',
     'quat_average',
+    'quat_interpolate_middle',
     'rpy_to_matrix',
     'rpy_to_quat',
     'transforms_to_vector',
diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/math.py b/python/gym_jiminy/common/gym_jiminy/common/utils/math.py
index 9721cd314..e89f23dcf 100644
--- a/python/gym_jiminy/common/gym_jiminy/common/utils/math.py
+++ b/python/gym_jiminy/common/gym_jiminy/common/utils/math.py
@@ -522,3 +522,37 @@ def quat_average(quat: np.ndarray,
     q_flat = q_perm.reshape((*q_perm.shape[:-len(axis)], -1))
     _, eigvec = np.linalg.eigh(q_flat @ np.swapaxes(q_flat, -1, -2))
     return np.moveaxis(eigvec[..., -1], -1, 0)
+
+
+@nb.jit(nopython=True, cache=True, fastmath=True)
+def quat_interpolate_middle(quat1: np.ndarray,
+                            quat2: np.ndarray,
+                            out: Optional[np.ndarray] = None) -> np.ndarray:
+    """Compute the midpoint interpolation between two batches of quaternions
+    [qx, qy, qz, qw].
+
+    The midpoint interpolation of two quaternion is defined as the integration
+    of half the difference between them, starting from the first one, ie
+    `q_mid = integrate(q1, 0.5 * difference(q1, d2))`, which is a special case
+    of the `slerp` method (spherical linear interpolation) for `alpha=0.5`.
+
+    For the midpoint in particular, one can show that the middle quaternion is
+    simply normalized sum of the previous and next quaternions.
+
+    :param quat1: First batch of quaternions as a N-dimensional array whose
+                  first dimension gathers the 4 quaternion coordinates.
+    :param quat2: Second batch of quaternions as a N-dimensional array.
+    :param out: A pre-allocated array into which the result is stored. If not
+                provided, a new array is freshly-allocated, which is slower.
+    """
+    assert quat1.ndim >= 1 and quat1.shape == quat2.shape
+    if out is None:
+        out_ = np.empty((4, *quat1.shape[1:]))
+    else:
+        assert out.shape == (4, *quat1.shape[1:])
+        out_ = out
+
+    dot = np.expand_dims(np.sum(quat1 * quat2, axis=0), axis=0)
+    out_[:] = (quat1 + np.sign(dot) * quat2) / np.sqrt(2 * (1 + np.abs(dot)))
+
+    return out_
diff --git a/python/gym_jiminy/unit_py/test_quantities.py b/python/gym_jiminy/unit_py/test_quantities.py
index ff3f09f7a..6bcc49952 100644
--- a/python/gym_jiminy/unit_py/test_quantities.py
+++ b/python/gym_jiminy/unit_py/test_quantities.py
@@ -17,6 +17,8 @@ class Quantities(unittest.TestCase):
     """ TODO: Write documentation
     """
     def test_shared_cache(self):
+        """ TODO: Write documentation
+        """
         env = gym.make("gym_jiminy.envs:atlas")
         env.reset()
 
@@ -54,6 +56,8 @@ def test_shared_cache(self):
         assert np.any(zmp_1 != zmp_2)
 
     def test_dynamic_batching(self):
+        """ TODO: Write documentation
+        """
         env = gym.make("gym_jiminy.envs:atlas")
         env.reset()
         env.step(env.action)
@@ -100,6 +104,8 @@ def test_dynamic_batching(self):
         assert len(quantities['rpy_0'].requirements['data'].frame_names) == 1
 
     def test_discard(self):
+        """ TODO: Write documentation
+        """
         env = gym.make("gym_jiminy.envs:atlas")
         env.reset()
 
@@ -132,6 +138,8 @@ def test_discard(self):
             assert len(cache.owners) == 0
 
     def test_env(self):
+        """ TODO: Write documentation
+        """
         env = gym.make("gym_jiminy.envs:atlas")
 
         env.quantities["com"] = (CenterOfMass, {})
@@ -144,6 +152,8 @@ def test_env(self):
         assert np.all(com_0 == env.quantities["com"])
 
     def test_stack(self):
+        """ TODO: Write documentation
+        """
         env = gym.make("gym_jiminy.envs:atlas")
         env.reset()
 
diff --git a/python/gym_jiminy/unit_py/test_reward.py b/python/gym_jiminy/unit_py/test_reward.py
new file mode 100644
index 000000000..bfaedd0af
--- /dev/null
+++ b/python/gym_jiminy/unit_py/test_reward.py
@@ -0,0 +1,47 @@
+""" TODO: Write documentation
+"""
+import unittest
+
+import numpy as np
+import gymnasium as gym
+import jiminy_py
+import pinocchio as pin
+
+from gym_jiminy.common.rewards import OdometryVelocityReward
+
+
+class Rewards(unittest.TestCase):
+    """ TODO: Write documentation
+    """
+    def test_average_odometry_velocity(self):
+        """ TODO: Write documentation
+        """
+        env = gym.make("gym_jiminy.envs:atlas")
+        reward_fun = OdometryVelocityReward(
+            env, np.array([0.0, 0.0, 0.0]), cutoff=0.3)
+
+        env.reset(seed=0)
+        base_pose_prev = env.robot_state.q[:7].copy()
+        _, _, terminated, _, info = env.step(env.action)
+        base_pose = env.robot_state.q[:7].copy()
+
+        se3 = pin.liegroups.SE3()
+        base_pose_diff = pin.LieGroup.difference(
+            se3, base_pose_prev, base_pose)
+        base_velocity_mean_local =  base_pose_diff / env.step_dt
+        base_pose_mean = pin.LieGroup.integrate(
+            se3, base_pose_prev, 0.5 * base_pose_diff)
+        rot_mat = pin.Quaternion(base_pose_mean[-4:]).matrix()
+        base_velocity_mean_world = np.concatenate((
+            rot_mat @ base_velocity_mean_local[:3],
+            rot_mat @ base_velocity_mean_local[3:]))
+        np.testing.assert_allclose(
+            reward_fun.quantity.requirements['data'].data,
+            base_velocity_mean_world)
+        base_odom_velocity = base_velocity_mean_world[[0, 1, 5]]
+        np.testing.assert_allclose(
+            reward_fun.quantity.data, base_odom_velocity)
+        gamma = - np.log(0.01) / reward_fun.cutoff ** 2
+        reward = np.exp(- gamma * np.sum(base_odom_velocity ** 2))
+        np.testing.assert_allclose(
+            reward_fun(terminated, info), reward)