Skip to content

Commit

Permalink
[gym/common] Support specifying reward in pipeline config. (#787)
Browse files Browse the repository at this point in the history
* [gym/common] Add compose reward pipeline wrapper. 
* [gym/common] Support specifying reward in pipeline config.
* [misc] Update install instructions on windows.
  • Loading branch information
duburcqa authored May 6, 2024
1 parent 182894f commit 7f44e8e
Show file tree
Hide file tree
Showing 21 changed files with 331 additions and 90 deletions.
35 changes: 17 additions & 18 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,24 +152,23 @@ if (-not (Test-Path -PathType Container $RootDir/build)) {
New-Item -ItemType "directory" -Force -Path "$RootDir/build"
}
Set-Location -Path $RootDir/build
cmake "$RootDir" -G "Visual Studio 16 2019" -T "v142" -DCMAKE_GENERATOR_PLATFORM=x64 `
-DCMAKE_INSTALL_PREFIX="$InstallDir" -DCMAKE_MODULE_PATH="$InstallDir" `
cmake "$RootDir" -G "Visual Studio 17 2022" -DCMAKE_GENERATOR_PLATFORM=x64 `
-DCMAKE_MSVC_RUNTIME_LIBRARY="MultiThreaded$<$<CONFIG:Debug>:Debug>DLL" `
-DCMAKE_INSTALL_PREFIX="$InstallDir" -DCMAKE_PREFIX_PATH="$InstallDir" `
-DCMAKE_INTERPROCEDURAL_OPTIMIZATION=OFF -DCMAKE_VERBOSE_MAKEFILE=ON `
-DBOOST_ROOT="$InstallDir" -DBoost_INCLUDE_DIR="$InstallDir/include" `
-DBoost_NO_SYSTEM_PATHS=TRUE -DBoost_NO_BOOST_CMAKE=TRUE `
-DBoost_USE_STATIC_LIBS=OFF `
-DBUILD_TESTING=ON -DBUILD_EXAMPLES=ON -DBUILD_PYTHON_INTERFACE=ON `
-DCMAKE_CXX_FLAGS="-DBOOST_ALL_NO_LIB -DBOOST_LIB_DIAGNOSTIC -DBOOST_CORE_USE_GENERIC_CMATH"
cmake --build . --target all --config "${env:BUILD_TYPE}" --parallel 8
if (-not (Test-Path -PathType Container "$RootDir/build/PyPi/jiminy_py/src/jiminy_py/core")) {
New-Item -ItemType "directory" -Force -Path "$RootDir/build/PyPi/jiminy_py/src/jiminy_py/core"
-DBoost_NO_SYSTEM_PATHS=TRUE -DBoost_NO_BOOST_CMAKE=TRUE -DBoost_USE_STATIC_LIBS=ON `
-DBUILD_TESTING=ON -DBUILD_EXAMPLES=ON -DBUILD_PYTHON_INTERFACE=ON `
-DCMAKE_CXX_FLAGS="${env:CMAKE_CXX_FLAGS} $(
) -DBOOST_ALL_NO_LIB -DBOOST_LIB_DIAGNOSTIC -DBOOST_CORE_USE_GENERIC_CMATH $(
) -DEIGENPY_STATIC -DURDFDOM_STATIC -DHPP_FCL_STATIC -DPINOCCHIO_STATIC"
cmake --build . --target ALL_BUILD --config "${env:BUILD_TYPE}" --parallel 2
if (-not (Test-Path -PathType Container "$RootDir/build/pypi/jiminy_py/src/jiminy_py")) {
New-Item -ItemType "directory" -Force -Path "$RootDir/build/pypi/jiminy_py/src/jiminy_py/core"
}
Copy-Item -Path "$InstallDir/lib/boost_numpy*.dll" `
-Destination "$RootDir/build/PyPi/jiminy_py/src/jiminy_py/core"
Copy-Item -Path "$InstallDir/lib/boost_python*.dll" `
-Destination "$RootDir/build/PyPi/jiminy_py/src/jiminy_py/core"
Copy-Item -Path "$InstallDir/lib/site-packages/*" `
-Destination "$RootDir/build/PyPi/jiminy_py/src/jiminy_py" -Recurse
cmake --build . --target install --config "${env:BUILD_TYPE}"
Copy-Item -Force -Recurse -Path "$InstallDir/lib/site-packages/*" `
-Destination "$RootDir/build/pypi/jiminy_py/src/jiminy_py/core"
cmake --build . --target INSTALL --config "${env:BUILD_TYPE}"
```
6 changes: 6 additions & 0 deletions docs/api/gym_jiminy/common/bases/compositions.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Compositions
============

.. automodule:: gym_jiminy.common.bases.compositions
:members:
:show-inheritance:
2 changes: 2 additions & 0 deletions docs/api/gym_jiminy/common/bases/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ Bases

interfaces
blocks
quantities
compositions
pipeline
6 changes: 6 additions & 0 deletions docs/api/gym_jiminy/common/bases/quantities.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Quantities
==========

.. automodule:: gym_jiminy.common.bases.quantities
:members:
:show-inheritance:
6 changes: 0 additions & 6 deletions docs/api/gym_jiminy/common/bases/quantity.rst

This file was deleted.

6 changes: 0 additions & 6 deletions docs/api/gym_jiminy/common/bases/reward.rst

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Generic
=======

.. automodule:: gym_jiminy.common.rewards.generic
.. automodule:: gym_jiminy.common.compositions.generic
:members:
:undoc-members:
:private-members:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Envs
====
Compositions
============

.. toctree::
:maxdepth: 1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Locomotion
==========

.. automodule:: gym_jiminy.common.rewards.locomotion
.. automodule:: gym_jiminy.common.compositions.locomotion
:members:
:undoc-members:
:private-members:
Expand Down
4 changes: 2 additions & 2 deletions docs/api/gym_jiminy/common/quantities/index.rst
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Envs
====
Quantities
==========

.. toctree::
:maxdepth: 1
Expand Down
8 changes: 5 additions & 3 deletions python/gym_jiminy/common/gym_jiminy/common/bases/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@
from .quantities import (QuantityCreator,
SharedCache,
AbstractQuantity)
from .reward import (AbstractReward,
BaseQuantityReward,
BaseMixtureReward)
from .compositions import (AbstractReward,
BaseQuantityReward,
BaseMixtureReward)
from .blocks import (BlockStateT,
InterfaceBlock,
BaseObserverBlock,
Expand All @@ -25,6 +25,7 @@
BasePipelineWrapper,
BaseTransformObservation,
BaseTransformAction,
ComposedJiminyEnv,
ObservedJiminyEnv,
ControlledJiminyEnv)

Expand Down Expand Up @@ -54,6 +55,7 @@
'BasePipelineWrapper',
'BaseTransformObservation',
'BaseTransformAction',
'ComposedJiminyEnv',
'ObservedJiminyEnv',
'ControlledJiminyEnv',
'QuantityCreator'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

import numpy as np

from ..bases import InterfaceJiminyEnv, QuantityCreator, InfoType
from .interfaces import InfoType, InterfaceJiminyEnv
from .quantities import QuantityCreator


ValueT = TypeVar('ValueT')
Expand Down Expand Up @@ -271,48 +272,48 @@ class BaseMixtureReward(AbstractReward):
single one.
"""

rewards: Tuple[AbstractReward, ...]
components: Tuple[AbstractReward, ...]
"""List of all the reward components that must be aggregated together.
"""

def __init__(self,
env: InterfaceJiminyEnv,
name: str,
rewards: Sequence[AbstractReward],
components: Sequence[AbstractReward],
reduce_fn: Callable[
[Sequence[Optional[float]]], Optional[float]],
is_normalized: bool) -> None:
"""
:param env: Base or wrapped jiminy environment.
:param name: Desired name of the total reward.
:param rewards: Sequence of reward components to aggregate.
:param components: Sequence of reward components to aggregate.
:param reduce_fn: Transform function responsible for aggregating all
the reward components that were evaluated. Typical
examples are cumulative product and weighted sum.
:param is_normalized: Whether the reward is guaranteed to be normalized
after applying reduction function `reduce_fn`.
"""
# Make sure that at least one reward component has been specified
if not rewards:
if not components:
raise ValueError(
"At least one reward component must be specified.")

# Make sure that all reward components share the same environment
env = rewards[0].env
for reward in rewards[1:]:
for reward in components:
if env is not reward.env:
raise ValueError(
"All reward components must share the same environment.")

# Backup some user argument(s)
self.rewards = tuple(rewards)
self.components = tuple(components)
self._reduce_fn = reduce_fn
self._is_normalized = is_normalized

# Call base implementation
super().__init__(env, name)

# Determine whether the reward mixture is terminal
is_terminal = {reward.is_terminal for reward in self.rewards}
is_terminal = {reward.is_terminal for reward in self.components}
self._is_terminal: Optional[bool] = None
if len(is_terminal) == 1:
self._is_terminal = next(iter(is_terminal))
Expand All @@ -335,9 +336,13 @@ def compute(self, terminated: bool, info: InfoType) -> Optional[float]:
"""Evaluate each individual reward component for the current state of
the environment, then aggregate them in one.
"""
# Early return depending on whether the reward and state are terminal
if self.is_terminal is not None and self.is_terminal ^ terminated:
return None

# Compute all reward components
values = []
for reward in self.rewards:
for reward in self.components:
# Evaluate reward
reward_info: InfoType = {}
value: Optional[float] = reward(terminated, reward_info)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ def compute_reward(self,
By default, it returns 0.0 without extra information no matter what.
The user is expected to provide an appropriate reward on its own,
either by overloading this method or by wrapping the environment with
`ComposeReward` for modular environment pipeline design.
`ComposedJiminyEnv` for modular environment pipeline design.
:param terminated: Whether the episode has reached the terminal state
of the MDP at the current step. This flag can be
Expand Down
94 changes: 90 additions & 4 deletions python/gym_jiminy/common/gym_jiminy/common/bases/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
InfoType,
EngineObsType,
InterfaceJiminyEnv)
from .compositions import AbstractReward
from .blocks import BaseControllerBlock, BaseObserverBlock

from ..utils import DataNested, is_breakpoint, zeros, build_copyto, copy
Expand Down Expand Up @@ -298,6 +299,91 @@ def close(self) -> None:
self.env.close()


class ComposedJiminyEnv(
        BasePipelineWrapper[ObsT, ActT, ObsT, ActT],
        Generic[ObsT, ActT]):
    """Plug ad-hoc reward components and termination conditions to the
    wrapped environment.

    .. note::
        This wrapper derives from `BasePipelineWrapper`, and as such, it is
        considered as internal unlike `gym.Wrapper`. This means that it will
        be taken into account when calling `evaluate` or `play_interactive`
        on the wrapped environment.
    """
    def __init__(self,
                 env: InterfaceJiminyEnv[ObsT, ActT],
                 *,
                 reward: AbstractReward) -> None:
        """
        :param env: Environment to wrap.
        :param reward: Reward object that is called by `compute_reward`. Its
                       `env` attribute must be the very same object as the
                       environment being wrapped.

        :raises ValueError: If `reward.env` is not `env`.
        """
        # Make sure that the reward is linked to this environment.
        # This is validation of a user argument, so an exception is raised
        # explicitly instead of relying on `assert`, which is stripped when
        # running Python with optimizations enabled.
        if env is not reward.env:
            raise ValueError(
                "The reward must be linked to the environment being wrapped.")

        # Backup user argument(s)
        self.reward = reward

        # Initialize base class
        super().__init__(env)

        # Bind observation and action of the base environment
        assert self.observation_space.contains(self.env.observation)
        assert self.action_space.contains(self.env.action)
        self.observation = self.env.observation
        self.action = self.env.action

    def _initialize_action_space(self) -> None:
        """Configure the action space.

        It simply copies the action space of the wrapped environment.
        """
        self.action_space = self.env.action_space

    def _initialize_observation_space(self) -> None:
        """Configure the observation space.

        It simply copies the observation space of the wrapped environment.
        """
        self.observation_space = self.env.observation_space

    def _setup(self) -> None:
        """Configure the wrapper.

        In addition to calling the base implementation, it sets the observe
        and control update period.
        """
        # Call base implementation
        super()._setup()

        # Copy observe and control update periods from wrapped environment
        self.observe_dt = self.env.observe_dt
        self.control_dt = self.env.control_dt

    def refresh_observation(self, measurement: EngineObsType) -> None:
        """Compute high-level features based on the current wrapped
        environment's observation.

        It simply forwards the observation computed by the wrapped environment
        without any processing.

        :param measurement: Low-level measure from the environment to process
                            to get higher-level observation.
        """
        self.env.refresh_observation(measurement)

    def compute_command(self, action: ActT, command: np.ndarray) -> None:
        """Compute the motors efforts to apply on the robot.

        It simply forwards the command computed by the wrapped environment
        without any processing.

        :param action: High-level target to achieve by means of the command.
        :param command: Lower-level command to update in-place.
        """
        self.env.compute_command(action, command)

    def compute_reward(self, terminated: bool, info: InfoType) -> float:
        """Compute the reward by calling the reward object that was specified
        at construction.

        :param terminated: Flag forwarded as-is to the reward object.
        :param info: Dictionary forwarded as-is to the reward object.

        :returns: Whatever value the reward object returns when called.
        """
        return self.reward(terminated, info)


class ObservedJiminyEnv(
BasePipelineWrapper[NestedObsT, ActT, BaseObsT, ActT],
Generic[NestedObsT, ActT, BaseObsT]):
Expand Down Expand Up @@ -364,8 +450,8 @@ def __init__(self,

# Make sure that the environment is either some `ObservedJiminyEnv`,
# `ControlledJiminyEnv` or `ComposedJiminyEnv` block, or the base
# environment directly.
if isinstance(env, BasePipelineWrapper) and not isinstance(
env, (ObservedJiminyEnv, ControlledJiminyEnv)):
if isinstance(env, BasePipelineWrapper) and not isinstance(env, (
ObservedJiminyEnv, ControlledJiminyEnv, ComposedJiminyEnv)):
raise TypeError(
"Observers can only be added on top of another observer, "
"controller, or a base environment itself.")
Expand Down Expand Up @@ -586,8 +672,8 @@ def __init__(self,

# Make sure that the environment is either some `ObservedJiminyEnv`,
# `ControlledJiminyEnv` or `ComposedJiminyEnv` block, or the base
# environment directly.
if isinstance(env, BasePipelineWrapper) and not isinstance(
env, (ObservedJiminyEnv, ControlledJiminyEnv)):
if isinstance(env, BasePipelineWrapper) and not isinstance(env, (
ObservedJiminyEnv, ControlledJiminyEnv, ComposedJiminyEnv)):
raise TypeError(
"Controllers can only be added on top of another observer, "
"controller, or a base environment itself.")
Expand Down
Loading

0 comments on commit 7f44e8e

Please sign in to comment.