From 6243e43237e4c9e5eceacc413cbc44f0c2008193 Mon Sep 17 00:00:00 2001 From: Alexis DUBURCQ Date: Wed, 18 Aug 2021 22:14:18 +0200 Subject: [PATCH] [all] Fix reproducibility and performance comparison issues. (#399) * [core] Fix log buffer not refreshed after calling 'stop', making the last datapoint inaccessible. (#396) * [python/robot] Fix non-reproducible simulation results. * [jiminy_py/simulator] Fix running simulation not stopped before updating seed. * [python/simulator] Fix unexpected error if 'replay' is called without data. * [gym/common] Do not set the seed by default in '_setup' method. * [gym/common] Add train and eval modes to allow specific evaluation behaviors. * [gym/common] Rename '_is_training' to 'is_training'. Fix 'is_training' False by default. (#397) * [gym/common] 'seed' method now returns the actual sequence of seeds instead of only the first one. * [gym/common] Remove useless conversion of iterator to list. * [gym/common] Do not automatically cast the action space to float32. (#400) * [gym/common] Ensure internal '_action' buffer dtype float64. (#400) * [gym/toolbox] Add 'quat_to_yaw_cos_sin' math helper. * [gym/rllib] Add l2 regularization loss to PPO. * [gym/rllib] Use noisy samples instead of the true ones to compute global smoothness. * [gym/rllib] Make sure the validation is always the same to make it easier to benchmark performance. * [gym/rllib] Enable evaluating the policy on multiple trials and reporting statistics. Export data from best trial only. * [gym/rllib] Relax ray dependency version requirement. Co-authored-by: Alexis Duburcq --- CMakeLists.txt | 2 +- core/src/engine/EngineMultiRobot.cc | 3 + .../gym_jiminy/common/envs/env_generic.py | 64 ++++++++------ .../common/gym_jiminy/common/utils/helpers.py | 13 ++- .../common/gym_jiminy/common/utils/spaces.py | 5 +- .../envs/gym_jiminy/envs/acrobot.py | 2 +- .../envs/gym_jiminy/envs/cartpole.py | 2 +- .../gym_jiminy/rllib/gym_jiminy/rllib/ppo.py | 67 +++++++++----- .../rllib/gym_jiminy/rllib/utilities.py | 87 ++++++++++++++++--- python/gym_jiminy/rllib/setup.py | 3 +- .../gym_jiminy/toolbox/math/__init__.py | 6 +- .../gym_jiminy/toolbox/math/generic.py | 21 +++-- python/jiminy_py/src/jiminy_py/robot.py | 13 ++- python/jiminy_py/src/jiminy_py/simulator.py | 12 +-- 14 files changed, 212 insertions(+), 88 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 155849e99..97cc4dd51 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.10) # Set the build version -set(BUILD_VERSION 1.6.28) +set(BUILD_VERSION 1.6.29) # Set compatibility if(CMAKE_VERSION VERSION_GREATER "3.11.0") diff --git a/core/src/engine/EngineMultiRobot.cc b/core/src/engine/EngineMultiRobot.cc index 2e492b52a..96dc33cea 100644 --- a/core/src/engine/EngineMultiRobot.cc +++ b/core/src/engine/EngineMultiRobot.cc @@ -2162,6 +2162,9 @@ namespace jiminy // Log current buffer content as final point of the log data updateTelemetry(); + // Clear log data buffer one last time, now that the final point has been added + logData_ = nullptr; + /* Reset the telemetry. Note that calling ``stop` or `reset` does NOT clear the internal data buffer of telemetryRecorder_.
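Editor's note: as a quick end-to-end check of the headline claim (reproducibility), a reviewer could run something like the sketch below. It is not part of the patch; the environment id, episode length, and array-valued observations are assumptions, so adapt it to any registered gym_jiminy task.

    # Reproducibility check sketch: two rollouts driven by the same seed and
    # the same sampled actions should now produce identical trajectories.
    import gym
    import numpy as np
    import gym_jiminy.envs  # noqa: F401  (assumed to register the gym_jiminy tasks)

    def rollout(seed: int, n_steps: int = 200) -> np.ndarray:
        env = gym.make("gym_jiminy.envs:cartpole-v0")  # hypothetical env id
        env.seed(seed)
        env.action_space.seed(seed)  # make the sampled actions identical as well
        obs, frames = env.reset(), []
        for _ in range(n_steps):
            obs, _, done, _ = env.step(env.action_space.sample())
            # Flatten the observation so dict/tuple spaces are handled too
            frames.append(gym.spaces.flatten(env.observation_space, obs))
            if done:
                break
        env.close()
        return np.stack(frames)

    assert np.array_equal(rollout(42), rollout(42))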
diff --git a/python/gym_jiminy/common/gym_jiminy/common/envs/env_generic.py b/python/gym_jiminy/common/gym_jiminy/common/envs/env_generic.py index 29a8dd822..093e7a1ec 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/envs/env_generic.py +++ b/python/gym_jiminy/common/gym_jiminy/common/envs/env_generic.py @@ -6,12 +6,12 @@ import tempfile from copy import deepcopy from collections import OrderedDict -from typing import Optional, Tuple, Sequence, Dict, Any, Callable, List, Union +from typing import Optional, Tuple, Dict, Any, Callable, List, Union import numpy as np import gym from gym import logger, spaces -from gym.utils.seeding import create_seed, _int_list_from_bigint, hash_seed +from gym.utils.seeding import _int_list_from_bigint, hash_seed import jiminy_py.core as jiminy from jiminy_py.core import (EncoderSensor as encoder, @@ -120,10 +120,13 @@ def __init__(self, # Internal buffers for physics computations self.rg = np.random.Generator(np.random.Philox()) - self._seed: Optional[np.uint32] = None + self._seed: List[np.uint32] = [] self.log_path: Optional[str] = None self.logfile_action_headers: Optional[FieldDictNested] = None + # Whether or not evaluation mode is active + self.is_training = True + # Whether or not play interactive mode is active self._is_interactive = False @@ -139,7 +142,7 @@ def __init__(self, self._num_steps_beyond_done: Optional[int] = None # Initialize the seed of the environment. - # Note that reseting the seed also reset robot internal state. + # Note that resetting the seed also reset robot internal state. self.seed() # Set robot in neutral configuration @@ -429,15 +432,10 @@ def _refresh_action_space(self) -> None: command_limit[motor.joint_velocity_idx] = \ MOTOR_EFFORT_MAX - # Set the action space. - # Note that float32 is used instead of float64, because otherwise it - # would requires the neural network to perform float64 computations - # or cast the output for no really advantage since the action is - # directly forwarded to the motors, without intermediary computations. + # Set the action space action_scale = command_limit[self.robot.motors_velocity_idx] - self.action_space = spaces.Box(low=-action_scale.astype(np.float32), - high=action_scale.astype(np.float32), - dtype=np.float32) + self.action_space = spaces.Box( + low=-action_scale, high=action_scale, dtype=np.float64) def reset(self, controller_hook: Optional[Callable[[], Optional[Tuple[ @@ -566,14 +564,9 @@ def reset(self, # Fallback: Get generic fieldnames otherwise self.logfile_action_headers = get_fieldnames( self.action_space, "action") - is_success = register_variables(self.simulator.controller, - self.logfile_action_headers, - self._action) - if not is_success: - self.logfile_action_headers = None - logger.warn( - "Action must have dtype np.float64 to be registered to the " - "telemetry.") + register_variables(self.simulator.controller, + self.logfile_action_headers, + self._action) # Sample the initial state and reset the low-level engine qpos, qvel = self._sample_state() @@ -626,7 +619,7 @@ def reset(self, return obs - def seed(self, seed: Optional[int] = None) -> Sequence[np.uint32]: + def seed(self, seed: Optional[int] = None) -> List[np.uint32]: """Specify the seed of the environment. .. warning:: @@ -643,16 +636,16 @@ def seed(self, seed: Optional[int] = None) -> Sequence[np.uint32]: # into sequence of 4 bytes uint32 seeds. Backup only the first one. # Note that hashing is used to get rid off possible correlation in the # presence of concurrency. 
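Editor's note: the rewritten `seed` body shown next expands a single integer seed into a sequence of 32-bit words. A standalone sketch of that expansion, using the same legacy `gym.utils.seeding` helpers that the patch relies on, looks like this:

    # Expand one integer seed into uncorrelated uint32 words: the full list
    # seeds numpy's Philox generator, the first word seeds the Jiminy engine.
    import numpy as np
    from gym.utils.seeding import hash_seed, _int_list_from_bigint

    seed_words = list(map(np.uint32, _int_list_from_bigint(hash_seed(42))))
    rg = np.random.Generator(np.random.Philox(seed_words))
    engine_seed = seed_words[0]
    print(seed_words, engine_seed, rg.random())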
- seed_ints = _int_list_from_bigint(hash_seed(create_seed(seed))) - self._seed = np.uint32(seed_ints[0]) + seed = hash_seed(seed) + self._seed = list(map(np.uint32, _int_list_from_bigint(seed))) # Instantiate a new random number generator based on the provided seed - self.rg = np.random.Generator(np.random.Philox(seed_ints)) + self.rg = np.random.Generator(np.random.Philox(self._seed)) # Reset the seed of Jiminy Engine - self.simulator.seed(self._seed) + self.simulator.seed(self._seed[0]) - return [self._seed] + return self._seed def close(self) -> None: """Terminate the Python Jiminy engine. @@ -958,6 +951,24 @@ def _interact(key: Optional[str] = None) -> bool: # Disable play interactive mode flag self._is_interactive = False + def train(self) -> None: + """Sets the environment in training mode. + + .. note:: + This mode is enabled by default. + """ + self.is_training = True + + def eval(self) -> None: + """Sets the environment in evaluation mode. + + This has any effect only on certain environment. See documentations of + particular environment for details of their behaviors in training and + evaluation modes, if they are affected. It can be used to activate + clipping or some filtering of the action specifical at evaluation time. + """ + self.is_training = False + # methods to override: # ---------------------------- @@ -982,7 +993,6 @@ def _setup(self) -> None: engine_options["stepper"]["iterMax"] = 0 engine_options["stepper"]["timeout"] = 0.0 engine_options["stepper"]["logInternalStepperSteps"] = False - engine_options["stepper"]["randomSeed"] = self._seed self.simulator.engine.set_options(engine_options) # Set robot in neutral configuration diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/helpers.py b/python/gym_jiminy/common/gym_jiminy/common/utils/helpers.py index 2f8f2d670..ef8ede40c 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/utils/helpers.py +++ b/python/gym_jiminy/common/gym_jiminy/common/utils/helpers.py @@ -1,6 +1,6 @@ """ TODO: Write documentation. """ -from typing import Union, Dict, List +from typing import Union, Dict, List, ValuesView import numpy as np import numba as nb @@ -64,7 +64,8 @@ def get_fieldnames(space: spaces.Space, def register_variables(controller: jiminy.AbstractController, - fields: FieldDictNested, + fields: Union[ + ValuesView[FieldDictNested], FieldDictNested], data: SpaceDictNested, namespace: str = "") -> bool: """Register data from `Gym.Space` to the telemetry of a controller. @@ -101,8 +102,8 @@ def register_variables(controller: jiminy.AbstractController, # Default case: data is already a numpy array. Can be registered directly. 
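Editor's note: the new `train`/`eval` switch is meant to be toggled by evaluation code, as `gym_jiminy.rllib.utilities.evaluate` does later in this patch. A minimal, self-contained sketch of that usage pattern (the `policy` callable is a placeholder) might look like:

    from typing import Callable
    import gym

    def evaluate_once(env: gym.Env, policy: Callable, seed: int = 0) -> float:
        """Roll out one episode in evaluation mode, then restore training mode."""
        was_training = getattr(env, "is_training", True)
        if was_training:
            env.eval()                  # evaluation-specific behavior, if any
        env.seed(seed)
        obs, done, total_reward = env.reset(), False, 0.0
        while not done:
            obs, reward, done, _ = env.step(policy(obs))
            total_reward += reward
        if was_training:
            env.train()                 # restore the default training mode
        return total_reward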
if isinstance(data, np.ndarray): if np.issubsctype(data, np.float64): + assert isinstance(fields, list) for i, field in enumerate(fields): - assert not isinstance(fields, dict) if isinstance(fields[i], list): fields[i] = [".".join(filter(None, (namespace, subfield))) for subfield in field] @@ -115,10 +116,8 @@ def register_variables(controller: jiminy.AbstractController, # Fallback to looping over fields and data iterators is_success = True if isinstance(fields, dict): - fields = list(fields.values()) - if isinstance(data, dict): - data = list(data.values()) - for subfields, value in zip(fields, data): + fields = fields.values() + for subfields, value in zip(fields, data.values()): assert isinstance(subfields, (dict, list)) is_success = register_variables( controller, subfields, value, namespace) diff --git a/python/gym_jiminy/common/gym_jiminy/common/utils/spaces.py b/python/gym_jiminy/common/gym_jiminy/common/utils/spaces.py index 9bbaba624..3c8b6dece 100644 --- a/python/gym_jiminy/common/gym_jiminy/common/utils/spaces.py +++ b/python/gym_jiminy/common/gym_jiminy/common/utils/spaces.py @@ -129,9 +129,10 @@ def zeros(space: gym.Space, if isinstance(space, spaces.Tuple): return tuple(zeros(subspace, dtype=dtype) for subspace in space.spaces) if isinstance(space, (spaces.Discrete, spaces.MultiDiscrete)): - return np.array(0) # Using np.array of 0 dim to be mutable + # Note that np.array of 0 dim is returned in order to be mutable + return np.array(0, dtype=dtype or np.int64) if isinstance(space, spaces.MultiBinary): - return np.zeros(space.n, dtype=np.int8) + return np.zeros(space.n, dtype=dtype or np.int8) raise NotImplementedError( f"Space of type {type(space)} is not supported.") diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/acrobot.py b/python/gym_jiminy/envs/gym_jiminy/envs/acrobot.py index a37b0cac7..b4148b936 100644 --- a/python/gym_jiminy/envs/gym_jiminy/envs/acrobot.py +++ b/python/gym_jiminy/envs/gym_jiminy/envs/acrobot.py @@ -214,7 +214,7 @@ def compute_command(self, # Compute the actual torque to apply if not self.continuous: - action = self.AVAIL_CTRL[action] + action = self.AVAIL_CTRL[round(action[()])] if ACTION_NOISE > 0.0: action += sample(scale=ACTION_NOISE, rg=self.rg) diff --git a/python/gym_jiminy/envs/gym_jiminy/envs/cartpole.py b/python/gym_jiminy/envs/gym_jiminy/envs/cartpole.py index e5a813a20..89095bcb0 100644 --- a/python/gym_jiminy/envs/gym_jiminy/envs/cartpole.py +++ b/python/gym_jiminy/envs/gym_jiminy/envs/cartpole.py @@ -214,7 +214,7 @@ def compute_command(self, # Compute the actual torque to apply if not self.continuous: - action = self.AVAIL_CTRL[action] + action = self.AVAIL_CTRL[round(action[()])] return action diff --git a/python/gym_jiminy/rllib/gym_jiminy/rllib/ppo.py b/python/gym_jiminy/rllib/gym_jiminy/rllib/ppo.py index f01faabee..923f3e361 100644 --- a/python/gym_jiminy/rllib/gym_jiminy/rllib/ppo.py +++ b/python/gym_jiminy/rllib/gym_jiminy/rllib/ppo.py @@ -12,6 +12,7 @@ from ray.rllib.policy.sample_batch import SampleBatch from ray.rllib.policy.view_requirement import ViewRequirement from ray.rllib.utils.typing import TensorType, TrainerConfigDict +from ray.rllib.utils.torch_ops import l2_loss from ray.rllib.agents.ppo import DEFAULT_CONFIG, PPOTrainer from ray.rllib.agents.ppo.ppo_torch_policy import ( ppo_surrogate_loss, kl_and_loss_stats, setup_mixins, PPOTorchPolicy) @@ -23,7 +24,8 @@ "symmetric_policy_reg": 0.0, "caps_temporal_reg": 0.0, "caps_spatial_reg": 0.0, - "caps_global_reg": 0.0 + "caps_global_reg": 0.0, + "l2_reg": 0.0 }, 
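Editor's note on the acrobot/cartpole change above: with the action space no longer cast to float32 and `zeros` keeping a mutable 0-d array for `Discrete` spaces, the command callback may receive a 0-d float buffer, so it must be unwrapped and rounded before indexing the control set. A tiny numpy-only sketch (the control values are made up):

    import numpy as np

    AVAIL_CTRL = [-1.0, 0.0, 1.0]      # placeholder control set
    action = np.array(2.0)             # 0-d float buffer, as an RL framework may produce
    idx = round(action[()])            # unwrap to a Python scalar, then to an int index
    command = AVAIL_CTRL[idx]
    assert command == 1.0              # indexing with the raw float would raise TypeError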
_allow_unknown_configs=True) @@ -53,6 +55,7 @@ def ppo_init(policy: Policy, policy._mean_temporal_caps_loss = 0.0 policy._mean_spatial_caps_loss = 0.0 policy._mean_global_caps_loss = 0.0 + policy._l2_reg_loss = 0.0 # Convert to torch.Tensor observation bounds policy._observation_space_low = \ @@ -110,7 +113,8 @@ def ppo_loss(policy: Policy, # Append the training batches to the set train_batches["prev"] = train_batch_copy - if policy.config["caps_spatial_reg"] > 0.0: + if policy.config["caps_spatial_reg"] > 0.0 or \ + policy.config["caps_global_reg"] > 0.0: # Shallow copy the original training batch train_batch_copy = train_batch.copy(shallow=True) @@ -211,8 +215,8 @@ def value_function(self, *args: Any, **kwargs: Any) -> torch.Tensor: action_dist = dist_class(action_logits, model) action_mean_true = action_dist.deterministic_sample() + # Compute the mean action corresponding to the previous observation if policy.config["caps_temporal_reg"] > 0.0: - # Compute the mean action corresponding to the previous observation action_logits_prev = logits["prev"] if issubclass(dist_class, TorchDiagGaussian): action_mean_prev, _ = torch.chunk(action_logits_prev, 2, dim=1) @@ -220,6 +224,29 @@ def value_function(self, *args: Any, **kwargs: Any) -> torch.Tensor: action_dist_prev = dist_class(action_logits_prev, model) action_mean_prev = action_dist_prev.deterministic_sample() + # Compute the mean action corresponding to the noisy observation + if policy.config["caps_spatial_reg"] > 0.0 or \ + policy.config["caps_global_reg"] > 0.0: + action_logits_noisy = logits["noisy"] + if issubclass(dist_class, TorchDiagGaussian): + action_mean_noisy, _ = torch.chunk(action_logits_noisy, 2, dim=1) + else: + action_dist_noisy = dist_class(action_logits_noisy, model) + action_mean_noisy = action_dist_noisy.deterministic_sample() + + # Compute the mirrored mean action corresponding to the mirrored action + if policy.config["symmetric_policy_reg"] > 0.0: + action_logits_mirror = logits["mirrored"] + if issubclass(dist_class, TorchDiagGaussian): + action_mean_mirror, _ = torch.chunk(action_logits_mirror, 2, dim=1) + else: + action_dist_mirror = dist_class(action_logits_mirror, model) + action_mean_mirror = action_dist_mirror.deterministic_sample() + action_mirror_mat = policy.action_space.mirror_mat.to(device) + policy.action_space.mirror_mat = action_mirror_mat + action_mean_mirror = action_mean_mirror @ action_mirror_mat + + if policy.config["caps_temporal_reg"] > 0.0: # Minimize the difference between the successive action mean policy._mean_temporal_caps_loss = torch.mean( (action_mean_prev - action_mean_true) ** 2) @@ -229,14 +256,6 @@ def value_function(self, *args: Any, **kwargs: Any) -> torch.Tensor: policy._mean_temporal_caps_loss if policy.config["caps_spatial_reg"] > 0.0: - # Compute the mean action corresponding to the noisy observation - action_logits_noisy = logits["noisy"] - if issubclass(dist_class, TorchDiagGaussian): - action_mean_noisy, _ = torch.chunk(action_logits_noisy, 2, dim=1) - else: - action_dist_noisy = dist_class(action_logits_noisy, model) - action_mean_noisy = action_dist_noisy.deterministic_sample() - # Minimize the difference between the original action mean and the # one corresponding to the noisy observation. 
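Editor's note: the `caps_*` options appear to follow the CAPS action-smoothness regularization scheme. Stripped of the RLlib plumbing, the three penalties (temporal, spatial computed just below, and the global term that this patch switches to the noisy action mean) reduce to the following self-contained torch sketch with placeholder data and arbitrary coefficients:

    import torch

    action_mean_true = torch.randn(32, 6)    # pi(obs_t)
    action_mean_prev = torch.randn(32, 6)    # pi(obs_{t-1})
    action_mean_noisy = torch.randn(32, 6)   # pi(obs_t + noise)

    caps_temporal = torch.mean((action_mean_prev - action_mean_true) ** 2)
    caps_spatial = torch.mean((action_mean_noisy - action_mean_true) ** 2)
    caps_global = torch.mean(action_mean_noisy ** 2)

    surrogate_loss = torch.tensor(0.0)       # PPO surrogate loss would go here
    total_loss = surrogate_loss \
        + 0.01 * caps_temporal + 0.01 * caps_spatial + 0.001 * caps_global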
policy._mean_spatial_caps_loss = torch.mean( @@ -248,24 +267,13 @@ def value_function(self, *args: Any, **kwargs: Any) -> torch.Tensor: if policy.config["caps_global_reg"] > 0.0: # Minimize the magnitude of action mean - policy._mean_global_caps_loss = torch.mean(action_mean_true ** 2) + policy._mean_global_caps_loss = torch.mean(action_mean_noisy ** 2) # Add global smoothness loss to total loss total_loss += policy.config["caps_global_reg"] * \ policy._mean_global_caps_loss if policy.config["symmetric_policy_reg"] > 0.0: - # Compute the mirrored mean action corresponding to the mirrored action - action_logits_mirror = logits["mirrored"] - if issubclass(dist_class, TorchDiagGaussian): - action_mean_mirror, _ = torch.chunk(action_logits_mirror, 2, dim=1) - else: - action_dist_mirror = dist_class(action_logits_mirror, model) - action_mean_mirror = action_dist_mirror.deterministic_sample() - action_mirror_mat = policy.action_space.mirror_mat.to(device) - policy.action_space.mirror_mat = action_mirror_mat - action_mean_mirror = action_mean_mirror @ action_mirror_mat - # Minimize the assymetry of policy output policy._mean_symmetric_policy_loss = torch.mean( (action_mean_mirror - action_mean_true) ** 2) @@ -274,6 +282,17 @@ def value_function(self, *args: Any, **kwargs: Any) -> torch.Tensor: total_loss += policy.config["symmetric_policy_reg"] * \ policy._mean_symmetric_policy_loss + if policy.config["l2_reg"] > 0.0: + # Add actor l2-regularization loss + l2_reg_loss = 0.0 + for name, params in model.state_dict().items(): + if "bias" not in name: + l2_reg_loss += l2_loss(params) + policy._l2_reg_loss = l2_reg_loss + + # Add l2-regularization loss to total loss + total_loss += policy.config["l2_reg"] * policy._l2_reg_loss + return total_loss @@ -293,6 +312,8 @@ def ppo_stats(policy: Policy, stats_dict["spatial_smoothness"] = policy._mean_spatial_caps_loss if policy.config["caps_global_reg"] > 0.0: stats_dict["global_smoothness"] = policy._mean_global_caps_loss + if policy.config["l2_reg"] > 0.0: + stats_dict["l2_reg"] = policy._l2_reg_loss return stats_dict diff --git a/python/gym_jiminy/rllib/gym_jiminy/rllib/utilities.py b/python/gym_jiminy/rllib/gym_jiminy/rllib/utilities.py index 07f8c55e1..455a6b1e1 100644 --- a/python/gym_jiminy/rllib/gym_jiminy/rllib/utilities.py +++ b/python/gym_jiminy/rllib/gym_jiminy/rllib/utilities.py @@ -11,12 +11,14 @@ import logging import inspect import tracemalloc +from tempfile import mkstemp from datetime import datetime from collections import defaultdict from typing import Optional, Callable, Dict, Any, Tuple import gym import numpy as np +import plotext as plt from tensorboard.program import TensorBoard import ray @@ -32,6 +34,7 @@ from ray.rllib.agents.trainer import Trainer from ray.rllib.models.preprocessors import get_preprocessor +from jiminy_py.viewer import play_logs_files from gym_jiminy.common.utils import clip, SpaceDictNested try: @@ -43,9 +46,12 @@ except ModuleNotFoundError: pass + logger = logging.getLogger(__name__) +HISTOGRAM_BINS = 20 + PRINT_RESULT_FIELDS_FILTER = [ "training_iteration", "time_total_s", @@ -384,6 +390,7 @@ def build_policy_from_checkpoint( def train(train_agent: Trainer, max_timesteps: int = 0, max_iters: int = 0, + evaluation_num: int = 10, evaluation_period: int = 0, checkpoint_period: int = 0, record_video: bool = True, @@ -404,6 +411,10 @@ def train(train_agent: Trainer, Optional: Disabled by default. :param max_iters: Maximum number of training iterations. 0 to disable. Optional: Disabled by default. 
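Editor's note on the new `l2_reg` term added above in ppo.py: the patch uses `ray.rllib.utils.torch_ops.l2_loss`, whose exact scaling may differ from this sketch. A hand-rolled equivalent that shrinks only the weight matrices (biases excluded) and keeps gradients flowing through `named_parameters` could look like:

    import torch

    model = torch.nn.Sequential(torch.nn.Linear(8, 64), torch.nn.Tanh(),
                                torch.nn.Linear(64, 4))
    l2_reg_coeff = 1e-4  # placeholder value for the 'l2_reg' config option

    l2_reg_loss = sum(0.5 * param.pow(2).sum()
                      for name, param in model.named_parameters()
                      if "bias" not in name)
    total_loss = l2_reg_coeff * l2_reg_loss  # added on top of the PPO loss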
+ :param evaluation_num: How any evaluation to run. The log files of the best + and worst performing trials will be exported, and + some statistics will be reported if 'verbose' is + enabled. :param evaluation_period: Run one simulation (with exploration) every given number of training steps, and save the log file and a video of the result in log folder if @@ -413,7 +424,8 @@ def train(train_agent: Trainer, steps in log folder if requested. 0 to disable. Optional: Disabled by default. :param record_video: Whether or not to enable video recording during - evaluation. + evaluation. Video will be recorded for best and worst + trials. Optional: True by default. :param debug: Whether or not to monitor memory allocation for debugging memory leaks. @@ -501,15 +513,55 @@ def train(train_agent: Trainer, # Record video and log data of the result if evaluation_period > 0 and iter_num % evaluation_period == 0: - record_video_path = f"{train_agent.logdir}/iter_{iter_num}.mp4" - env, _ = test(train_agent, - explore=True, - enable_replay=record_video, - viewer_kwargs={ - "record_video_path": record_video_path, - "scene_name": f"iter_{iter_num}" - }) - env.write_log(f"{train_agent.logdir}/iter_{iter_num}.hdf5") + duration = [] + total_rewards = [] + log_files_tmp = [] + test_env = train_agent.env_creator( + train_agent.config["env_config"]) + seed = train_agent.config["seed"] or 0 + for i in range(evaluation_num): + # Evaluate the policy once + test(train_agent, + explore=True, + seed=seed+i, + test_env=test_env, + enable_stats=False, + enable_replay=False) + + # Export temporary log file + fd, log_path = mkstemp(prefix="log_", suffix=".hdf5") + os.close(fd) + test_env.write_log(log_path) + + # Monitor some statistics + duration.append(test_env.num_steps * test_env.step_dt) + total_rewards.append(test_env.total_reward) + log_files_tmp.append(log_path) + + # Backup log file of best trial + trial_best_idx = np.argmax(duration) + log_path = f"{train_agent.logdir}/iter_{iter_num}.hdf5" + shutil.move(log_files_tmp[trial_best_idx], log_path) + + # Record video of best trial if requested + if record_video: + video_path = f"{train_agent.logdir}/iter_{iter_num}.mp4" + play_logs_files(log_path, + record_video_path=video_path, + scene_name=f"iter_{iter_num}") + + # Ascii histogram if requested + if verbose: + plt.clp() + plt.subplots(1, 2) + for i, (title, data) in enumerate(zip( + ("Episode duration", "Total reward"), + (duration, total_rewards))): + plt.subplot(1, i) + plt.hist(data, HISTOGRAM_BINS) + plt.plotsize(50, 20) + plt.title(title) + plt.show() # Backup the policy if checkpoint_period > 0 and iter_num % checkpoint_period == 0: @@ -536,6 +588,7 @@ def train(train_agent: Trainer, def evaluate(env: gym.Env, policy: Policy, + seed: int, obs_filter_fn: Optional[ Callable[[np.ndarray], np.ndarray]] = None, n_frames_stack: int = 1, @@ -585,6 +638,14 @@ def evaluate(env: gym.Env, policy_forward = build_policy_wrapper( policy, obs_filter_fn, n_frames_stack, clip_action, explore) + # Make sure evaluation mode is enabled + is_training = env.is_training + if is_training: + env.eval() + + # Reset the seed of the environment + env.seed(seed) + # Initialize the simulation obs = env.reset() reward = None @@ -602,6 +663,10 @@ def evaluate(env: gym.Env, except KeyboardInterrupt: pass + # Restore training mode if it was enabled + if is_training: + env.train() + # Display some statistic if requested if enable_stats: print("env.num_steps:", env.num_steps) @@ -620,6 +685,7 @@ def evaluate(env: gym.Env, def test(test_agent: 
Trainer, explore: bool = True, + seed: Optional[int] = None, n_frames_stack: int = 1, enable_stats: bool = True, enable_replay: bool = True, @@ -672,6 +738,7 @@ def test(test_agent: Trainer, return evaluate(test_env, policy, + seed or test_agent.config["seed"] or 0, obs_filter_fn, n_frames_stack=n_frames_stack, clip_action=test_agent.config["clip_actions"], diff --git a/python/gym_jiminy/rllib/setup.py b/python/gym_jiminy/rllib/setup.py index 3e9ab7b63..191f560c8 100644 --- a/python/gym_jiminy/rllib/setup.py +++ b/python/gym_jiminy/rllib/setup.py @@ -29,7 +29,8 @@ packages=find_namespace_packages(), install_requires=[ f"gym_jiminy~={version}", - "ray[default,rllib]<=1.4.1" + "ray[default,rllib]<=1.5.1", + "plotext" ], zip_safe=False ) diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/__init__.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/__init__.py index 5d40550f6..86f87ca2c 100644 --- a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/__init__.py +++ b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/__init__.py @@ -2,7 +2,10 @@ from .spline import Spline from .qhull import ConvexHull, compute_distance_convex_to_point -from .generic import squared_norm_2, matrix_to_yaw, quat_to_yaw +from .generic import (squared_norm_2, + matrix_to_yaw, + quat_to_yaw_cos_sin, + quat_to_yaw) from .signal import integrate_zoh @@ -12,6 +15,7 @@ "compute_distance_convex_to_point", "squared_norm_2", "matrix_to_yaw", + "quat_to_yaw_cos_sin", "quat_to_yaw", "integrate_zoh" ] diff --git a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/generic.py b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/generic.py index 1f1cf3d99..095727b55 100644 --- a/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/generic.py +++ b/python/gym_jiminy/toolbox/gym_jiminy/toolbox/math/generic.py @@ -1,6 +1,7 @@ """ TODO: Write documentation. """ import math +from typing import Tuple import numba as nb import numpy as np @@ -16,17 +17,27 @@ def squared_norm_2(array: np.ndarray) -> float: @nb.jit(nopython=True, nogil=True) def matrix_to_yaw(rotation_matrix: np.ndarray) -> float: - """Compute the yaw from Yaw-Pitch-Roll Euler angles representation from a + """Compute the yaw from Yaw-Pitch-Roll Euler angles representation of a rotation matrix. """ return math.atan2(rotation_matrix[1, 0], rotation_matrix[0, 0]) +@nb.jit(nopython=True, nogil=True) +def quat_to_yaw_cos_sin(quaternion: np.ndarray) -> Tuple[float, float]: + """Compute the cosinus and sinus of the yaw from Yaw-Pitch-Roll Euler + angles representation of a quaternion. + """ + cos_yaw = - 1.0 + 2.0 * (quaternion[3] ** 2 + quaternion[0] ** 2) + sin_yaw = 2.0 * ( + quaternion[2] * quaternion[3] + quaternion[0] * quaternion[1]) + return cos_yaw, sin_yaw + + @nb.jit(nopython=True, nogil=True) def quat_to_yaw(quaternion: np.ndarray) -> float: - """Compute the yaw from Yaw-Pitch-Roll Euler angles representation from a + """Compute the yaw from Yaw-Pitch-Roll Euler angles representation of a quaternion. 
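Editor's note: the new `quat_to_yaw_cos_sin` helper above can be sanity-checked against a pure yaw rotation, assuming the (x, y, z, w) quaternion ordering used by Pinocchio/Jiminy (which the formula implies). The helper is restated inline below so the sketch runs without the compiled toolbox:

    import math
    import numpy as np

    def quat_to_yaw_cos_sin(q):
        # Same formula as the new helper, restated for a self-contained check
        cos_yaw = - 1.0 + 2.0 * (q[3] ** 2 + q[0] ** 2)
        sin_yaw = 2.0 * (q[2] * q[3] + q[0] * q[1])
        return cos_yaw, sin_yaw

    yaw = 0.7
    quat = np.array([0.0, 0.0, math.sin(yaw / 2), math.cos(yaw / 2)])  # (x, y, z, w)
    cos_yaw, sin_yaw = quat_to_yaw_cos_sin(quat)
    assert abs(cos_yaw - math.cos(yaw)) < 1e-12
    assert abs(sin_yaw - math.sin(yaw)) < 1e-12
    assert abs(math.atan2(sin_yaw, cos_yaw) - yaw) < 1e-12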
""" - return math.atan2( - 2.0 * (quaternion[2] * quaternion[3] + quaternion[0] * quaternion[1]), - - 1.0 + 2.0 * (quaternion[3] ** 2 + quaternion[0] ** 2)) + cos_yaw, sin_yaw = quat_to_yaw_cos_sin(quaternion) + return math.atan2(sin_yaw, cos_yaw) diff --git a/python/jiminy_py/src/jiminy_py/robot.py b/python/jiminy_py/src/jiminy_py/robot.py index 8514681f6..4dd872a05 100644 --- a/python/jiminy_py/src/jiminy_py/robot.py +++ b/python/jiminy_py/src/jiminy_py/robot.py @@ -166,7 +166,7 @@ def generate_hardware_description_file( root_link = next(iter(parent_links.difference(child_links))) else: root_link = None - leaf_links = list(child_links.difference(parent_links)) + leaf_links = sorted(list(child_links.difference(parent_links))) # Parse the gazebo plugins, if any. # Note that it is only useful to extract "advanced" hardware, not basic @@ -289,7 +289,7 @@ def generate_hardware_description_file( # Specify collision bodies and ground model in global config options hardware_info['Global']['collisionBodiesNames'] = \ - list(collision_bodies_names) + sorted(list(collision_bodies_names)) if gazebo_ground_stiffness is not None: hardware_info['Global']['groundStiffness'] = gazebo_ground_stiffness if gazebo_ground_damping is not None: @@ -745,9 +745,16 @@ def initialize(self, # Note that it must be done before adding the sensors because # Contact sensors requires contact points to be defined. # Mesh collisions is not numerically stable for now, so disabling it. + # Note: Be careful, the order of the contact points is important, it + # changes the computation of the external forces, which is an iterative + # algorithm for impulse model, resulting in different simulation + # results. The order of the element of the set depends of the `hash` + # method of python, whose seed is randomly generated when starting the + # interpreter for security reason. As a result, the set must be sorted + # manually to ensure consistent results. self.add_collision_bodies( collision_bodies_names, ignore_meshes=avoid_instable_collisions) - self.add_contact_points(list(set(contact_frames_names))) + self.add_contact_points(sorted(list(set(contact_frames_names)))) # Add the motors to the robot for motor_type, motors_descr in motors_info.items(): diff --git a/python/jiminy_py/src/jiminy_py/simulator.py b/python/jiminy_py/src/jiminy_py/simulator.py index a8603988d..75b485b8c 100644 --- a/python/jiminy_py/src/jiminy_py/simulator.py +++ b/python/jiminy_py/src/jiminy_py/simulator.py @@ -321,6 +321,9 @@ def seed(self, seed: int) -> None: """ assert isinstance(seed, np.uint32), "'seed' must have type np.uint32." + # Make sure no simulation is running before setting the seed + self.engine.stop() + # Set the seed through the engine instead of using # `jiminy.reset_random_generator` to keep track of the seed in options, # and thereby to log it in the telemetry as constant. @@ -577,7 +580,7 @@ def replay(self, # Extract trajectory data from pairs (robot, log) trajectories, update_hooks, extra_kwargs = [], [], {} for robot, log_data in zip(robots, logs_data): - if log_data is not None: + if log_data: traj, update_hook, _kwargs = \ extract_replay_data_from_log_data(robot, log_data) trajectories.append(traj) @@ -587,7 +590,7 @@ def replay(self, update_hooks += [None for _ in extra_trajectories] # Make sure there is something to replay - if not logs_data: + if not trajectories: raise RuntimeError( "Nothing to replay. 
Please run a simulation before calling " "`replay` method, or provided data manually.") @@ -599,10 +602,7 @@ def replay(self, **kwargs}) # Define sequence of viewer instances - viewers = [ - self.viewer, - *[None for _ in extra_logs_files], - *[None for _ in extra_trajectories]] + viewers = [self.viewer, *[None for _ in trajectories[:-1]]] # Replay the trajectories self._viewers = play_trajectories(
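Editor's note on the robot.py change above (sorting collision bodies and contact frames): iterating a Python set of strings is only stable within one interpreter process, because string hashing is randomized per process unless PYTHONHASHSEED is fixed, and the contact solver is sensitive to the order in which points are added. Sorting restores a run-to-run deterministic order. A minimal illustration with made-up frame names:

    contact_frames_names = ["LeftSole", "RightSole", "LeftToe", "RightToe"]
    unordered = set(contact_frames_names)            # iteration order may change between runs
    deterministic = sorted(set(contact_frames_names))
    print(deterministic)  # always ['LeftSole', 'LeftToe', 'RightSole', 'RightToe']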