From 9f3bf886ce5d59d2eff8d9ec93ac49cb2b816b2f Mon Sep 17 00:00:00 2001 From: stephen Date: Sun, 8 Mar 2020 18:28:48 +0000 Subject: [PATCH] Segmentation masks returned as 1 channel. Resolves #43. --- rlbench/backend/scene.py | 17 +++++++++-- rlbench/backend/utils.py | 10 +++++++ rlbench/observation_config.py | 4 ++- rlbench/task_environment.py | 54 +++++++++++++++++++---------------- tools/dataset_generator.py | 4 +++ 5 files changed, 61 insertions(+), 28 deletions(-) diff --git a/rlbench/backend/scene.py b/rlbench/backend/scene.py index 47a2f7ed2..0d3e6b28e 100644 --- a/rlbench/backend/scene.py +++ b/rlbench/backend/scene.py @@ -7,6 +7,7 @@ from rlbench.backend.observation import Observation from rlbench.backend.exceptions import ( WaypointError, BoundaryError, NoWaypointsError, DemoError) +from rlbench.backend.utils import rgb_handles_to_mask from rlbench.demo import Demo from rlbench.observation_config import ObservationConfig, CameraConfig from rlbench.backend.task import Task @@ -156,6 +157,13 @@ def get_observation(self) -> Observation: rsc_ob = self._obs_config.right_shoulder_camera wc_ob = self._obs_config.wrist_camera + lsc_mask_fn = ( + rgb_handles_to_mask if lsc_ob.masks_as_one_channel else lambda x: x) + rsc_mask_fn = ( + rgb_handles_to_mask if rsc_ob.masks_as_one_channel else lambda x: x) + wc_mask_fn = ( + rgb_handles_to_mask if wc_ob.masks_as_one_channel else lambda x: x) + obs = Observation( left_shoulder_rgb=( lsc_ob.rgb_noise.apply( @@ -181,13 +189,16 @@ def get_observation(self) -> Observation: if wc_ob.depth else None), left_shoulder_mask=( - self._cam_over_shoulder_left_mask.capture_rgb() + lsc_mask_fn( + self._cam_over_shoulder_left_mask.capture_rgb()) if lsc_ob.mask else None), right_shoulder_mask=( - self._cam_over_shoulder_right_mask.capture_rgb() + rsc_mask_fn( + self._cam_over_shoulder_right_mask.capture_rgb()) if rsc_ob.mask else None), wrist_mask=( - self._cam_wrist_mask.capture_rgb() + wc_mask_fn( + self._cam_wrist_mask.capture_rgb()) if wc_ob.mask else None), joint_velocities=( diff --git a/rlbench/backend/utils.py b/rlbench/backend/utils.py index 3f13a5d2a..912707cc3 100644 --- a/rlbench/backend/utils.py +++ b/rlbench/backend/utils.py @@ -216,3 +216,13 @@ def task_file_to_task_class(task_file): mod = importlib.reload(mod) task_class = getattr(mod, class_name) return task_class + + +def rgb_handles_to_mask(rgb_coded_handles): + # rgb_coded_handles should be (w, h, c) + # Handle encoded as : handle = R + G * 256 + B * 256 * 256 + rgb_coded_handles *= 255 # takes rgb range to 0 -> 255 + rgb_coded_handles.astype(int) + return (rgb_coded_handles[:, :, 0] + + rgb_coded_handles[:, :, 1] * 256 + + rgb_coded_handles[:, :, 2] * 256 * 256) diff --git a/rlbench/observation_config.py b/rlbench/observation_config.py index de87617db..b478eb915 100644 --- a/rlbench/observation_config.py +++ b/rlbench/observation_config.py @@ -10,7 +10,8 @@ def __init__(self, depth_noise: NoiseModel=Identity(), mask=True, image_size=(128, 128), - render_mode=RenderMode.OPENGL3): + render_mode=RenderMode.OPENGL3, + masks_as_one_channel=True): self.rgb = rgb self.rgb_noise = rgb_noise self.depth = depth @@ -18,6 +19,7 @@ def __init__(self, self.mask = mask self.image_size = image_size self.render_mode = render_mode + self.masks_as_one_channel = masks_as_one_channel def set_all(self, value: bool): self.rgb = value diff --git a/rlbench/task_environment.py b/rlbench/task_environment.py index ace3027f8..8f4dd2e83 100644 --- a/rlbench/task_environment.py +++ b/rlbench/task_environment.py @@ -9,7 +9,7 @@ from rlbench.backend.scene import Scene from rlbench.backend.task import Task from rlbench.backend.const import * -from rlbench.backend.utils import image_to_float_array +from rlbench.backend.utils import image_to_float_array, rgb_handles_to_mask from rlbench.backend.robot import Robot import logging from typing import List @@ -374,48 +374,54 @@ def _get_stored_demos(self, amount: int, image_paths: bool) -> List[Demo]: self._resize_if_needed( Image.open(obs[i].left_shoulder_rgb), obs_config.left_shoulder_camera.image_size)) + if obs_config.right_shoulder_camera.rgb: + obs[i].right_shoulder_rgb = np.array( + self._resize_if_needed(Image.open( + obs[i].right_shoulder_rgb), + obs_config.right_shoulder_camera.image_size)) + if obs_config.wrist_camera.rgb: + obs[i].wrist_rgb = np.array( + self._resize_if_needed( + Image.open(obs[i].wrist_rgb), + obs_config.wrist_camera.image_size)) + if obs_config.left_shoulder_camera.depth: obs[i].left_shoulder_depth = image_to_float_array( self._resize_if_needed( Image.open(obs[i].left_shoulder_depth), obs_config.left_shoulder_camera.image_size), DEPTH_SCALE) - if obs_config.left_shoulder_camera.mask: - obs[i].left_shoulder_mask = np.array( - self._resize_if_needed(Image.open( - obs[i].left_shoulder_mask), - obs_config.left_shoulder_camera.image_size)) - if obs_config.right_shoulder_camera.rgb: - obs[i].right_shoulder_rgb = np.array( - self._resize_if_needed(Image.open( - obs[i].right_shoulder_rgb), - obs_config.right_shoulder_camera.image_size)) if obs_config.right_shoulder_camera.depth: obs[i].right_shoulder_depth = image_to_float_array( self._resize_if_needed( Image.open(obs[i].right_shoulder_depth), obs_config.right_shoulder_camera.image_size), DEPTH_SCALE) - if obs_config.right_shoulder_camera.mask: - obs[i].right_shoulder_mask = np.array( - self._resize_if_needed(Image.open( - obs[i].right_shoulder_mask), - obs_config.right_shoulder_camera.image_size)) - if obs_config.wrist_camera.rgb: - obs[i].wrist_rgb = np.array( - self._resize_if_needed( - Image.open(obs[i].wrist_rgb), - obs_config.wrist_camera.image_size)) if obs_config.wrist_camera.depth: obs[i].wrist_depth = image_to_float_array( self._resize_if_needed( Image.open(obs[i].wrist_depth), - obs_config.wrist_camera.image_size), DEPTH_SCALE) + obs_config.wrist_camera.image_size), + DEPTH_SCALE) + + # Masks are stored as coded RGB images. + # Here we transform them into 1 channel handles. + if obs_config.left_shoulder_camera.mask: + obs[i].left_shoulder_mask = rgb_handles_to_mask( + np.array(self._resize_if_needed(Image.open( + obs[i].left_shoulder_mask), + obs_config.left_shoulder_camera.image_size))) + if obs_config.right_shoulder_camera.mask: + obs[i].right_shoulder_mask = rgb_handles_to_mask( + np.array(self._resize_if_needed(Image.open( + obs[i].right_shoulder_mask), + obs_config.right_shoulder_camera.image_size))) if obs_config.wrist_camera.mask: - obs[i].wrist_mask = np.array( + obs[i].wrist_mask = rgb_handles_to_mask(np.array( self._resize_if_needed(Image.open( obs[i].wrist_mask), - obs_config.wrist_camera.image_size)) + obs_config.wrist_camera.image_size))) + demos.append(obs) return demos diff --git a/tools/dataset_generator.py b/tools/dataset_generator.py index 3d24ddd12..b0ec606f0 100644 --- a/tools/dataset_generator.py +++ b/tools/dataset_generator.py @@ -123,6 +123,10 @@ def run(i, lock, task_index, variation_count, results, file_lock, tasks): obs_config = ObservationConfig() obs_config.set_all(True) + # We want to save the masks as rgb encodings. + obs_config.left_shoulder_camera.masks_as_one_channel = False + obs_config.right_shoulder_camera.masks_as_one_channel = False + obs_config.wrist_camera.masks_as_one_channel = False rlbench_env = Environment( action_mode=ActionMode(),