StanfordVL · cgokmen · Aug 12, 2024 · Jul 29, 2024 · Jul 30, 2024 · Jul 30, 2024
diff --git a/omnigibson/sensors/vision_sensor.py b/omnigibson/sensors/vision_sensor.py
@@ -304,22 +304,19 @@ def _get_obs(self):
                     obs[modality], id_to_labels, obs["seg_semantic"], info["seg_semantic"], id=True
                 )
             elif "bbox" in modality:
-                obs[modality] = self._remap_bounding_box_semantic_ids(obs[modality])
+                id_to_labels = raw_obs["info"]["idToLabels"]
+                obs[modality], info[modality] = self._remap_bounding_box_semantic_ids(obs[modality], id_to_labels)
         return obs, info
 
-    def _remap_semantic_segmentation(self, img, id_to_labels):
+    def _preprocess_semantic_labels(self, id_to_labels):
         """
-        Remap the semantic segmentation image to the class IDs defined in semantic_class_name_to_id().
-        Also, correct the id_to_labels input with the labels from semantic_class_name_to_id() and return it.
+        Preprocess the semantic labels to feed into the remapper.
 
         Args:
-            img (np.ndarray): Semantic segmentation image to remap
             id_to_labels (dict): Dictionary of semantic IDs to class labels
         Returns:
-            np.ndarray: Remapped semantic segmentation image
-            dict: Corrected id_to_labels dictionary
+            dict: Preprocessed dictionary of semantic IDs to class labels
         """
-        # Preprocess id_to_labels to feed into the remapper
         replicator_mapping = {}
         for key, val in id_to_labels.items():
             key = int(key)
@@ -338,6 +335,21 @@ def _remap_semantic_segmentation(self, img, id_to_labels):
             assert (
                 replicator_mapping[key] in semantic_class_id_to_name().values()
             ), f"Class {val['class']} does not exist in the semantic class name to id mapping!"
+        return replicator_mapping
+
+    def _remap_semantic_segmentation(self, img, id_to_labels):
+        """
+        Remap the semantic segmentation image to the class IDs defined in semantic_class_name_to_id().
+        Also, correct the id_to_labels input with the labels from semantic_class_name_to_id() and return it.
+
+        Args:
+            img (np.ndarray): Semantic segmentation image to remap
+            id_to_labels (dict): Dictionary of semantic IDs to class labels
+        Returns:
+            np.ndarray: Remapped semantic segmentation image
+            dict: Corrected id_to_labels dictionary
+        """
+        replicator_mapping = self._preprocess_semantic_labels(id_to_labels)
 
         image_keys = np.unique(img)
         assert set(image_keys).issubset(
@@ -447,18 +459,24 @@ def _register_instance(self, instance_name, id=False):
         if instance_name not in registry.values():
             registry[len(registry)] = instance_name
 
-    def _remap_bounding_box_semantic_ids(self, bboxes):
+    def _remap_bounding_box_semantic_ids(self, bboxes, id_to_labels):
         """
         Remap the semantic IDs of the bounding boxes to our own semantic IDs.
 
         Args:
             bboxes (list of dict): List of bounding boxes to remap
+            id_to_labels (dict): Dictionary of semantic IDs to class labels
         Returns:
             list of dict: Remapped list of bounding boxes
+            dict: Mapping from our semantic class IDs to class names for the classes in id_to_labels
         """
+        replicator_mapping = self._preprocess_semantic_labels(id_to_labels)
+        # Look up the class name -> ID table once instead of once per bounding box
+        name_to_id = semantic_class_name_to_id()
         for bbox in bboxes:
-            bbox["semanticId"] = VisionSensor.SEMANTIC_REMAPPER.remap_bbox(bbox["semanticId"], self._scene)
-        return bboxes
+            bbox["semanticId"] = name_to_id[replicator_mapping[bbox["semanticId"]]]
+        info = {name_to_id[val]: val for val in replicator_mapping.values()}
+        return bboxes, info
 
     def add_modality(self, modality):
         # Check if we already have this modality (if so, no need to initialize it explicitly)

diff --git a/omnigibson/transition_rules.py b/omnigibson/transition_rules.py
@@ -1,5 +1,6 @@
 import itertools
 import json
+import math
 import operator
 import os
 from abc import ABCMeta, abstractmethod
@@ -1816,7 +1817,7 @@ def _spawn_object_in_container(obj):
                 # When ignore_nonrecipe_objects is True, we don't necessarily remove all objects in the container.
                 # Therefore, we need to check for contact when generating output systems.
                 check_contact=self.ignore_nonrecipe_objects,
-                max_samples=int(volume / (np.pi * (out_system.particle_radius**3) * 4 / 3)),
+                max_samples=math.ceil(volume / (np.pi * (out_system.particle_radius**3) * 4 / 3)),
             )
 
         # Return transition results

diff --git a/omnigibson/utils/gym_utils.py b/omnigibson/utils/gym_utils.py
@@ -55,7 +55,7 @@ def recursively_generate_compatible_dict(dic):
             out[k] = recursively_generate_compatible_dict(dic=v)
         elif isinstance(v, np.ndarray) and len(v.dtype) > 0:
-            # Map to list of tuples
+            # Map to tuple of tuples
-            out[k] = list(map(tuple, v))
+            out[k] = tuple(map(tuple, v))
         else:
             # Preserve the key-value pair
             out[k] = v

diff --git a/omnigibson/utils/vision_utils.py b/omnigibson/utils/vision_utils.py
@@ -138,24 +138,6 @@ def remap(self, old_mapping, new_mapping, image, image_keys=None):
 
         return remapped_img, remapped_labels
 
-    def remap_bbox(self, semantic_id, scene):
-        """
-        Remaps a semantic id to a new id using the key_array.
-        Args:
-            semantic_id (int): The semantic id to remap.
-            scene: The scene we are remapping for.
-        Returns:
-            int: The remapped id.
-        """
-        if semantic_id >= len(self.key_array):
-            if semantic_id not in self.warning_printed:
-                og.log.warning(
-                    f"We do not have semantic information about bounding box semantic id {semantic_id} yet. Marking as unlabelled."
-                )
-                self.warning_printed.add(semantic_id)
-            return semantic_class_name_to_id()["unlabelled"]
-        return self.key_array[semantic_id]
-
 
 def randomize_colors(N, bright=True):
     """

diff --git a/tests/test_sensors.py b/tests/test_sensors.py
@@ -1,14 +1,14 @@
 import numpy as np
-import pytest
 from utils import SYSTEM_EXAMPLES, og_test, place_obj_on_floor_plane
 
 import omnigibson as og
 import omnigibson.utils.transform_utils as T
 from omnigibson.sensors import VisionSensor
+from omnigibson.utils.constants import semantic_class_id_to_name
 
 
 @og_test
-def test_seg(env):
+def test_segmentation_modalities(env):
     breakfast_table = env.scene.object_registry("name", "breakfast_table")
     dishtowel = env.scene.object_registry("name", "dishtowel")
     robot = env.scene.robots[0]
@@ -17,6 +17,10 @@ def test_seg(env):
     robot.set_position_orientation([0, 0.8, 0.0], T.euler2quat([0, 0, -np.pi / 2]))
     robot.reset()
 
+    modalities_required = ["seg_semantic", "seg_instance", "seg_instance_id"]
+    for modality in modalities_required:
+        robot.add_obs_modality(modality)
+
     systems = [env.scene.get_system(system_name) for system_name in SYSTEM_EXAMPLES.keys()]
     for i, system in enumerate(systems):
         # Sample two particles for each system
@@ -93,5 +97,42 @@ def test_seg(env):
     # assert set(seg_instance_id_info.values()) == set(expected_dict.values())
 
 
-def test_clear_sim():
-    og.clear()
+@og_test
+def test_bbox_modalities(env):
+    """Check that 2D and 3D bounding box observations report the expected semantic classes."""
+    robot = env.scene.robots[0]
+    breakfast_table = env.scene.object_registry("name", "breakfast_table")
+    dishtowel = env.scene.object_registry("name", "dishtowel")
+
+    # Arrange the table, the dishtowel and the robot before sampling observations
+    place_obj_on_floor_plane(breakfast_table)
+    dishtowel.set_position_orientation([-0.4, 0.0, 0.55], [0, 0, 0, 1])
+    robot.set_position_orientation([0, 0.8, 0.0], T.euler2quat([0, 0, -np.pi / 2]))
+    robot.reset()
+
+    for modality in ("bbox_2d_tight", "bbox_2d_loose", "bbox_3d"):
+        robot.add_obs_modality(modality)
+
+    # Step once and render a few frames before reading the sensor
+    og.sim.step()
+    for _ in range(3):
+        og.sim.render()
+
+    vision_sensors = [sensor for sensor in robot.sensors.values() if isinstance(sensor, VisionSensor)]
+    assert len(vision_sensors) > 0
+    all_observation, _ = vision_sensors[0].get_obs()
+
+    bbox_2d_tight = all_observation["bbox_2d_tight"]
+    bbox_2d_loose = all_observation["bbox_2d_loose"]
+    bbox_3d = all_observation["bbox_3d"]
+
+    assert bbox_2d_tight.shape[0] == 4
+    assert bbox_2d_loose.shape[0] == 4
+    assert bbox_3d.shape[0] == 3
+
+    # Translate the remapped semantic IDs back to class names and compare
+    id_to_name = semantic_class_id_to_name()
+    bbox_2d_objs = {id_to_name[bbox["semanticId"]] for bbox in bbox_2d_tight}
+    bbox_3d_objs = {id_to_name[bbox["semanticId"]] for bbox in bbox_3d}
+    assert bbox_2d_objs == {"floors", "agent", "breakfast_table", "dishtowel"}
+    assert bbox_3d_objs == {"agent", "breakfast_table", "dishtowel"}
diff --git a/tests/utils.py b/tests/utils.py
@@ -170,7 +170,7 @@ def assert_test_env():
             "robots": [
                 {
                     "type": "Fetch",
-                    "obs_modalities": ["seg_semantic", "seg_instance", "seg_instance_id"],
+                    "obs_modalities": "rgb",
                     "position": [150, 150, 100],
                     "orientation": [0, 0, 0, 1],
                 }