openvinotoolkit · sovrasov · Nov 8, 2024 · Nov 7, 2024 · Nov 7, 2024 · Nov 7, 2024
@@ -8,7 +8,7 @@
 from abc import abstractmethod
 from collections.abc import Iterable
 from contextlib import contextmanager
-from typing import TYPE_CHECKING, Callable, Generic, Iterator, List, Union
+from typing import TYPE_CHECKING, Any, Callable, Generic, Iterator, List, Union
 
 import cv2
 import numpy as np
@@ -92,6 +92,7 @@
         self.image_color_channel = image_color_channel
         self.stack_images = stack_images
         self.to_tv_image = to_tv_image
+
         if self.dm_subset.categories():
             self.label_info = LabelInfo.from_dm_label_groups(self.dm_subset.categories()[AnnotationType.label])
         else:
@@ -141,7 +142,11 @@
         msg = f"Reach the maximum refetch number ({self.max_refetch})"
         raise RuntimeError(msg)
 
-    def _get_img_data_and_shape(self, img: Image) -> tuple[np.ndarray, tuple[int, int]]:
+    def _get_img_data_and_shape(
+        self,
+        img: Image,
+        roi: dict[str, Any] | None = None,
+    ) -> tuple[np.ndarray, tuple[int, int]]:
         key = img.path if isinstance(img, ImageFromFile) else id(img)
 
         if (img_data := self.mem_cache_handler.get(key=key)[0]) is not None:
@@ -158,6 +163,18 @@
             msg = "Cannot get image data"
             raise RuntimeError(msg)
 
+        if roi:
+            # crop img_data to the ROI; x1/y1/x2/y2 are scaled by image width/height (presumably normalized)
+            shape = roi["shape"]
+            h, w = img_data.shape[:2]
+            x1, y1, x2, y2 = (
+                np.trunc(shape["x1"] * w),
+                np.trunc(shape["y1"] * h),
+                np.ceil(shape["x2"] * w),
+                np.ceil(shape["y2"] * h),
+            )
+            img_data = img_data[int(y1) : int(y2), int(x1) : int(x2)]
+
         img_data = self._cache_img(key=key, img_data=img_data.astype(np.uint8))
 
         return img_data, img_data.shape[:2]

@@ -32,18 +32,18 @@
     def _get_item_impl(self, index: int) -> MulticlassClsDataEntity | None:
         item = self.dm_subset[index]
         img = item.media_as(Image)
-        img_data, img_shape = self._get_img_data_and_shape(img)
+        roi = item.attributes.get("roi", None)
+        img_data, img_shape = self._get_img_data_and_shape(img, roi)
+        if roi:
+            # ROI carries its own labels: keep only CLASSIFICATION-domain label ids
+            labels_ids = [
+                label["label"]["_id"] for label in roi["labels"] if label["label"]["domain"] == "CLASSIFICATION"
+            ]
+            label_anns = [self.label_info.label_names.index(label_id) for label_id in labels_ids]
+        else:
+            # no ROI: take label indices from the item's Label annotations
+            label_anns = [ann.label for ann in item.annotations if isinstance(ann, Label)]
 
-        label_anns = []
-        for ann in item.annotations:
-            if isinstance(ann, Label):
-                label_anns.append(ann)
-            else:
-                # If the annotation is not Label, it should be converted to Label.
-                # For Chained Task: Detection (Bbox) -> Classification (Label)
-                label = Label(label=ann.label)
-                if label not in label_anns:
-                    label_anns.append(label)
         if len(label_anns) > 1:
             msg = f"Multi-class Classification can't use the multi-label, currently len(labels) = {len(label_anns)}"
             raise ValueError(msg)
@@ -56,7 +56,7 @@
                 ori_shape=img_shape,
                 image_color_channel=self.image_color_channel,
             ),
-            labels=torch.as_tensor([ann.label for ann in label_anns]),
+            labels=torch.as_tensor(label_anns),
         )
 
         return self._apply_transforms(entity)