diff --git a/.github/workflows/publish_to_pypi.yml b/.github/workflows/publish_to_pypi.yml
index 75341b384d..1d434bcd17 100644
--- a/.github/workflows/publish_to_pypi.yml
+++ b/.github/workflows/publish_to_pypi.yml
@@ -80,12 +80,12 @@ jobs:
           file_glob: true
       - name: Publish package distributions to PyPI
         if: ${{ steps.check-tag.outputs.match != '' }}
-        uses: pypa/gh-action-pypi-publish@v1.10.1
+        uses: pypa/gh-action-pypi-publish@v1.10.2
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
      - name: Publish package distributions to TestPyPI
        if: ${{ steps.check-tag.outputs.match == '' }}
-        uses: pypa/gh-action-pypi-publish@v1.10.1
+        uses: pypa/gh-action-pypi-publish@v1.10.2
        with:
          password: ${{ secrets.TESTPYPI_API_TOKEN }}
          repository-url: https://test.pypi.org/legacy/
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0748fcb0d1..6bcdb1e1a7 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,7 +5,23 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## \[Q4 2024 Release 1.9.1\]
+## \[Unreleased\]
+
+### New features
+- Support KITTI 3D format
+  ()
+- Add PseudoLabeling transform for unlabeled datasets
+  ()
+
+### Enhancements
+- Raise an appropriate error when exporting a datumaro dataset if its subset name contains path separators.
+  ()
+- Update docs for transform plugins
+  ()
+
+### Bug fixes
+
+## \[Q4 2024 Release 1.9.1\]
 
 ### New features
 
 ### Enhancements
@@ -22,6 +38,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### New features
 - Add a new CLI command: datum format
   ()
+- Add a new Cuboid2D annotation type
+  ()
 - Support language dataset for DmTorchDataset
   ()
diff --git a/docs/source/docs/command-reference/context_free/transform.md b/docs/source/docs/command-reference/context_free/transform.md
index c33bdc8359..7a63a90c55 100644
--- a/docs/source/docs/command-reference/context_free/transform.md
+++ b/docs/source/docs/command-reference/context_free/transform.md
@@ -101,7 +101,10 @@ Basic dataset item manipulations:
 - [`remove_images`](#remove_images) - Removes specific images
 - [`remove_annotations`](#remove_annotations) - Removes annotations
 - [`remove_attributes`](#remove_attributes) - Removes attributes
-- [`astype_annotations`](#astype_annotations) - Convert annotation type
+- [`astype_annotations`](#astype_annotations) - Transforms annotation types
+- [`pseudo_labeling`](#pseudo_labeling) - Generates pseudo-labels for unlabeled data
+- [`correct`](#correct) - Corrects annotations based on a validation report
+- [`clean`](#clean) - Removes noisy data from tabular datasets
 
 Subset manipulations:
 - [`random_split`](#random_split) - Splits dataset into subsets
@@ -826,6 +829,35 @@ bbox_values_decrement [-h]
 
 Optional arguments:
 - `-h`, `--help` (flag) - Show this help message and exit
 
+#### `pseudo_labeling`
+
+Assigns pseudo-labels to items in a dataset based on their similarity to predefined labels. This transform is useful for semi-supervised learning when dealing with missing or uncertain labels.
+
+The process includes:
+
+- Similarity Computation: Uses hashing techniques to compute the similarity between items and predefined labels.
+- Pseudo-Label Assignment: Assigns the most similar label as a pseudo-label to each item.
+
+Attributes:
+
+- `extractor` (IDataset) - Provides access to dataset items and their annotations.
+- `labels` (Optional[List[str]]) - List of predefined labels for pseudo-labeling.
+  Defaults to all available labels if not provided.
+- `explorer` (Optional[Explorer]) - Computes hash keys for items and labels. If not provided, a new Explorer is created.
+
+Usage:
+```console
+pseudo_labeling [-h] [--labels LABELS]
+```
+
+Optional arguments:
+- `-h`, `--help` (flag) - Show this help message and exit
+- `--labels` (str) - Comma-separated list of label names for pseudo-labeling
+
+Examples:
+- Assign pseudo-labels based on predefined labels
+  ```console
+  datum transform -t pseudo_labeling -- --labels 'label1,label2'
+  ```
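The transform can also be driven from the Python API, as the new tests in this PR do. A minimal sketch, assuming a dataset with unlabeled items (the path and label names below are illustrative):

```python
from datumaro.components.dataset import Dataset

# Path is illustrative; the format is auto-detected when omitted.
dataset = Dataset.import_from("path/to/dataset")

# Assign each item the most similar of the given labels as a pseudo-label.
# When `labels` is omitted, all labels defined in the dataset are used.
dataset = dataset.transform("pseudo_labeling", labels=["label1", "label2"])
```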
 
 #### `correct`
 
 Correct the dataset from a validation report
@@ -838,3 +870,27 @@ correct [-h] [-r REPORT_PATH]
 
 Optional arguments:
 - `-h`, `--help` (flag) - Show this help message and exit
 - `-r`, `--reports` (str) - A validation report from a 'validate' CLI (default=validation_reports.json)
+
+#### `clean`
+
+Refines and preprocesses media items in a dataset, focusing on string, numeric, and categorical data. This transform is designed to clean and improve the quality of the data, making it more suitable for analysis and modeling.
+
+The cleaning process includes:
+
+- String Data: Removes unnecessary characters using NLP techniques.
+- Numeric Data: Identifies and handles outliers and missing values.
+- Categorical Data: Cleans and refines categorical information.
+
+Usage:
+```console
+clean [-h]
+```
+
+Optional arguments:
+- `-h`, `--help` (flag) - Show this help message and exit
+
+Examples:
+- Clean and preprocess dataset items
+  ```console
+  datum transform -t clean
+  ```
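The same transform is available from the Python API; a minimal sketch (the path is illustrative, and `clean` is aimed at tabular inputs):

```python
from datumaro.components.dataset import Dataset

# Path is illustrative; point it at a tabular dataset.
dataset = Dataset.import_from("path/to/tabular_dataset")

# Refine string values, handle numeric outliers and missing values,
# and tidy categorical columns.
dataset = dataset.transform("clean")
```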
diff --git a/docs/source/docs/data-formats/formats/datumaro.md b/docs/source/docs/data-formats/formats/datumaro.md
index b12f1af6a1..0e9f1abfe8 100644
--- a/docs/source/docs/data-formats/formats/datumaro.md
+++ b/docs/source/docs/data-formats/formats/datumaro.md
@@ -73,6 +73,8 @@ A Datumaro dataset directory should have the following structure:
 └── ...
 ```
 
+Note that the subset name shouldn't contain path separators.
+
 If your dataset is not following the above directory structure,
 it cannot detect and import your dataset as the Datumaro format properly.
 
diff --git a/docs/source/docs/data-formats/formats/datumaro_binary.md b/docs/source/docs/data-formats/formats/datumaro_binary.md
index 7b724b3734..a970d135a5 100644
--- a/docs/source/docs/data-formats/formats/datumaro_binary.md
+++ b/docs/source/docs/data-formats/formats/datumaro_binary.md
@@ -113,6 +113,8 @@ A DatumaroBinary dataset directory should have the following structure:
 └── ...
 ```
 
+Note that the subset name shouldn't contain path separators.
+
 If your dataset is not following the above directory structure,
 it cannot detect and import your dataset as the DatumaroBinary format properly.
 
diff --git a/src/datumaro/components/annotation.py b/src/datumaro/components/annotation.py
index c68af00c4a..c43599227f 100644
--- a/src/datumaro/components/annotation.py
+++ b/src/datumaro/components/annotation.py
@@ -50,6 +50,7 @@ class AnnotationType(IntEnum):
     feature_vector = 13
     tabular = 14
     rotated_bbox = 15
+    cuboid_2d = 16
 
 COORDINATE_ROUNDING_DIGITS = 2
@@ -1363,6 +1364,41 @@ def wrap(item, **kwargs):
         return attr.evolve(item, **d)
 
 
+@attrs(slots=True, init=False, order=False)
+class Cuboid2D(Annotation):
+    """
+    Cuboid2D annotation class. This class represents a 3D bounding box by the
+    image-plane (2D) coordinates of its 8 corners, listed in the following order:
+    [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x5, y5), (x6, y6), (x7, y7), (x8, y8)].
+
+          6---7
+         /|  /|
+        5-+-8 |
+        | 2 + 3
+        |/  |/
+        1---4
+
+    Attributes:
+        _type (AnnotationType): The type of annotation, set to `AnnotationType.cuboid_2d`.
+
+    Methods:
+        __init__: Initializes the Cuboid2D with its coordinates.
+        wrap: Creates a new Cuboid2D instance with updated attributes.
+    """
+
+    _type = AnnotationType.cuboid_2d
+    points = field(default=None)
+    label: Optional[int] = field(
+        converter=attr.converters.optional(int), default=None, kw_only=True
+    )
+    z_order: int = field(default=0, validator=default_if_none(int), kw_only=True)
+
+    def __init__(self, _points: Iterable[Tuple[float, float]], *args, **kwargs):
+        kwargs.pop("points", None)  # comes from wrap()
+        self.__attrs_init__(points=_points, *args, **kwargs)
+
+
 @attrs(slots=True, order=False)
 class PointsCategories(Categories):
     """
diff --git a/src/datumaro/components/annotations/matcher.py b/src/datumaro/components/annotations/matcher.py
index db9322722a..eb7c874cc4 100644
--- a/src/datumaro/components/annotations/matcher.py
+++ b/src/datumaro/components/annotations/matcher.py
@@ -35,6 +35,7 @@
     "ImageAnnotationMatcher",
     "HashKeyMatcher",
     "FeatureVectorMatcher",
+    "Cuboid2DMatcher",
 ]
 
@@ -378,3 +379,8 @@ def distance(self, a, b):
             b = Points([p for pt in b.as_polygon() for p in pt])
 
         return OKS(a, b, sigma=self.sigma)
+
+
+@attrs
+class Cuboid2DMatcher(ShapeMatcher):
+    pass
diff --git a/src/datumaro/components/annotations/merger.py b/src/datumaro/components/annotations/merger.py
index c1c356f81b..8ff7593a61 100644
--- a/src/datumaro/components/annotations/merger.py
+++ b/src/datumaro/components/annotations/merger.py
@@ -12,6 +12,7 @@
     AnnotationMatcher,
     BboxMatcher,
     CaptionsMatcher,
+    Cuboid2DMatcher,
     Cuboid3dMatcher,
     FeatureVectorMatcher,
     HashKeyMatcher,
@@ -210,3 +211,8 @@ class TabularMerger(AnnotationMerger, TabularMatcher):
 @attrs
 class RotatedBboxMerger(_ShapeMerger, RotatedBboxMatcher):
     pass
+
+
+@attrs
+class Cuboid2DMerger(_ShapeMerger, Cuboid2DMatcher):
+    pass
diff --git a/src/datumaro/components/errors.py b/src/datumaro/components/errors.py
index c850fcc551..446c16a548 100644
--- a/src/datumaro/components/errors.py
+++ b/src/datumaro/components/errors.py
@@ -342,6 +342,16 @@ def __str__(self):
         return f"Item {self.item_id} is repeated in the source sequence."
 
 
+@define(auto_exc=False)
+class PathSeparatorInSubsetNameError(DatasetError):
+    subset: str = field()
+
+    def __str__(self):
+        return (
+            f"Failed to export the subset '{self.subset}': subset name contains path separator(s)."
+ ) + + class DatasetQualityError(DatasetError): pass diff --git a/src/datumaro/components/merge/intersect_merge.py b/src/datumaro/components/merge/intersect_merge.py index 26677661ea..bb545f950d 100644 --- a/src/datumaro/components/merge/intersect_merge.py +++ b/src/datumaro/components/merge/intersect_merge.py @@ -19,6 +19,7 @@ AnnotationMerger, BboxMerger, CaptionsMerger, + Cuboid2DMerger, Cuboid3dMerger, EllipseMerger, FeatureVectorMerger, @@ -455,6 +456,8 @@ def _for_type(t, **kwargs): return _make(TabularMerger, **kwargs) elif t is AnnotationType.rotated_bbox: return _make(RotatedBboxMerger, **kwargs) + elif t is AnnotationType.cuboid_2d: + return _make(Cuboid2DMerger, **kwargs) else: raise NotImplementedError("Type %s is not supported" % t) diff --git a/src/datumaro/components/visualizer.py b/src/datumaro/components/visualizer.py index 7030165871..12b4acc05a 100644 --- a/src/datumaro/components/visualizer.py +++ b/src/datumaro/components/visualizer.py @@ -19,6 +19,7 @@ AnnotationType, Bbox, Caption, + Cuboid2D, Cuboid3d, DepthAnnotation, Ellipse, @@ -661,6 +662,39 @@ def _draw_cuboid_3d( ) -> None: raise NotImplementedError(f"{ann.type} is not implemented yet.") + def _draw_cuboid_2d( + self, + ann: Cuboid2D, + label_categories: Optional[LabelCategories], + fig: Figure, + ax: Axes, + context: List, + ) -> None: + import matplotlib.patches as patches + + points = ann.points + color = self._get_color(ann) + label_text = label_categories[ann.label].name if label_categories is not None else ann.label + + # Define the faces based on vertex indices + + faces = [ + [points[i] for i in [0, 1, 2, 3]], # Bottom face + [points[i] for i in [4, 5, 6, 7]], # Top face + [points[i] for i in [0, 1, 5, 4]], # Front face + [points[i] for i in [1, 2, 6, 5]], # Right face + [points[i] for i in [2, 3, 7, 6]], # Back face + [points[i] for i in [3, 0, 4, 7]], # Left face + ] + ax.text(points[0][0], points[0][1] - self.text_y_offset, label_text, color=color) + + # Draw each face + for face in faces: + polygon = patches.Polygon( + face, fill=False, linewidth=self.bbox_linewidth, edgecolor=color + ) + ax.add_patch(polygon) + def _draw_super_resolution_annotation( self, ann: SuperResolutionAnnotation, diff --git a/src/datumaro/plugins/data_formats/datumaro/base.py b/src/datumaro/plugins/data_formats/datumaro/base.py index ee7a8cdc21..a4034269f7 100644 --- a/src/datumaro/plugins/data_formats/datumaro/base.py +++ b/src/datumaro/plugins/data_formats/datumaro/base.py @@ -11,6 +11,7 @@ AnnotationType, Bbox, Caption, + Cuboid2D, Cuboid3d, Ellipse, GroupType, @@ -378,6 +379,18 @@ def _load_annotations(self, item: Dict): elif ann_type == AnnotationType.hash_key: continue + elif ann_type == AnnotationType.cuboid_2d: + loaded.append( + Cuboid2D( + list(map(tuple, points)), + label=label_id, + id=ann_id, + attributes=attributes, + group=group, + object_id=object_id, + z_order=z_order, + ) + ) else: raise NotImplementedError() except Exception as e: diff --git a/src/datumaro/plugins/data_formats/datumaro/exporter.py b/src/datumaro/plugins/data_formats/datumaro/exporter.py index 494492cbe8..a470b66b8f 100644 --- a/src/datumaro/plugins/data_formats/datumaro/exporter.py +++ b/src/datumaro/plugins/data_formats/datumaro/exporter.py @@ -20,6 +20,7 @@ Annotation, Bbox, Caption, + Cuboid2D, Cuboid3d, Ellipse, HashKey, @@ -37,6 +38,7 @@ from datumaro.components.crypter import NULL_CRYPTER from datumaro.components.dataset_base import DatasetItem from datumaro.components.dataset_item_storage import ItemStatus +from 
datumaro.components.errors import PathSeparatorInSubsetNameError
 from datumaro.components.exporter import ExportContextComponent, Exporter
 from datumaro.components.media import Image, MediaElement, PointCloud, Video, VideoFrame
 from datumaro.util import cast, dump_json_file
@@ -184,7 +186,8 @@ def context_save_media(
 
         if context.save_media:
             fname = context.make_video_filename(item)
-            context.save_video(item, fname=fname, subdir=item.subset)
+            subdir = item.subset.replace(os.sep, "_") if item.subset else None
+            context.save_video(item, fname=fname, subdir=subdir)
             item.media = Video(
                 path=fname,
                 step=video._step,
@@ -199,7 +202,8 @@
 
         if context.save_media:
             fname = context.make_video_filename(item)
-            context.save_video(item, fname=fname, subdir=item.subset)
+            subdir = item.subset.replace(os.sep, "_") if item.subset else None
+            context.save_video(item, fname=fname, subdir=subdir)
             item.media = VideoFrame(Video(fname), video_frame.index)
 
         yield
@@ -209,8 +213,9 @@
 
         if context.save_media:
             # Temporarily update image path and save it.
-            fname = context.make_image_filename(item)
-            context.save_image(item, encryption=encryption, fname=fname, subdir=item.subset)
+            fname = context.make_image_filename(item, name=str(item.id).replace(os.sep, "_"))
+            subdir = item.subset.replace(os.sep, "_") if item.subset else None
+            context.save_image(item, encryption=encryption, fname=fname, subdir=subdir)
             item.media = Image.from_file(path=fname, size=image._size)
 
         yield
@@ -219,14 +224,18 @@
         pcd = item.media_as(PointCloud)
 
         if context.save_media:
-            pcd_fname = context.make_pcd_filename(item)
-            context.save_point_cloud(item, fname=pcd_fname, subdir=item.subset)
+            pcd_name = str(item.id).replace(os.sep, "_")
+            pcd_fname = context.make_pcd_filename(item, name=pcd_name)
+            subdir = item.subset.replace(os.sep, "_") if item.subset else None
+            context.save_point_cloud(item, fname=pcd_fname, subdir=subdir)
 
             extra_images = []
             for i, extra_image in enumerate(pcd.extra_images):
                 extra_images.append(
                     Image.from_file(
-                        path=context.make_pcd_extra_image_filename(item, i, extra_image)
+                        path=context.make_pcd_extra_image_filename(
+                            item, i, extra_image, name=f"{pcd_name}/extra_image_{i}"
+                        )
                     )
                 )
 
@@ -311,6 +320,8 @@ def _gen_item_desc(self, item: DatasetItem, *args, **kwargs) -> Dict:
                 converted_ann = self._convert_ellipse_object(ann)
             elif isinstance(ann, HashKey):
                 continue
+            elif isinstance(ann, Cuboid2D):
+                converted_ann = self._convert_cuboid_2d_object(ann)
             else:
                 raise NotImplementedError()
             annotations.append(converted_ann)
@@ -435,6 +446,18 @@ def _convert_cuboid_3d_object(self, obj):
     def _convert_ellipse_object(self, obj: Ellipse):
         return self._convert_shape_object(obj)
 
+    def _convert_cuboid_2d_object(self, obj: Cuboid2D):
+        converted = self._convert_annotation(obj)
+
+        converted.update(
+            {
+                "label_id": cast(obj.label, int),
+                "points": obj.points,
+                "z_order": obj.z_order,
+            }
+        )
+        return converted
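For orientation, the payload written for a single Cuboid2D would look roughly as below. The `label_id`, `points`, and `z_order` keys come from the converter above; the remaining keys are assumptions, mirroring what `_convert_annotation` fills in for the other converters:

```python
# Sketch of one exported Cuboid2D entry (values taken from the test fixture).
# "id", "type", "attributes", and "group" are assumed base fields.
converted = {
    "id": 5,
    "type": "cuboid_2d",
    "attributes": {"x": 1, "y": "2"},
    "group": 0,
    "label_id": 3,
    "points": [(1, 1), (3, 1), (3, 3), (1, 3),
               (1.5, 1.5), (3.5, 1.5), (3.5, 3.5), (1.5, 3.5)],
    "z_order": 2,
}
```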
 
 class _StreamSubsetWriter(_SubsetWriter):
     def __init__(
@@ -492,18 +515,27 @@ def create_writer(
             default_image_ext=self._default_image_ext,
         )
 
+        if os.path.sep in subset:
+            raise PathSeparatorInSubsetNameError(subset)
+
         return (
             _SubsetWriter(
                 context=self,
                 subset=subset,
-                ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT),
+                ann_file=osp.join(
+                    self._annotations_dir,
+                    subset + self.PATH_CLS.ANNOTATION_EXT,
+                ),
                 export_context=export_context,
             )
             if not self._stream
             else _StreamSubsetWriter(
                 context=self,
                 subset=subset,
-                ann_file=osp.join(self._annotations_dir, subset + self.PATH_CLS.ANNOTATION_EXT),
+                ann_file=osp.join(
+                    self._annotations_dir,
+                    subset + self.PATH_CLS.ANNOTATION_EXT,
+                ),
                 export_context=export_context,
             )
         )
diff --git a/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py b/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py
index a1c86d5061..0b257334fb 100644
--- a/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py
+++ b/src/datumaro/plugins/data_formats/datumaro_binary/exporter.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2023 Intel Corporation
+# Copyright (C) 2024 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -15,7 +15,7 @@
 from datumaro.components.crypter import NULL_CRYPTER, Crypter
 from datumaro.components.dataset_base import DatasetItem, IDataset
-from datumaro.components.errors import DatumaroError
+from datumaro.components.errors import DatumaroError, PathSeparatorInSubsetNameError
 from datumaro.components.exporter import ExportContext, ExportContextComponent, Exporter
 from datumaro.plugins.data_formats.datumaro.exporter import DatumaroExporter
 from datumaro.plugins.data_formats.datumaro.exporter import _SubsetWriter as __SubsetWriter
@@ -309,6 +309,9 @@ def create_writer(
             default_image_ext=self._default_image_ext,
         )
 
+        if osp.sep in subset:
+            raise PathSeparatorInSubsetNameError(subset)
+
         return _SubsetWriter(
             context=self,
             subset=subset,
diff --git a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py
index cefedf4cbd..01ee56d60a 100644
--- a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py
+++ b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/__init__.py
@@ -22,6 +22,7 @@
     "CaptionMapper",
     "Cuboid3dMapper",
     "EllipseMapper",
+    "Cuboid2DMapper",
     # common
     "Mapper",
     "DictMapper",
diff --git a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py
index 4c7269719e..c26658bc64 100644
--- a/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py
+++ b/src/datumaro/plugins/data_formats/datumaro_binary/mapper/annotation.py
@@ -12,6 +12,7 @@
     AnnotationType,
     Bbox,
     Caption,
+    Cuboid2D,
     Cuboid3d,
     Ellipse,
     Label,
@@ -270,6 +271,33 @@ def backward(cls, _bytes: bytes, offset: int = 0) -> Tuple[Ellipse, int]:
         return Ellipse(x, y, x2, y2, **shape_dict), offset
 
 
+class Cuboid2DMapper(AnnotationMapper):
+    ann_type = AnnotationType.cuboid_2d
+
+    @classmethod
+    def forward(cls, ann: Shape) -> bytes:
+        _bytearray = bytearray()
+        _bytearray.extend(super().forward(ann))
+        _bytearray.extend(struct.pack("<ii", ann.label, ann.z_order))
+        points = [coord for point in ann.points for coord in point]
+        _bytearray.extend(struct.pack(f"<i{len(points)}f", len(points), *points))
+        return bytes(_bytearray)
+
+    @classmethod
+    def backward(cls, _bytes: bytes, offset: int = 0) -> Tuple[Cuboid2D, int]:
+        ann_dict, offset = super().backward_dict(_bytes, offset)
+        label, z_order = struct.unpack_from("<ii", _bytes, offset)
+        offset += 8
+        (n_points,) = struct.unpack_from("<i", _bytes, offset)
+        offset += 4
+        points = struct.unpack_from(f"<{n_points}f", _bytes, offset)
+        offset += 4 * n_points
+        points = list(zip(points[0::2], points[1::2]))
+        return Cuboid2D(points, label=label, z_order=z_order, **ann_dict), offset
+
+
@@ ... @@ def forward(cls, ann: Annotation) -> bytes:
             _bytearray.extend(Cuboid3dMapper.forward(ann))
         elif isinstance(ann, Ellipse):
             _bytearray.extend(EllipseMapper.forward(ann))
+        elif isinstance(ann, Cuboid2D):
+            _bytearray.extend(Cuboid2DMapper.forward(ann))
         else:
             raise NotImplementedError()
 
diff --git a/src/datumaro/plugins/data_formats/kitti/importer.py b/src/datumaro/plugins/data_formats/kitti/importer.py
index 2880301901..c1e53fad0c 100644
--- a/src/datumaro/plugins/data_formats/kitti/importer.py
+++ b/src/datumaro/plugins/data_formats/kitti/importer.py
@@ -99,7 +99,7 @@ class KittiDetectionImporter(KittiImporter):
     @classmethod
     def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence:
         # left color camera label files
-        context.require_file(f"**/label_2/*{cls._ANNO_EXT}")
+
context.require_file(f"**/label_2/*_*{cls._ANNO_EXT}") return cls.DETECT_CONFIDENCE @classmethod diff --git a/src/datumaro/plugins/data_formats/kitti_3d/__init__.py b/src/datumaro/plugins/data_formats/kitti_3d/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/datumaro/plugins/data_formats/kitti_3d/base.py b/src/datumaro/plugins/data_formats/kitti_3d/base.py new file mode 100644 index 0000000000..340792c14b --- /dev/null +++ b/src/datumaro/plugins/data_formats/kitti_3d/base.py @@ -0,0 +1,142 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import glob +import logging +import os.path as osp +from typing import List, Optional, Type, TypeVar + +from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories +from datumaro.components.dataset_base import DatasetItem, SubsetBase +from datumaro.components.errors import InvalidAnnotationError +from datumaro.components.importer import ImportContext +from datumaro.components.media import Image, PointCloud +from datumaro.util.image import find_images + +from .format import Kitti3dPath + +T = TypeVar("T") + + +class Kitti3dBase(SubsetBase): + # https://www.cvlibs.net/datasets/kitti/eval_object.php?obj_benchmark=3d + + def __init__( + self, + path: str, + *, + subset: Optional[str] = None, + ctx: Optional[ImportContext] = None, + ): + assert osp.isdir(path), path + super().__init__(subset=subset, media_type=PointCloud, ctx=ctx) + + self._path = path + + common_attrs = {"truncated", "occluded", "alpha", "dimensions", "location", "rotation_y"} + self._categories = {AnnotationType.label: LabelCategories(attributes=common_attrs)} + self._items = self._load_items() + + def _load_items(self) -> List[DatasetItem]: + items = [] + image_dir = osp.join(self._path, Kitti3dPath.IMAGE_DIR) + image_path_by_id = { + osp.splitext(osp.relpath(p, image_dir))[0]: p + for p in find_images(image_dir, recursive=True) + } + + ann_dir = osp.join(self._path, Kitti3dPath.LABEL_DIR) + label_categories = self._categories[AnnotationType.label] + + for labels_path in sorted(glob.glob(osp.join(ann_dir, "*.txt"), recursive=True)): + item_id = osp.splitext(osp.relpath(labels_path, ann_dir))[0] + anns = [] + + try: + with open(labels_path, "r", encoding="utf-8") as f: + lines = f.readlines() + except IOError as e: + logging.error(f"Error reading file {labels_path}: {e}") + continue + + for line_idx, line in enumerate(lines): + line = line.split() + if len(line) not in [15, 16]: + logging.warning( + f"Unexpected line length {len(line)} in file {labels_path} at line {line_idx + 1}" + ) + continue + + label_name = line[0] + label_id = label_categories.find(label_name)[0] + if label_id is None: + label_id = label_categories.add(label_name) + + try: + x1 = self._parse_field(line[4], float, "bbox left-top x") + y1 = self._parse_field(line[5], float, "bbox left-top y") + x2 = self._parse_field(line[6], float, "bbox right-bottom x") + y2 = self._parse_field(line[7], float, "bbox right-bottom y") + + attributes = { + "truncated": self._parse_field(line[1], float, "truncated"), + "occluded": self._parse_field(line[2], int, "occluded"), + "alpha": self._parse_field(line[3], float, "alpha"), + "dimensions": [ + self._parse_field(line[8], float, "height (in meters)"), + self._parse_field(line[9], float, "width (in meters)"), + self._parse_field(line[10], float, "length (in meters)"), + ], + "location": [ + self._parse_field(line[11], float, "x (in meters)"), + self._parse_field(line[12], float, "y (in meters)"), + 
self._parse_field(line[13], float, "z (in meters)"), + ], + "rotation_y": self._parse_field(line[14], float, "rotation_y"), + } + except ValueError as e: + logging.error(f"Error parsing line {line_idx + 1} in file {labels_path}: {e}") + continue + + anns.append( + Bbox( + x=x1, + y=y1, + w=x2 - x1, + h=y2 - y1, + id=line_idx, + attributes=attributes, + label=label_id, + ) + ) + self._ann_types.add(AnnotationType.bbox) + + image = image_path_by_id.pop(item_id, None) + if image: + image = Image.from_file(path=image) + + items.append( + DatasetItem( + id=item_id, + subset=self._subset, + media=PointCloud.from_file( + path=osp.join(self._path, Kitti3dPath.PCD_DIR, item_id + ".bin"), + extra_images=[image], + ), + attributes={ + "calib_path": osp.join(self._path, Kitti3dPath.CALIB_DIR, item_id + ".txt") + }, + annotations=anns, + ) + ) + + return items + + def _parse_field(self, value: str, desired_type: Type[T], field_name: str) -> T: + try: + return desired_type(value) + except Exception as e: + raise InvalidAnnotationError( + f"Can't parse {field_name} from '{value}'. Expected {desired_type}" + ) from e diff --git a/src/datumaro/plugins/data_formats/kitti_3d/format.py b/src/datumaro/plugins/data_formats/kitti_3d/format.py new file mode 100644 index 0000000000..98a883428d --- /dev/null +++ b/src/datumaro/plugins/data_formats/kitti_3d/format.py @@ -0,0 +1,12 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +import os.path as osp + + +class Kitti3dPath: + PCD_DIR = osp.join("velodyne") + IMAGE_DIR = "image_2" + LABEL_DIR = "label_2" + CALIB_DIR = "calib" diff --git a/src/datumaro/plugins/data_formats/kitti_3d/importer.py b/src/datumaro/plugins/data_formats/kitti_3d/importer.py new file mode 100644 index 0000000000..3be488b71f --- /dev/null +++ b/src/datumaro/plugins/data_formats/kitti_3d/importer.py @@ -0,0 +1,45 @@ +# Copyright (C) 2024 Intel Corporation +# +# SPDX-License-Identifier: MIT + +from typing import List + +from datumaro.components.errors import DatasetImportError +from datumaro.components.format_detection import FormatDetectionConfidence, FormatDetectionContext +from datumaro.components.importer import Importer + +from .format import Kitti3dPath + + +class Kitti3dImporter(Importer): + _ANNO_EXT = ".txt" + + @classmethod + def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: + context.require_file(f"{Kitti3dPath.PCD_DIR}/*.bin") + cls._check_ann_file(context.require_file(f"{Kitti3dPath.LABEL_DIR}/*.txt"), context) + return FormatDetectionConfidence.MEDIUM + + @classmethod + def _check_ann_file(cls, fpath: str, context: FormatDetectionContext) -> bool: + with context.probe_text_file( + fpath, "Requirements for the annotation file of Kitti 3D format" + ) as fp: + for line in fp: + fields = line.rstrip("\n").split(" ") + if len(fields) == 15 or len(fields) == 16: + return True + raise DatasetImportError( + f"Kitti 3D format txt file should have 15 or 16 fields for " + f"each line, but the read line has {len(fields)} fields: " + f"fields={fields}." 
+ ) + raise DatasetImportError("Empty file is not allowed.") + + @classmethod + def get_file_extensions(cls) -> List[str]: + return [cls._ANNO_EXT] + + @classmethod + def find_sources(cls, path): + return [{"url": path, "format": "kitti3d"}] diff --git a/src/datumaro/plugins/specs.json b/src/datumaro/plugins/specs.json index 8891b79287..395ff510b0 100644 --- a/src/datumaro/plugins/specs.json +++ b/src/datumaro/plugins/specs.json @@ -799,6 +799,21 @@ ] } }, + { + "import_path": "datumaro.plugins.data_formats.kitti_3d.base.Kitti3dBase", + "plugin_name": "kitti3d", + "plugin_type": "DatasetBase" + }, + { + "import_path": "datumaro.plugins.data_formats.kitti_3d.importer.Kitti3dImporter", + "plugin_name": "kitti3d", + "plugin_type": "Importer", + "metadata": { + "file_extensions": [ + ".txt" + ] + } + }, { "import_path": "datumaro.plugins.data_formats.kitti_raw.base.KittiRawBase", "plugin_name": "kitti_raw", @@ -1855,13 +1870,13 @@ "plugin_type": "Transform" }, { - "import_path": "datumaro.plugins.transforms.Correct", - "plugin_name": "correct", + "import_path": "datumaro.plugins.transforms.Clean", + "plugin_name": "clean", "plugin_type": "Transform" }, { - "import_path": "datumaro.plugins.transforms.Clean", - "plugin_name": "clean", + "import_path": "datumaro.plugins.transforms.Correct", + "plugin_name": "correct", "plugin_type": "Transform" }, { @@ -1929,6 +1944,11 @@ "plugin_name": "remove_annotations", "plugin_type": "Transform" }, + { + "import_path": "datumaro.plugins.transforms.PseudoLabeling", + "plugin_name": "pseudo_labeling", + "plugin_type": "Transform" + }, { "import_path": "datumaro.plugins.transforms.RemoveAttributes", "plugin_name": "remove_attributes", diff --git a/src/datumaro/plugins/transforms.py b/src/datumaro/plugins/transforms.py index 2aa6811194..f1515940c2 100644 --- a/src/datumaro/plugins/transforms.py +++ b/src/datumaro/plugins/transforms.py @@ -22,6 +22,8 @@ from pandas.api.types import CategoricalDtype import datumaro.util.mask_tools as mask_tools +from datumaro.components.algorithms.hash_key_inference.explorer import Explorer +from datumaro.components.algorithms.hash_key_inference.hashkey_util import calculate_hamming from datumaro.components.annotation import ( AnnotationType, Bbox, @@ -40,6 +42,7 @@ TabularCategories, ) from datumaro.components.cli_plugin import CliPlugin +from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DEFAULT_SUBSET_NAME, DatasetInfo, DatasetItem, IDataset from datumaro.components.errors import ( AnnotationTypeError, @@ -1348,9 +1351,21 @@ def transform_item(self, item: DatasetItem): class Correct(Transform, CliPlugin): """ - Correct the dataset from a validation report. - A user can should feed into validation_reports.json from validator to correct the dataset. - This helps to refine the dataset by rejecting undefined labels, missing annotations, and outliers. + This class provides functionality to correct and refine a dataset based on a validation report.|n + It processes a validation report (typically in JSON format) to identify and rectify various |n + dataset issues, such as undefined labels, missing annotations, outliers, empty labels/captions,|n + and unnecessary characters in captions. 
The correction process includes:|n + |n + - Adding missing labels and attributes.|n + - Removing or adjusting annotations with invalid or anomalous values.|n + - Filling in missing labels and captions with appropriate values.|n + - Removing unnecessary characters from text-based annotations like captions.|n + - Handling outliers by capping values within specified bounds.|n + - Updating dataset categories and annotations according to the corrections.|n + |n + The class is designed to be used as part of a command-line interface (CLI) and can be |n + configured with different validation reports. It integrates with the dataset extraction |n + process, ensuring that corrections are applied consistently across the dataset.|n """ @classmethod @@ -1746,13 +1761,15 @@ def __iter__(self): class AstypeAnnotations(ItemTransform): """ - Enables the conversion of annotation types for the categories and individual items within a dataset.|n + Converts the types of annotations within a dataset based on a specified mapping.|n |n - Based on a specified mapping, it transforms the annotation types,|n - changing them to 'Label' if they are categorical, and to 'Caption' if they are of type string, float, or integer.|n + This transform changes annotations to 'Label' if they are categorical, and to 'Caption' + if they are of type string, float, or integer. This is particularly useful when working + with tabular data that needs to be converted into a format suitable for specific machine + learning tasks.|n |n Examples:|n - - Convert type of `title` annotation|n + - Converts the type of a `title` annotation:|n .. code-block:: @@ -2004,3 +2021,64 @@ def transform_item(self, item): refined_annotations.append(ann) return self.wrap_item(item, media=refined_media, annotations=refined_annotations) + + +class PseudoLabeling(ItemTransform): + """ + A class used to assign pseudo-labels to items in a dataset based on + their similarity to predefined labels.|n + |n + This class leverages hashing techniques to compute the similarity + between dataset items and a set of predefined labels.|n + It assigns the most similar label as a pseudo-label to each item. + This is particularly useful in semi-supervised + learning scenarios where some labels are missing or uncertain.|n + |n + Attributes:|n + - extractor : IDataset|n + The dataset extractor that provides access to dataset items and their annotations.|n + - labels : Optional[List[str]]|n + A list of label names to be used for pseudo-labeling. + If not provided, all available labels in the dataset will be used.|n + - explorer : Optional[Explorer]|n + An optional Explorer object used to compute hash keys for items and labels. 
+ If not provided, a new Explorer will be created.|n + """ + + def __init__( + self, + extractor: IDataset, + labels: Optional[List[str]] = None, + explorer: Optional[Explorer] = None, + ): + super().__init__(extractor) + + self._categories = self._extractor.categories() + self._labels = labels + self._explorer = explorer + self._label_indices = self._categories[AnnotationType.label]._indices + + if not self._labels: + self._labels = list(self._label_indices.keys()) + if not self._explorer: + self._explorer = Explorer(Dataset.from_iterable(list(self._extractor))) + + label_hashkeys = [ + np.unpackbits(self._explorer._get_hash_key_from_text_query(label).hash_key, axis=-1) + for label in self._labels + ] + self._label_hashkeys = np.stack(label_hashkeys, axis=0) + + def categories(self): + return self._categories + + def transform_item(self, item: DatasetItem): + hashkey_ = np.unpackbits(self._explorer._get_hash_key_from_item_query(item).hash_key) + logits = calculate_hamming(hashkey_, self._label_hashkeys) + inverse_distances = 1.0 / (logits + 1e-6) + probs = inverse_distances / np.sum(inverse_distances) + ind = np.argsort(probs)[::-1] + + pseudo = np.array(self._labels)[ind][0] + pseudo_annotation = [Label(label=self._label_indices[pseudo])] + return self.wrap_item(item, annotations=pseudo_annotation) diff --git a/tests/assets/kitti_dataset/kitti_3d/training/calib/000001.txt b/tests/assets/kitti_dataset/kitti_3d/training/calib/000001.txt new file mode 100644 index 0000000000..367f0b263a --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d/training/calib/000001.txt @@ -0,0 +1,7 @@ +P0: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 0.000000000000e+00 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P1: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.875744000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 0.000000000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 0.000000000000e+00 +P2: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 4.485728000000e+01 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.163791000000e-01 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.745884000000e-03 +P3: 7.215377000000e+02 0.000000000000e+00 6.095593000000e+02 -3.395242000000e+02 0.000000000000e+00 7.215377000000e+02 1.728540000000e+02 2.199936000000e+00 0.000000000000e+00 0.000000000000e+00 1.000000000000e+00 2.729905000000e-03 +R0_rect: 9.999239000000e-01 9.837760000000e-03 -7.445048000000e-03 -9.869795000000e-03 9.999421000000e-01 -4.278459000000e-03 7.402527000000e-03 4.351614000000e-03 9.999631000000e-01 +Tr_velo_to_cam: 7.533745000000e-03 -9.999714000000e-01 -6.166020000000e-04 -4.069766000000e-03 1.480249000000e-02 7.280733000000e-04 -9.998902000000e-01 -7.631618000000e-02 9.998621000000e-01 7.523790000000e-03 1.480755000000e-02 -2.717806000000e-01 +Tr_imu_to_velo: 9.999976000000e-01 7.553071000000e-04 -2.035826000000e-03 -8.086759000000e-01 -7.854027000000e-04 9.998898000000e-01 -1.482298000000e-02 3.195559000000e-01 2.024406000000e-03 1.482454000000e-02 9.998881000000e-01 -7.997231000000e-01 \ No newline at end of file diff --git a/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png b/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png new file mode 100644 index 0000000000..e6f3cff877 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/image_2/000001.png differ diff 
--git a/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt b/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt new file mode 100644 index 0000000000..a2fdc0fa6f --- /dev/null +++ b/tests/assets/kitti_dataset/kitti_3d/training/label_2/000001.txt @@ -0,0 +1,3 @@ +Truck 0.00 0 -1.57 600 150 630 190 2.85 2.63 12.34 0.47 1.49 69.44 -1.56 +Car 0.00 3 -1.65 650 160 700 200 1.86 0.60 2.02 4.59 1.32 45.84 -1.55 +DontCare -1 -1 -10 500 170 590 190 -1 -1 -1 -1000 -1000 -1000 -10 diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin new file mode 100644 index 0000000000..d6089802fb Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000001.bin differ diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin new file mode 100644 index 0000000000..50a1df582a Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000002.bin differ diff --git a/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin new file mode 100644 index 0000000000..1eb847a044 Binary files /dev/null and b/tests/assets/kitti_dataset/kitti_3d/training/velodyne/000003.bin differ diff --git a/tests/unit/data_formats/datumaro/conftest.py b/tests/unit/data_formats/datumaro/conftest.py index e600ae957c..9d08a31700 100644 --- a/tests/unit/data_formats/datumaro/conftest.py +++ b/tests/unit/data_formats/datumaro/conftest.py @@ -15,6 +15,7 @@ AnnotationType, Bbox, Caption, + Cuboid2D, Cuboid3d, Ellipse, Label, @@ -122,6 +123,25 @@ def fxt_test_datumaro_format_dataset(): "y": "2", }, ), + Cuboid2D( + [ + (1, 1), + (3, 1), + (3, 3), + (1, 3), + (1.5, 1.5), + (3.5, 1.5), + (3.5, 3.5), + (1.5, 3.5), + ], + label=3, + id=5, + z_order=2, + attributes={ + "x": 1, + "y": "2", + }, + ), ], ), DatasetItem( @@ -201,6 +221,191 @@ def fxt_test_datumaro_format_dataset(): ) +@pytest.fixture +def fxt_test_datumaro_format_dataset_with_path_separator(): + label_categories = LabelCategories(attributes={"a", "b", "score"}) + for i in range(5): + label_categories.add("cat" + str(i), attributes={"x", "y"}) + + mask_categories = MaskCategories(generate_colormap(len(label_categories.items))) + + points_categories = PointsCategories() + for index, _ in enumerate(label_categories.items): + points_categories.add(index, ["cat1", "cat2"], joints=[[0, 1]]) + + sep = os.path.sep + return Dataset.from_iterable( + [ + DatasetItem( + id="100/0", + subset=f"my{sep}train", + media=Image.from_numpy(data=np.ones((10, 6, 3))), + annotations=[ + Caption("hello", id=1), + Caption("world", id=2, group=5), + Label( + 2, + id=3, + attributes={ + "x": 1, + "y": "2", + }, + ), + Bbox( + 1, + 2, + 3, + 4, + label=4, + id=4, + z_order=1, + attributes={ + "score": 1.0, + }, + ), + Bbox( + 5, + 6, + 7, + 8, + id=5, + group=5, + attributes={ + "a": 1.5, + "b": "text", + }, + ), + Points( + [1, 2, 2, 0, 1, 1], + label=0, + id=5, + z_order=4, + attributes={ + "x": 1, + "y": "2", + }, + ), + Mask( + label=3, + id=5, + z_order=2, + image=np.ones((2, 3)), + attributes={ + "x": 1, + "y": "2", + }, + ), + Ellipse( + 5, + 6, + 7, + 8, + label=3, + id=5, + z_order=2, + attributes={ + "x": 1, + "y": "2", + }, + ), + Cuboid2D( + [ + (1, 1), + (3, 1), + (3, 3), + (1, 3), + (1.5, 1.5), + (3.5, 1.5), + (3.5, 3.5), + (1.5, 3.5), + ], + label=3, + id=5, + z_order=2, + 
attributes={ + "x": 1, + "y": "2", + }, + ), + ], + ), + DatasetItem( + id=21, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset="train", + annotations=[ + Caption("test"), + Label(2), + Bbox(1, 2, 3, 4, label=5, id=42, group=42), + ], + ), + DatasetItem( + id=2, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset=f"my{sep}val", + annotations=[ + PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1), + Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4), + ], + ), + DatasetItem( + id="1/1", + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset="test", + annotations=[ + Cuboid3d( + [1.0, 2.0, 3.0], + [2.0, 2.0, 4.0], + [1.0, 3.0, 4.0], + id=6, + label=0, + attributes={"occluded": True}, + group=6, + ) + ], + ), + DatasetItem( + id=42, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + subset=f"my{sep}test", + attributes={"a1": 5, "a2": "42"}, + ), + DatasetItem( + id=42, + media=Image.from_numpy(data=np.ones((10, 6, 3))), + # id and group integer value can be higher than 32bits limits (COCO instances). + annotations=[ + Mask( + id=900100087038, group=900100087038, image=np.ones((2, 3), dtype=np.uint8) + ), + RleMask( + rle=mask_tools.encode(np.ones((2, 3), dtype=np.uint8, order="F")), + id=900100087038, + group=900100087038, + ), + ], + ), + DatasetItem( + id="1/b/c", + media=Image.from_file(path="1/b/c.qq", size=(2, 4)), + ), + ], + categories={ + AnnotationType.label: label_categories, + AnnotationType.mask: mask_categories, + AnnotationType.points: points_categories, + }, + infos={ + "string": "test", + "int": 0, + "float": 0.0, + "string_list": ["test0", "test1", "test2"], + "int_list": [0, 1, 2], + "float_list": [0.0, 0.1, 0.2], + }, + ) + + @pytest.fixture def fxt_test_datumaro_format_video_dataset(test_dir) -> Dataset: video_path = osp.join(test_dir, "video.avi") diff --git a/tests/unit/data_formats/datumaro/test_datumaro_format.py b/tests/unit/data_formats/datumaro/test_datumaro_format.py index 2492c072b6..bb03455b0d 100644 --- a/tests/unit/data_formats/datumaro/test_datumaro_format.py +++ b/tests/unit/data_formats/datumaro/test_datumaro_format.py @@ -14,6 +14,7 @@ from datumaro.components.dataset_base import DatasetItem from datumaro.components.environment import Environment +from datumaro.components.errors import PathSeparatorInSubsetNameError from datumaro.components.importer import DatasetImportError from datumaro.components.media import Image from datumaro.components.project import Dataset @@ -155,6 +156,31 @@ def test_can_save_and_load( stream=stream, ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + @pytest.mark.parametrize("require_media", [True, False]) + @pytest.mark.parametrize("stream", [True, False]) + def test_cannot_export_dataset_with_subset_containing_path_separators( + self, + fxt_test_datumaro_format_dataset_with_path_separator, + test_dir, + fxt_import_kwargs, + fxt_export_kwargs, + stream, + require_media, + helper_tc, + ): + with pytest.raises(PathSeparatorInSubsetNameError): + self._test_save_and_load( + helper_tc, + fxt_test_datumaro_format_dataset_with_path_separator, + partial(self.exporter.convert, save_media=True, stream=stream, **fxt_export_kwargs), + test_dir, + compare=compare_datasets, + require_media=require_media, + importer_args=fxt_import_kwargs, + stream=stream, + ) + @mark_requirement(Requirements.DATUM_GENERAL_REQ) def test_export_video_only_once( self, diff --git a/tests/unit/operations/test_statistics.py b/tests/unit/operations/test_statistics.py index bb92c53308..7f28be820a 100644 --- 
a/tests/unit/operations/test_statistics.py +++ b/tests/unit/operations/test_statistics.py @@ -10,7 +10,16 @@ import numpy as np import pytest -from datumaro.components.annotation import Bbox, Caption, Ellipse, Label, Mask, Points, RotatedBbox +from datumaro.components.annotation import ( + Bbox, + Caption, + Cuboid2D, + Ellipse, + Label, + Mask, + Points, + RotatedBbox, +) from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DatasetItem from datumaro.components.errors import DatumaroError @@ -232,6 +241,25 @@ def test_stats(self): "tiny": True, }, ), + Cuboid2D( + [ + (1, 1), + (3, 1), + (3, 3), + (1, 3), + (1.5, 1.5), + (3.5, 1.5), + (3.5, 3.5), + (1.5, 3.5), + ], + label=3, + id=5, + z_order=2, + attributes={ + "x": 1, + "y": "2", + }, + ), ], ), DatasetItem(id=3), @@ -242,7 +270,7 @@ def test_stats(self): expected = { "images count": 4, - "annotations count": 12, + "annotations count": 13, "unannotated images count": 2, "unannotated images": ["3", "2.2"], "annotations by type": { @@ -277,33 +305,34 @@ def test_stats(self): "hash_key": {"count": 0}, "feature_vector": {"count": 0}, "tabular": {"count": 0}, + "cuboid_2d": {"count": 1}, "unknown": {"count": 0}, }, "annotations": { "labels": { - "count": 6, + "count": 7, "distribution": { - "label_0": [1, 1 / 6], + "label_0": [1, 1 / 7], "label_1": [0, 0.0], - "label_2": [3, 3 / 6], - "label_3": [2, 2 / 6], + "label_2": [3, 3 / 7], + "label_3": [3, 3 / 7], }, "attributes": { "x": { - "count": 2, # annotations with no label are skipped + "count": 3, # annotations with no label are skipped "values count": 2, "values present": ["1", "2"], "distribution": { - "1": [1, 1 / 2], - "2": [1, 1 / 2], + "1": [2, 2 / 3], + "2": [1, 1 / 3], }, }, "y": { - "count": 2, # annotations with no label are skipped + "count": 3, # annotations with no label are skipped "values count": 1, "values present": ["2"], "distribution": { - "2": [2, 2 / 2], + "2": [3, 3 / 3], }, }, # must not include "special" attributes like "occluded" @@ -403,6 +432,7 @@ def _get_stats_template(label_names: list): "feature_vector": {"count": 0}, "tabular": {"count": 0}, "rotated_bbox": {"count": 0}, + "cuboid_2d": {"count": 0}, "unknown": {"count": 0}, }, "annotations": { diff --git a/tests/unit/test_kitti_3d_format.py b/tests/unit/test_kitti_3d_format.py new file mode 100644 index 0000000000..ed4a8e6220 --- /dev/null +++ b/tests/unit/test_kitti_3d_format.py @@ -0,0 +1,116 @@ +import os.path as osp +from unittest import TestCase + +from datumaro.components.annotation import AnnotationType, Bbox, LabelCategories +from datumaro.components.dataset_base import DatasetItem +from datumaro.components.environment import Environment +from datumaro.components.media import Image, PointCloud +from datumaro.components.project import Dataset +from datumaro.plugins.data_formats.kitti_3d.importer import Kitti3dImporter + +from tests.requirements import Requirements, mark_requirement +from tests.utils.assets import get_test_asset_path +from tests.utils.test_utils import compare_datasets_3d + +DUMMY_DATASET_DIR = get_test_asset_path("kitti_dataset", "kitti_3d", "training") + + +class Kitti3DImporterTest(TestCase): + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_detect(self): + detected_formats = Environment().detect_dataset(DUMMY_DATASET_DIR) + self.assertEqual([Kitti3dImporter.NAME], detected_formats) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_can_load(self): + """ + Description: + Ensure that the dataset can be loaded 
correctly from the KITTI3D format. + + Expected results: + The loaded dataset should have the same number of data items as the expected dataset. + The data items in the loaded dataset should have the same attributes and values as the expected data items. + The point clouds and images associated with the data items should be loaded correctly. + + Steps: + 1. Prepare an expected dataset with known data items, point clouds, images, and attributes. + 2. Load the dataset from the KITTI3D format. + 3. Compare the loaded dataset with the expected dataset. + """ + pcd1 = osp.join(DUMMY_DATASET_DIR, "velodyne", "000001.bin") + + image1 = Image.from_file(path=osp.join(DUMMY_DATASET_DIR, "image_2", "000001.png")) + + expected_label_cat = LabelCategories( + attributes={"occluded", "truncated", "alpha", "dimensions", "location", "rotation_y"} + ) + expected_label_cat.add("Truck") + expected_label_cat.add("Car") + expected_label_cat.add("DontCare") + expected_dataset = Dataset.from_iterable( + [ + DatasetItem( + id="000001", + annotations=[ + Bbox( + 600, # x1 + 150, # y1 + 30, # x2-x1 + 40, # y2-y1 + label=0, + id=0, + attributes={ + "truncated": 0.0, + "occluded": 0, + "alpha": -1.57, + "dimensions": [2.85, 2.63, 12.34], + "location": [0.47, 1.49, 69.44], + "rotation_y": -1.56, + }, + z_order=0, + ), + Bbox( + 650, # x1 + 160, # y1 + 50, # x2-x1 + 40, # y2-y1 + label=1, + id=1, + attributes={ + "truncated": 0.0, + "occluded": 3, + "alpha": -1.65, + "dimensions": [1.86, 0.6, 2.02], + "location": [4.59, 1.32, 45.84], + "rotation_y": -1.55, + }, + z_order=0, + ), + Bbox( + 500, # x1 + 170, # y1 + 90, # x2-x1 + 20, # y2-y1 + label=2, + id=2, + attributes={ + "truncated": -1.0, + "occluded": -1, + "alpha": -10.0, + "dimensions": [-1.0, -1.0, -1.0], + "location": [-1000.0, -1000.0, -1000.0], + "rotation_y": -10.0, + }, + ), + ], + media=PointCloud.from_file(path=pcd1, extra_images=[image1]), + attributes={"calib_path": osp.join(DUMMY_DATASET_DIR, "calib", "000001.txt")}, + ), + ], + categories={AnnotationType.label: expected_label_cat}, + media_type=PointCloud, + ) + + parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, "kitti3d") + + compare_datasets_3d(self, expected_dataset, parsed_dataset, require_point_cloud=True) diff --git a/tests/unit/test_transforms.py b/tests/unit/test_transforms.py index 24db8a76e6..25f01caff8 100644 --- a/tests/unit/test_transforms.py +++ b/tests/unit/test_transforms.py @@ -2,6 +2,7 @@ import argparse import logging as log +import os import os.path as osp import random from unittest import TestCase @@ -14,6 +15,7 @@ import datumaro.plugins.transforms as transforms import datumaro.util.mask_tools as mask_tools +from datumaro.components.algorithms.hash_key_inference.explorer import Explorer from datumaro.components.annotation import ( AnnotationType, Bbox, @@ -1673,3 +1675,57 @@ def test_transform_clean_after_astype_ann(self): result_item = result.__getitem__(i) self.assertEqual(expected_item.annotations, result_item.annotations) self.assertEqual(expected_item.media, result_item.media) + + +class PseudoLabelingTest(TestCase): + def setUp(self): + self.data_path = get_test_asset_path("explore_dataset") + self.categories = ["bird", "cat", "dog", "monkey"] + self.source = Dataset.from_iterable( + [ + DatasetItem( + id=0, + media=Image.from_file( + path=os.path.join(self.data_path, "dog", "ILSVRC2012_val_00001698.JPEG") + ), + ), + DatasetItem( + id=1, + media=Image.from_file( + path=os.path.join(self.data_path, "cat", "ILSVRC2012_val_00004894.JPEG") + ), + ), + ], + 
categories=self.categories, + ) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_transform_pseudolabeling_with_labels(self): + dataset = self.source + labels = self.categories + explorer = Explorer(dataset) + result = dataset.transform("pseudo_labeling", labels=labels, explorer=explorer) + + label_indices = dataset.categories()[AnnotationType.label]._indices + for item, expected in zip(result, ["dog", "cat"]): + self.assertEqual(item.annotations[0].label, label_indices[expected]) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_transform_pseudolabeling_without_labels(self): + dataset = self.source + explorer = Explorer(dataset) + result = dataset.transform("pseudo_labeling", explorer=explorer) + + label_indices = dataset.categories()[AnnotationType.label]._indices + for item, expected in zip(result, ["dog", "cat"]): + self.assertEqual(item.annotations[0].label, label_indices[expected]) + + @mark_requirement(Requirements.DATUM_GENERAL_REQ) + def test_transform_pseudolabeling_without_explorer(self): + dataset = self.source + labels = self.categories + result = dataset.transform("pseudo_labeling", labels=labels) + + label_indices = dataset.categories()[AnnotationType.label]._indices + for item, expected in zip(result, ["dog", "cat"]): + self.assertEqual(item.annotations[0].label, label_indices[expected])
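To close, a minimal sketch of loading the new KITTI 3D format through the Python API, mirroring the unit test above (the path is illustrative):

```python
from datumaro.components.dataset import Dataset

# Path is illustrative; the directory should contain velodyne/, image_2/,
# label_2/, and calib/ subdirectories, as in the test assets above.
dataset = Dataset.import_from("path/to/kitti_3d/training", "kitti3d")

for item in dataset:
    # Each item is a point cloud; its 2D boxes carry the 3D fields
    # (dimensions, location, rotation_y) as attributes.
    print(item.id, [ann.label for ann in item.annotations])
```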