From bda46b659e7bd52d85bda8840af7826ab5ad84f7 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 11:44:08 +0900 Subject: [PATCH 01/33] Refine data parts in recipes --- src/otx/recipe/_base_/data/detection.yaml | 12 ++++++++ .../recipe/_base_/data/torchvision_base.yaml | 8 ++--- .../recipe/detection/atss_mobilenetv2.yaml | 22 +------------- .../detection/atss_mobilenetv2_tile.yaml | 22 +------------- src/otx/recipe/detection/atss_resnext101.yaml | 22 +------------- src/otx/recipe/detection/openvino_model.yaml | 7 ++--- src/otx/recipe/detection/rtmdet_tiny.yaml | 16 +--------- src/otx/recipe/detection/ssd_mobilenetv2.yaml | 28 +---------------- .../detection/ssd_mobilenetv2_tile.yaml | 28 +---------------- src/otx/recipe/detection/yolox_l.yaml | 16 +--------- src/otx/recipe/detection/yolox_l_tile.yaml | 14 +-------- src/otx/recipe/detection/yolox_s.yaml | 22 +++----------- src/otx/recipe/detection/yolox_s_tile.yaml | 14 +-------- src/otx/recipe/detection/yolox_tiny.yaml | 30 +++---------------- src/otx/recipe/detection/yolox_tiny_tile.yaml | 20 +------------ src/otx/recipe/detection/yolox_x.yaml | 25 +++------------- src/otx/recipe/detection/yolox_x_tile.yaml | 14 +-------- 17 files changed, 42 insertions(+), 278 deletions(-) create mode 100644 src/otx/recipe/_base_/data/detection.yaml diff --git a/src/otx/recipe/_base_/data/detection.yaml b/src/otx/recipe/_base_/data/detection.yaml new file mode 100644 index 00000000000..6ff9b86242e --- /dev/null +++ b/src/otx/recipe/_base_/data/detection.yaml @@ -0,0 +1,12 @@ +data: ../_base_/data/torchvision_base.yaml +overrides: + data: + task: DETECTION + data_format: coco_instances + stack_images: true + train_subset: + to_tv_image: false + val_subset: + to_tv_image: false + test_subset: + to_tv_image: false diff --git a/src/otx/recipe/_base_/data/torchvision_base.yaml b/src/otx/recipe/_base_/data/torchvision_base.yaml index 874247a8593..c00dd996eed 100644 --- a/src/otx/recipe/_base_/data/torchvision_base.yaml 
+++ b/src/otx/recipe/_base_/data/torchvision_base.yaml @@ -2,13 +2,13 @@ task: MULTI_CLASS_CLS mem_cache_size: 1GB mem_cache_img_max_size: null image_color_channel: RGB -stack_images: False +stack_images: false data_format: imagenet_with_subset_dirs unannotated_items_ratio: 0.0 train_subset: subset_name: train transform_lib_type: TORCHVISION - to_tv_image: True + to_tv_image: true transforms: - class_path: torchvision.transforms.v2.ToImage batch_size: 1 @@ -18,7 +18,7 @@ train_subset: val_subset: subset_name: val transform_lib_type: TORCHVISION - to_tv_image: True + to_tv_image: true transforms: - class_path: torchvision.transforms.v2.ToImage batch_size: 1 @@ -28,7 +28,7 @@ val_subset: test_subset: subset_name: test transform_lib_type: TORCHVISION - to_tv_image: True + to_tv_image: true transforms: - class_path: torchvision.transforms.v2.ToImage batch_size: 1 diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index 41cd145ea29..af6c2c43a24 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -28,7 +28,7 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 callbacks: @@ -38,28 +38,15 @@ overrides: decay: -0.025 min_lrschedule_patience: 3 data: - task: DETECTION - stack_images: True - data_format: coco_instances train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - init_args: - min_ious: - - 0.1 - - 0.3 - - 0.5 - - 0.7 - - 0.9 - min_crop_size: 0.3 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -68,7 +55,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: 
${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -77,40 +63,34 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index 948113d15e6..fb70c5e7f8a 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -28,35 +28,22 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION tile_config: enable_tiler: true enable_adaptive_tiling: true - stack_images: True - data_format: coco_instances train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - init_args: - min_ious: - - 0.1 - - 0.3 - - 0.5 - - 0.7 - - 0.9 - min_crop_size: 0.3 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false 
transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -65,7 +52,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -74,40 +60,34 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index d6342a3512f..1f6439e28d9 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -28,7 +28,7 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 callbacks: @@ -38,28 +38,15 @@ overrides: decay: -0.025 min_lrschedule_patience: 3 data: - task: DETECTION - stack_images: True - data_format: coco_instances train_subset: batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - 
init_args: - min_ious: - - 0.1 - - 0.3 - - 0.5 - - 0.7 - - 0.9 - min_crop_size: 0.3 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -68,7 +55,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -77,40 +63,34 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] test_subset: batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 800 - 992 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml index 7ed99918d26..66b7d61144f 100644 --- a/src/otx/recipe/detection/openvino_model.yaml +++ b/src/otx/recipe/detection/openvino_model.yaml @@ -13,11 +13,10 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: data: - task: DETECTION - image_color_channel: RGB - data_format: coco_instances + stack_images: false test_subset: + to_tv_image: 
true batch_size: 64 diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 52763599318..55f9b1edf29 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -27,24 +27,19 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION image_color_channel: BGR - stack_images: true - data_format: coco_instances train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic init_args: img_scale: - 640 - 640 - pad_val: 114.0 max_cached_images: 20 random_pop: false - class_path: otx.core.data.transform_libs.torchvision.RandomResize @@ -82,23 +77,17 @@ overrides: - 1.0 max_cached_images: 10 random_pop: false - pad_val: - - 114 - - 114 - - 114 prob: 0.5 is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [103.53, 116.28, 123.675] std: [57.375, 57.12, 58.395] val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: @@ -117,14 +106,12 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [103.53, 116.28, 123.675] std: [57.375, 57.12, 58.395] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: @@ -143,7 +130,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [103.53, 116.28, 123.675] diff --git 
a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 66cc90b8392..9cc98d9bc9e 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -28,39 +28,20 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION - stack_images: True - data_format: coco_instances train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - init_args: - brightness_delta: 32 - contrast_range: - - 0.5 - - 1.5 - hue_delta: 18 - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - init_args: - min_ious: - - 0.1 - - 0.3 - - 0.5 - - 0.7 - - 0.9 - min_crop_size: 0.3 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 864 - 864 - keep_ratio: false transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -69,7 +50,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -78,40 +58,34 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 864 - 864 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 864 - 864 - 
keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index 3b2b7bce739..db0c1832b0a 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -28,42 +28,23 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION - stack_images: True - data_format: coco_instances tile_config: enable_tiler: true enable_adaptive_tiling: true train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - init_args: - brightness_delta: 32 - contrast_range: - - 0.5 - - 1.5 - hue_delta: 18 - class_path: otx.core.data.transform_libs.torchvision.MinIoURandomCrop - init_args: - min_ious: - - 0.1 - - 0.3 - - 0.5 - - 0.7 - - 0.9 - min_crop_size: 0.3 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 864 - 864 - keep_ratio: false transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -72,7 +53,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -81,40 +61,34 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 864 - 864 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: 
torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 864 - 864 - keep_ratio: false transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index 1aef1bf2979..c45267dd6a1 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -28,22 +28,17 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION image_color_channel: BGR - stack_images: True - data_format: coco_instances train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic init_args: random_pop: false - pad_val: 114.0 max_cached_images: 20 img_scale: # (H, W) - 640 @@ -66,10 +61,6 @@ overrides: - 1.0 prob: 0.5 random_pop: false - pad_val: - - 114 - - 114 - - 114 max_cached_images: 10 - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize @@ -90,7 +81,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -99,7 +89,6 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: 
otx.core.data.transform_libs.torchvision.Resize init_args: @@ -116,14 +105,12 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: @@ -140,7 +127,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index 0ec3c2d5a2e..b979abe9d6f 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -28,21 +28,17 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION tile_config: enable_tiler: true enable_adaptive_tiling: true image_color_channel: BGR - stack_images: True - data_format: coco_instances train_subset: num_workers: 4 batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize @@ -50,7 +46,6 @@ overrides: scale: - 640 - 640 - keep_ratio: false transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -63,7 +58,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -71,14 +65,12 @@ overrides: val_subset: num_workers: 4 batch_size: 8 - to_tv_image: False transforms: - class_path: 
otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -88,7 +80,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -96,14 +87,12 @@ overrides: test_subset: num_workers: 4 batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -113,7 +102,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index 8e5d72491cc..aa3ea75a8c9 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -28,22 +28,17 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION image_color_channel: BGR - stack_images: True - data_format: coco_instances train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic init_args: random_pop: false - pad_val: 114.0 max_cached_images: 20 img_scale: # (H, W) - 640 @@ -66,10 +61,6 @@ overrides: - 1.0 prob: 0.5 random_pop: false - pad_val: - - 114 - - 114 - - 114 max_cached_images: 10 - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize @@ -77,7 +68,7 @@ overrides: scale: - 640 - 640 - keep_ratio: True + 
keep_ratio: true transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -90,7 +81,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -99,14 +89,13 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: True + keep_ratio: true transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -116,21 +105,19 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: True + keep_ratio: true transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -140,7 +127,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 8c60685dd84..2b51d3f7696 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -28,21 +28,17 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION tile_config: enable_tiler: true enable_adaptive_tiling: true image_color_channel: BGR - 
stack_images: True - data_format: coco_instances train_subset: num_workers: 4 batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize @@ -50,7 +46,6 @@ overrides: scale: - 640 - 640 - keep_ratio: false transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -63,7 +58,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -71,14 +65,12 @@ overrides: val_subset: num_workers: 4 batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -88,7 +80,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -96,14 +87,12 @@ overrides: test_subset: num_workers: 4 batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -113,7 +102,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 3d80ea1536d..8dbbdf9a4ff 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -28,49 +28,32 @@ engine: callback_monitor: 
val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION - stack_images: True - data_format: coco_instances train_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic init_args: random_pop: false - pad_val: 114.0 max_cached_images: 20 img_scale: # (H, W) - 640 - 640 - class_path: otx.core.data.transform_libs.torchvision.RandomAffine init_args: - scaling_ratio_range: - - 0.5 - - 1.5 border: - -320 - -320 - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - init_args: - brightness_delta: 32 - contrast_range: - - 0.5 - - 1.5 - saturation_range: - - 0.5 - - 1.5 - hue_delta: 18 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: True + keep_ratio: true transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -83,7 +66,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -92,14 +74,13 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 416 - 416 - keep_ratio: True + keep_ratio: true transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -109,21 +90,19 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] std: [58.395, 57.12, 57.375] test_subset: batch_size: 8 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize 
init_args: scale: - 416 - 416 - keep_ratio: True + keep_ratio: true transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -133,7 +112,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index 62aacf115ec..e568b413542 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -28,36 +28,23 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION tile_config: enable_tiler: true enable_adaptive_tiling: true - stack_images: True - data_format: coco_instances train_subset: num_workers: 4 batch_size: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - init_args: - brightness_delta: 32 - contrast_range: - - 0.5 - - 1.5 - saturation_range: - - 0.5 - - 1.5 - hue_delta: 18 - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: false transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -70,7 +57,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -84,7 +70,6 @@ overrides: scale: - 416 - 416 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -94,7 +79,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: 
torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -108,7 +92,6 @@ overrides: scale: - 416 - 416 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -118,7 +101,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index cbf7fc59efa..d68b80cf605 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -28,31 +28,23 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION image_color_channel: BGR - stack_images: True - data_format: coco_instances train_subset: batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.CachedMosaic init_args: random_pop: false - pad_val: 114.0 max_cached_images: 20 img_scale: # (H, W) - 640 - 640 - class_path: otx.core.data.transform_libs.torchvision.RandomAffine init_args: - scaling_ratio_range: - - 0.5 - - 1.5 border: - -320 - -320 @@ -66,10 +58,6 @@ overrides: - 1.0 prob: 0.5 random_pop: false - pad_val: - - 114 - - 114 - - 114 max_cached_images: 10 - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize @@ -77,7 +65,7 @@ overrides: scale: - 640 - 640 - keep_ratio: True + keep_ratio: true transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -90,7 +78,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 
0.0] @@ -99,14 +86,13 @@ overrides: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler val_subset: batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: True + keep_ratio: true transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -116,21 +102,19 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] test_subset: batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: True + keep_ratio: true transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -140,7 +124,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index 67d2818bec8..074c6a931b9 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -28,21 +28,17 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 data: - task: DETECTION tile_config: enable_tiler: true enable_adaptive_tiling: true image_color_channel: BGR - stack_images: True - data_format: coco_instances train_subset: num_workers: 4 batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.YOLOXHSVRandomAug - class_path: otx.core.data.transform_libs.torchvision.Resize @@ -50,7 +46,6 @@ overrides: scale: - 640 - 640 - keep_ratio: false 
transform_bbox: true - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: @@ -63,7 +58,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -71,14 +65,12 @@ overrides: val_subset: num_workers: 4 batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -88,7 +80,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] @@ -96,14 +87,12 @@ overrides: test_subset: num_workers: 4 batch_size: 4 - to_tv_image: False transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: scale: - 640 - 640 - keep_ratio: false transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: @@ -113,7 +102,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [0.0, 0.0, 0.0] From 19f5f5149d4e761755dcf9009a645b05a412171b Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 13:31:39 +0900 Subject: [PATCH 02/33] Remove `torchvision_base.yaml` dependency --- src/otx/recipe/_base_/data/detection.yaml | 49 +++++++++++++++++------ 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/src/otx/recipe/_base_/data/detection.yaml b/src/otx/recipe/_base_/data/detection.yaml index 6ff9b86242e..ddc2f28ed7f 100644 --- a/src/otx/recipe/_base_/data/detection.yaml +++ b/src/otx/recipe/_base_/data/detection.yaml @@ -1,12 +1,37 @@ -data: 
../_base_/data/torchvision_base.yaml -overrides: - data: - task: DETECTION - data_format: coco_instances - stack_images: true - train_subset: - to_tv_image: false - val_subset: - to_tv_image: false - test_subset: - to_tv_image: false +task: DETECTION +mem_cache_size: 1GB +mem_cache_img_max_size: null +image_color_channel: RGB +stack_images: true +data_format: coco_instances +unannotated_items_ratio: 0.0 +train_subset: + subset_name: train + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler +val_subset: + subset_name: val + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler +test_subset: + subset_name: test + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler From 2c3062c24dc5d439c0814fe1ac7357f3ef9e411b Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 14:21:38 +0900 Subject: [PATCH 03/33] Update --- src/otx/recipe/detection/atss_mobilenetv2.yaml | 2 -- src/otx/recipe/detection/atss_mobilenetv2_tile.yaml | 2 -- src/otx/recipe/detection/atss_resnext101.yaml | 2 -- src/otx/recipe/detection/rtmdet_tiny.yaml | 2 -- src/otx/recipe/detection/ssd_mobilenetv2.yaml | 2 -- src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml | 2 -- src/otx/recipe/detection/yolox_l.yaml | 2 -- src/otx/recipe/detection/yolox_l_tile.yaml | 2 -- src/otx/recipe/detection/yolox_s.yaml | 2 -- src/otx/recipe/detection/yolox_s_tile.yaml | 2 -- src/otx/recipe/detection/yolox_tiny.yaml | 2 -- src/otx/recipe/detection/yolox_tiny_tile.yaml | 2 -- src/otx/recipe/detection/yolox_x.yaml | 2 -- 
src/otx/recipe/detection/yolox_x_tile.yaml | 2 -- 14 files changed, 28 deletions(-) diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index af6c2c43a24..f9785dfd7b3 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -69,7 +69,6 @@ overrides: scale: - 800 - 992 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -86,7 +85,6 @@ overrides: scale: - 800 - 992 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index fb70c5e7f8a..52dd77dde7c 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -66,7 +66,6 @@ overrides: scale: - 800 - 992 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -83,7 +82,6 @@ overrides: scale: - 800 - 992 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 1f6439e28d9..6911033bb55 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -69,7 +69,6 @@ overrides: scale: - 800 - 992 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -86,7 +85,6 @@ overrides: scale: - 800 - 992 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 55f9b1edf29..c97bfa5c358 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ 
b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -95,7 +95,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size: @@ -119,7 +118,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 9cc98d9bc9e..81fbd8b3429 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -64,7 +64,6 @@ overrides: scale: - 864 - 864 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -81,7 +80,6 @@ overrides: scale: - 864 - 864 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index db0c1832b0a..f52999e8981 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -67,7 +67,6 @@ overrides: scale: - 864 - 864 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: @@ -84,7 +83,6 @@ overrides: scale: - 864 - 864 - transform_bbox: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index c45267dd6a1..3804cfd75d6 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -96,7 +96,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -118,7 +117,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: 
otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index b979abe9d6f..1e6441f8069 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -71,7 +71,6 @@ overrides: scale: - 640 - 640 - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -93,7 +92,6 @@ overrides: scale: - 640 - 640 - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index aa3ea75a8c9..108684bddeb 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -96,7 +96,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -118,7 +117,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 2b51d3f7696..8082aa58167 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -71,7 +71,6 @@ overrides: scale: - 640 - 640 - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -93,7 +92,6 @@ overrides: scale: - 640 - 640 - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 8dbbdf9a4ff..f8cec8feda8 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -81,7 
+81,6 @@ overrides: - 416 - 416 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -103,7 +102,6 @@ overrides: - 416 - 416 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index e568b413542..d08012d7233 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -70,7 +70,6 @@ overrides: scale: - 416 - 416 - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -92,7 +91,6 @@ overrides: scale: - 416 - 416 - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index d68b80cf605..3d83fcf6092 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -93,7 +93,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -115,7 +114,6 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index 074c6a931b9..f1cec3d2a6e 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -71,7 +71,6 @@ overrides: scale: - 640 - 640 - transform_bbox: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true @@ -93,7 +92,6 @@ overrides: scale: - 640 - 640 - transform_bbox: false - class_path: 
otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true From 7288349cc77b70baada1544e39c5cef4f80b48fc Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 14:28:21 +0900 Subject: [PATCH 04/33] Refine iseg recipes --- .../_base_/data/instance_segmentation.yaml | 38 +++++++++++++++++++ .../maskrcnn_efficientnetb2b.yaml | 15 +------- .../maskrcnn_efficientnetb2b_tile.yaml | 18 +-------- .../instance_segmentation/maskrcnn_r50.yaml | 15 +------- .../maskrcnn_r50_tile.yaml | 18 +-------- .../maskrcnn_r50_tv.yaml | 15 +------- .../maskrcnn_r50_tv_tile.yaml | 18 +-------- .../instance_segmentation/maskrcnn_swint.yaml | 15 +------- .../maskrcnn_swint_tile.yaml | 18 +-------- .../instance_segmentation/openvino_model.yaml | 7 +--- .../rtmdet_inst_tiny.yaml | 23 +---------- .../rtmdet_inst_tiny_tile.yaml | 18 +-------- 12 files changed, 50 insertions(+), 168 deletions(-) create mode 100644 src/otx/recipe/_base_/data/instance_segmentation.yaml diff --git a/src/otx/recipe/_base_/data/instance_segmentation.yaml b/src/otx/recipe/_base_/data/instance_segmentation.yaml new file mode 100644 index 00000000000..7227f8a2f54 --- /dev/null +++ b/src/otx/recipe/_base_/data/instance_segmentation.yaml @@ -0,0 +1,38 @@ +task: INSTANCE_SEGMENTATION +mem_cache_size: 1GB +mem_cache_img_max_size: null +image_color_channel: RGB +stack_images: true +data_format: coco_instances +include_polygons: true +unannotated_items_ratio: 0.0 +train_subset: + subset_name: train + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler +val_subset: + subset_name: val + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler +test_subset: + subset_name: test + transform_lib_type: 
TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml index 50561eb92e9..d968f341375 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml @@ -28,14 +28,10 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 data: - task: INSTANCE_SEGMENTATION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 @@ -60,7 +56,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -74,8 +69,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 @@ -83,12 +76,10 @@ overrides: init_args: pad_to_square: true size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -100,8 +91,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 @@ -109,12 +98,10 @@ overrides: init_args: pad_to_square: true size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: 
${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml index f782919984a..e7983fc9b0a 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml @@ -28,24 +28,19 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 data: - task: INSTANCE_SEGMENTATION tile_config: enable_tiler: true enable_adaptive_tiling: true - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false transform_bbox: true transform_mask: true scale: @@ -62,7 +57,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -75,21 +69,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -100,21 +89,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - 
transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml index 5c02fda6aa7..89cccf852a0 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml @@ -28,15 +28,11 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 data: - task: INSTANCE_SEGMENTATION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 @@ -61,7 +57,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -73,8 +68,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 @@ -82,12 +75,10 @@ overrides: init_args: pad_to_square: true size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -99,8 +90,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 @@ -108,12 +97,10 @@ 
overrides: init_args: pad_to_square: true size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml index fa6dab56b4b..a56ea360183 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml @@ -28,25 +28,20 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 data: - task: INSTANCE_SEGMENTATION - stack_images: true tile_config: enable_tiler: true enable_adaptive_tiling: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false transform_bbox: true transform_mask: true scale: @@ -63,7 +58,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -74,21 +68,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -99,21 +88,16 @@ 
overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index c5c1e7d707a..bbc617f8f32 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -28,15 +28,11 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 data: - task: INSTANCE_SEGMENTATION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 @@ -60,7 +56,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -72,20 +67,16 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -97,20 +88,16 @@ overrides: - class_path: 
otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index 6ef16d7df8e..6cafc6c5e89 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -28,25 +28,20 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 data: - task: INSTANCE_SEGMENTATION - stack_images: true tile_config: enable_tiler: true enable_adaptive_tiling: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false transform_bbox: true transform_mask: true scale: @@ -63,7 +58,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -74,21 +68,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: 
torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -99,21 +88,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index a9490e7d7ce..64b2878cc02 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -27,14 +27,10 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 data: - task: INSTANCE_SEGMENTATION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 @@ -59,7 +55,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -71,8 +66,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1344 - 1344 @@ -80,12 +73,10 @@ overrides: init_args: pad_to_square: true size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: 
${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -97,8 +88,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1344 - 1344 @@ -106,12 +95,10 @@ overrides: init_args: pad_to_square: true size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml index 552f45178b6..e5d4d3512d0 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml @@ -27,24 +27,19 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 data: - task: INSTANCE_SEGMENTATION - stack_images: true tile_config: enable_tiler: true enable_adaptive_tiling: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false transform_bbox: true transform_mask: true scale: @@ -61,7 +56,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -72,21 +66,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: 
otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -97,21 +86,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml index ce9d8dfed1a..385957b9682 100644 --- a/src/otx/recipe/instance_segmentation/openvino_model.yaml +++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml @@ -13,12 +13,9 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: data: - task: INSTANCE_SEGMENTATION - include_polygons: true - image_color_channel: RGB - data_format: coco_instances + stack_images: false test_subset: batch_size: 64 diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml index 5fabd11ea98..748bee485c4 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml @@ -29,16 +29,12 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: precision: 16 max_epochs: 100 
gradient_clip_val: 35.0 data: - task: INSTANCE_SEGMENTATION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 @@ -48,7 +44,6 @@ overrides: img_scale: - 640 - 640 - pad_val: 114.0 max_cached_images: 20 random_pop: false - class_path: otx.core.data.transform_libs.torchvision.RandomResize @@ -86,21 +81,13 @@ overrides: - 1.0 max_cached_images: 10 random_pop: false - pad_val: - - 114 - - 114 - - 114 prob: 0.5 - class_path: otx.core.data.transform_libs.torchvision.FilterAnnotations init_args: - min_gt_bbox_wh: - - 1 - - 1 is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -115,18 +102,14 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - transform_mask: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true pad_val: 114 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -141,18 +124,14 @@ overrides: - 640 - 640 keep_ratio: true - transform_bbox: false - transform_mask: false - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true pad_val: 114 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml index eab92d0a3ee..ba2f6825f0f 100644 --- 
a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml @@ -29,26 +29,21 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/instance_segmentation.yaml overrides: precision: 16 max_epochs: 100 gradient_clip_val: 35.0 data: - task: INSTANCE_SEGMENTATION - stack_images: true tile_config: enable_tiler: true enable_adaptive_tiling: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false transform_bbox: true transform_mask: true scale: @@ -65,7 +60,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -76,21 +70,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -101,21 +90,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: 
torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] From a510f18a91f4586b876651e6325b2addcc22b4bb Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 14:51:36 +0900 Subject: [PATCH 05/33] Fix unit test --- tests/unit/engine/utils/test_auto_configurator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/unit/engine/utils/test_auto_configurator.py b/tests/unit/engine/utils/test_auto_configurator.py index 76fe01f5bd1..d2663d8cb45 100644 --- a/tests/unit/engine/utils/test_auto_configurator.py +++ b/tests/unit/engine/utils/test_auto_configurator.py @@ -172,12 +172,10 @@ def test_update_ov_subset_pipeline(self) -> None: "class_path": "otx.core.data.transform_libs.torchvision.Resize", "init_args": { "scale": [800, 992], - "keep_ratio": False, - "transform_bbox": False, "is_numpy_to_tvtensor": True, }, }, - {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": torch.float32, "scale": False}}, + {"class_path": "torchvision.transforms.v2.ToDtype", "init_args": {"dtype": torch.float32}}, { "class_path": "torchvision.transforms.v2.Normalize", "init_args": {"mean": [0.0, 0.0, 0.0], "std": [255.0, 255.0, 255.0]}, From e43cf37c55d62987b2d18a3967ec963f2b6ca1b4 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 14:55:31 +0900 Subject: [PATCH 06/33] Update iseg ov model --- src/otx/recipe/instance_segmentation/openvino_model.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml index 385957b9682..347c77adc72 100644 --- a/src/otx/recipe/instance_segmentation/openvino_model.yaml +++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml @@ -18,4 +18,5 @@ overrides: data: stack_images: false test_subset: + to_tv_image: true batch_size: 64 From d4f48a67e4633a0f6c0682edb00df3ce8918ed80 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 
15:00:58 +0900 Subject: [PATCH 07/33] Refine rotated det recipes --- .../recipe/_base_/data/rotated_detection.yaml | 38 +++++++++++++++++++ .../maskrcnn_efficientnetb2b.yaml | 15 +------- .../rotated_detection/maskrcnn_r50.yaml | 15 +------- .../rotated_detection/openvino_model.yaml | 8 ++-- 4 files changed, 43 insertions(+), 33 deletions(-) create mode 100644 src/otx/recipe/_base_/data/rotated_detection.yaml diff --git a/src/otx/recipe/_base_/data/rotated_detection.yaml b/src/otx/recipe/_base_/data/rotated_detection.yaml new file mode 100644 index 00000000000..0a4854646b8 --- /dev/null +++ b/src/otx/recipe/_base_/data/rotated_detection.yaml @@ -0,0 +1,38 @@ +task: ROTATED_DETECTION +mem_cache_size: 1GB +mem_cache_img_max_size: null +image_color_channel: RGB +stack_images: true +data_format: coco_instances +include_polygons: true +unannotated_items_ratio: 0.0 +train_subset: + subset_name: train + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler +val_subset: + subset_name: val + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler +test_subset: + subset_name: test + transform_lib_type: TORCHVISION + to_tv_image: false + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 2 + sampler: + class_path: torch.utils.data.RandomSampler diff --git a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml index 6d6be3f0e4b..e0e82f207c9 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml @@ -28,14 +28,10 @@ engine: callback_monitor: val/map_50 -data: 
../_base_/data/torchvision_base.yaml +data: ../_base_/data/rotated_detection.yaml overrides: max_epochs: 100 data: - task: ROTATED_DETECTION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 transforms: @@ -58,7 +54,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -71,20 +66,16 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -95,20 +86,16 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml index e0ccfbe8aae..1ea5d4b1e3b 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml @@ -28,14 +28,10 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/rotated_detection.yaml overrides: max_epochs: 
100 data: - task: ROTATED_DETECTION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 transforms: @@ -58,7 +54,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -69,20 +64,16 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -93,20 +84,16 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/rotated_detection/openvino_model.yaml b/src/otx/recipe/rotated_detection/openvino_model.yaml index 4ca0ef35640..d8401af3b36 100644 --- a/src/otx/recipe/rotated_detection/openvino_model.yaml +++ b/src/otx/recipe/rotated_detection/openvino_model.yaml @@ -13,12 +13,10 @@ engine: callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/rotated_detection.yaml overrides: data: - task: ROTATED_DETECTION - include_polygons: true - image_color_channel: RGB - data_format: 
coco_instances + stack_images: false test_subset: + to_tv_image: true batch_size: 24 From b0956946792e85e646d753315fb74a579ab2a48c Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 15:25:19 +0900 Subject: [PATCH 08/33] Refine sseg recipes --- .../_base_/data/semantic_segmentation.yaml | 82 +++++++++++++++++++ .../recipe/semantic_segmentation/dino_v2.yaml | 9 +- .../semantic_segmentation/litehrnet_18.yaml | 2 +- .../semantic_segmentation/litehrnet_s.yaml | 2 +- .../semantic_segmentation/litehrnet_x.yaml | 2 +- .../semantic_segmentation/openvino_model.yaml | 19 ++++- .../semantic_segmentation/segnext_b.yaml | 2 +- .../semantic_segmentation/segnext_s.yaml | 2 +- .../semantic_segmentation/segnext_t.yaml | 2 +- 9 files changed, 105 insertions(+), 17 deletions(-) create mode 100644 src/otx/recipe/_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml new file mode 100644 index 00000000000..32161c47b88 --- /dev/null +++ b/src/otx/recipe/_base_/data/semantic_segmentation.yaml @@ -0,0 +1,82 @@ +task: SEMANTIC_SEGMENTATION +mem_cache_size: 1GB +mem_cache_img_max_size: null +image_color_channel: RGB +data_format: common_semantic_segmentation_with_subset_dirs +include_polygons: true +unannotated_items_ratio: 0.0 +ignore_index: 255 +train_subset: + subset_name: train + batch_size: 8 + num_workers: 4 + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: torchvision.transforms.v2.RandomResizedCrop + init_args: + size: + - 512 + - 512 + scale: + - 0.2 + - 1.0 + ratio: + - 0.5 + - 2.0 + antialias: true + - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion + init_args: + is_numpy_to_tvtensor: true + - class_path: torchvision.transforms.v2.RandomHorizontalFlip + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: 
torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + sampler: + class_path: torch.utils.data.RandomSampler +val_subset: + subset_name: val + batch_size: 8 + num_workers: 4 + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: torchvision.transforms.v2.Resize + init_args: + size: + - 512 + - 512 + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + sampler: + class_path: torch.utils.data.RandomSampler +test_subset: + subset_name: test + num_workers: 4 + batch_size: 8 + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: torchvision.transforms.v2.Resize + init_args: + size: + - 512 + - 512 + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + sampler: + class_path: torch.utils.data.RandomSampler diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 91a8a5cff0e..234fa1315a6 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -69,7 +69,7 @@ engine: callback_monitor: val/Dice -data: ../_base_/data/mmseg_base.yaml +data: ../_base_/data/semantic_segmentation.yaml overrides: data: train_subset: @@ -85,17 +85,14 @@ overrides: ratio: - 0.5 - 2.0 - antialias: True + antialias: true - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion init_args: is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.RandomHorizontalFlip - init_args: - p: 0.5 - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: 
${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -111,7 +108,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -127,7 +123,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml index 78d7648e670..c9565a5e413 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml @@ -133,4 +133,4 @@ overrides: init_args: warmup_iters: 100 -data: ../_base_/data/mmseg_base.yaml +data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml index 3c79e8b29f1..9c1e818e5c5 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml @@ -126,4 +126,4 @@ overrides: init_args: warmup_iters: 100 -data: ../_base_/data/mmseg_base.yaml +data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml index 987000c6e1a..6ae3d1a4c8d 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml @@ -153,4 +153,4 @@ overrides: init_args: warmup_iters: 100 -data: ../_base_/data/mmseg_base.yaml +data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/openvino_model.yaml 
b/src/otx/recipe/semantic_segmentation/openvino_model.yaml index 75cc664eb3c..174800094ec 100644 --- a/src/otx/recipe/semantic_segmentation/openvino_model.yaml +++ b/src/otx/recipe/semantic_segmentation/openvino_model.yaml @@ -13,11 +13,22 @@ engine: callback_monitor: val/Dice -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/semantic_segmentation.yaml overrides: data: - task: SEMANTIC_SEGMENTATION - image_color_channel: RGB - data_format: common_semantic_segmentation_with_subset_dirs + stack_images: false + train_subset: + batch_size: 1 + num_workers: 2 + transforms: + - class_path: torchvision.transforms.v2.ToImage + val_subset: + batch_size: 1 + num_workers: 2 + transforms: + - class_path: torchvision.transforms.v2.ToImage test_subset: batch_size: 64 + num_workers: 2 + transforms: + - class_path: torchvision.transforms.v2.ToImage diff --git a/src/otx/recipe/semantic_segmentation/segnext_b.yaml b/src/otx/recipe/semantic_segmentation/segnext_b.yaml index ab88fe8569b..6a1bb620f54 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_b.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_b.yaml @@ -105,4 +105,4 @@ overrides: init_args: warmup_iters: 100 -data: ../_base_/data/mmseg_base.yaml +data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/segnext_s.yaml b/src/otx/recipe/semantic_segmentation/segnext_s.yaml index e6f1e719fd5..510fdccaf69 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_s.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_s.yaml @@ -106,4 +106,4 @@ overrides: init_args: warmup_iters: 100 -data: ../_base_/data/mmseg_base.yaml +data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/segnext_t.yaml b/src/otx/recipe/semantic_segmentation/segnext_t.yaml index 2e2ddfd8c9c..7f95019174b 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_t.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_t.yaml @@ -106,4 +106,4 @@ 
overrides: init_args: warmup_iters: 100 -data: ../_base_/data/mmseg_base.yaml +data: ../_base_/data/semantic_segmentation.yaml From 403d35ac46a8a9e579e0e57585c52a1828ac3500 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 15:42:00 +0900 Subject: [PATCH 09/33] Refine vpm recipes --- .../recipe/_base_/data/visual_prompting.yaml | 76 +++++++++++++++++++ .../visual_prompting/openvino_model.yaml | 19 +++-- .../recipe/visual_prompting/sam_tiny_vit.yaml | 59 +------------- .../recipe/visual_prompting/sam_vit_b.yaml | 59 +------------- 4 files changed, 91 insertions(+), 122 deletions(-) create mode 100644 src/otx/recipe/_base_/data/visual_prompting.yaml diff --git a/src/otx/recipe/_base_/data/visual_prompting.yaml b/src/otx/recipe/_base_/data/visual_prompting.yaml new file mode 100644 index 00000000000..18109a100a5 --- /dev/null +++ b/src/otx/recipe/_base_/data/visual_prompting.yaml @@ -0,0 +1,76 @@ +task: VISUAL_PROMPTING +mem_cache_size: 1GB +mem_cache_img_max_size: null +image_color_channel: RGB +stack_images: false +data_format: coco_instances +unannotated_items_ratio: 0.0 +vpm_config: + use_bbox: true + use_point: false + +train_subset: + subset_name: train + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge + init_args: + size: 1024 + antialias: true + - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + batch_size: 2 + num_workers: 4 + sampler: + class_path: torch.utils.data.RandomSampler + +val_subset: + subset_name: val + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge + init_args: + size: 1024 + antialias: true + 
- class_path: otx.core.data.transform_libs.torchvision.PadtoSquare + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + batch_size: 1 + num_workers: 4 + sampler: + class_path: torch.utils.data.RandomSampler + +test_subset: + subset_name: test + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge + init_args: + size: 1024 + antialias: true + - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] + batch_size: 1 + num_workers: 4 + sampler: + class_path: torch.utils.data.RandomSampler diff --git a/src/otx/recipe/visual_prompting/openvino_model.yaml b/src/otx/recipe/visual_prompting/openvino_model.yaml index ce5de53d47b..ecbb8b7cdab 100644 --- a/src/otx/recipe/visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/visual_prompting/openvino_model.yaml @@ -13,16 +13,23 @@ engine: callback_monitor: val/Dice -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/visual_prompting.yaml overrides: data: - task: VISUAL_PROMPTING - data_format: coco_instances - vpm_config: - use_bbox: True - use_point: False train_subset: batch_size: 1 num_workers: 0 # TODO (sungchul): CVS-135462 + transforms: + - class_path: torchvision.transforms.v2.ToImage + + val_subset: + batch_size: 1 + num_workers: 0 # TODO (sungchul): CVS-135462 + transforms: + - class_path: torchvision.transforms.v2.ToImage + test_subset: batch_size: 1 + num_workers: 0 # TODO (sungchul): CVS-135462 + transforms: + - class_path: torchvision.transforms.v2.ToImage diff --git 
a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml index 2b34eb75781..1dee69aca66 100644 --- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml @@ -29,67 +29,10 @@ engine: callback_monitor: val/f1-score -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/visual_prompting.yaml overrides: max_epochs: 100 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 3 - data: - task: VISUAL_PROMPTING - data_format: coco_instances - vpm_config: - use_bbox: True - use_point: False - train_subset: - batch_size: 2 - num_workers: 4 - transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: True - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - scale: False - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - val_subset: - batch_size: 1 - num_workers: 4 - transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: True - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - scale: False - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - test_subset: - batch_size: 1 - num_workers: 4 - transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: True - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: 
${as_torch_dtype:torch.float32} - scale: False - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml index f353586d530..f27cc5984c9 100644 --- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml @@ -29,67 +29,10 @@ engine: callback_monitor: val/f1-score -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/visual_prompting.yaml overrides: max_epochs: 100 callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: patience: 3 - data: - task: VISUAL_PROMPTING - data_format: coco_instances - vpm_config: - use_bbox: True - use_point: False - train_subset: - batch_size: 2 - num_workers: 4 - transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: True - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - scale: False - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - val_subset: - batch_size: 1 - num_workers: 4 - transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: True - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - scale: False - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] - test_subset: - batch_size: 1 - num_workers: 4 - transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 
- antialias: True - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - scale: False - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] From 7f6664511b596de96d11f01086426035d310b97c Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 15:43:04 +0900 Subject: [PATCH 10/33] Fix indent --- .../recipe/_base_/data/visual_prompting.yaml | 72 +++++++++---------- .../semantic_segmentation/openvino_model.yaml | 6 +- .../visual_prompting/openvino_model.yaml | 6 +- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/src/otx/recipe/_base_/data/visual_prompting.yaml b/src/otx/recipe/_base_/data/visual_prompting.yaml index 18109a100a5..b604542f4e0 100644 --- a/src/otx/recipe/_base_/data/visual_prompting.yaml +++ b/src/otx/recipe/_base_/data/visual_prompting.yaml @@ -14,18 +14,18 @@ train_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: true - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] + - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge + init_args: + size: 1024 + antialias: true + - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] batch_size: 2 num_workers: 4 sampler: @@ -36,18 +36,18 @@ 
val_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: true - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] + - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge + init_args: + size: 1024 + antialias: true + - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] batch_size: 1 num_workers: 4 sampler: @@ -58,18 +58,18 @@ test_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge - init_args: - size: 1024 - antialias: true - - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare - - class_path: torchvision.transforms.v2.ToDtype - init_args: - dtype: ${as_torch_dtype:torch.float32} - - class_path: torchvision.transforms.v2.Normalize - init_args: - mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] + - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge + init_args: + size: 1024 + antialias: true + - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare + - class_path: torchvision.transforms.v2.ToDtype + init_args: + dtype: ${as_torch_dtype:torch.float32} + - class_path: torchvision.transforms.v2.Normalize + init_args: + mean: [123.675, 116.28, 103.53] + std: [58.395, 57.12, 57.375] batch_size: 1 num_workers: 4 sampler: diff --git 
a/src/otx/recipe/semantic_segmentation/openvino_model.yaml b/src/otx/recipe/semantic_segmentation/openvino_model.yaml index 174800094ec..f5513f20566 100644 --- a/src/otx/recipe/semantic_segmentation/openvino_model.yaml +++ b/src/otx/recipe/semantic_segmentation/openvino_model.yaml @@ -21,14 +21,14 @@ overrides: batch_size: 1 num_workers: 2 transforms: - - class_path: torchvision.transforms.v2.ToImage + - class_path: torchvision.transforms.v2.ToImage val_subset: batch_size: 1 num_workers: 2 transforms: - - class_path: torchvision.transforms.v2.ToImage + - class_path: torchvision.transforms.v2.ToImage test_subset: batch_size: 64 num_workers: 2 transforms: - - class_path: torchvision.transforms.v2.ToImage + - class_path: torchvision.transforms.v2.ToImage diff --git a/src/otx/recipe/visual_prompting/openvino_model.yaml b/src/otx/recipe/visual_prompting/openvino_model.yaml index ecbb8b7cdab..2686044a507 100644 --- a/src/otx/recipe/visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/visual_prompting/openvino_model.yaml @@ -20,16 +20,16 @@ overrides: batch_size: 1 num_workers: 0 # TODO (sungchul): CVS-135462 transforms: - - class_path: torchvision.transforms.v2.ToImage + - class_path: torchvision.transforms.v2.ToImage val_subset: batch_size: 1 num_workers: 0 # TODO (sungchul): CVS-135462 transforms: - - class_path: torchvision.transforms.v2.ToImage + - class_path: torchvision.transforms.v2.ToImage test_subset: batch_size: 1 num_workers: 0 # TODO (sungchul): CVS-135462 transforms: - - class_path: torchvision.transforms.v2.ToImage + - class_path: torchvision.transforms.v2.ToImage From d90b5d46fdc1597d79682f89eb5a6dcba8dea321 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 15:46:36 +0900 Subject: [PATCH 11/33] Refine zsl recipes --- .../data/zero_shot_visual_prompting.yaml | 44 +++++++++++++++++++ .../openvino_model.yaml | 12 ++--- .../sam_tiny_vit.yaml | 17 +------ .../zero_shot_visual_prompting/sam_vit_b.yaml | 17 +------ 4 files changed, 50 
insertions(+), 40 deletions(-) create mode 100644 src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml diff --git a/src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml b/src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml new file mode 100644 index 00000000000..3e33261f4bc --- /dev/null +++ b/src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml @@ -0,0 +1,44 @@ +task: ZERO_SHOT_VISUAL_PROMPTING +mem_cache_size: 1GB +mem_cache_img_max_size: null +image_color_channel: RGB +stack_images: false +data_format: coco_instances +unannotated_items_ratio: 0.0 + +vpm_config: + use_bbox: True + use_point: False + +train_subset: + subset_name: train + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 4 + sampler: + class_path: torch.utils.data.RandomSampler + +val_subset: + subset_name: val + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 4 + sampler: + class_path: torch.utils.data.RandomSampler + +test_subset: + subset_name: test + transform_lib_type: TORCHVISION + to_tv_image: true + transforms: + - class_path: torchvision.transforms.v2.ToImage + batch_size: 1 + num_workers: 4 + sampler: + class_path: torch.utils.data.RandomSampler diff --git a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml index 777dde2a345..5ceb1838a97 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml @@ -16,18 +16,14 @@ engine: callback_monitor: step -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 data: - task: ZERO_SHOT_VISUAL_PROMPTING - data_format: coco_instances - vpm_config: - use_bbox: True - use_point: False 
train_subset: - batch_size: 1 + num_workers: 0 # TODO (sungchul): CVS-135462 + val_subset: num_workers: 0 # TODO (sungchul): CVS-135462 test_subset: - batch_size: 1 + num_workers: 0 # TODO (sungchul): CVS-135462 diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml index d93ed540a7e..fd98922a8fd 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml @@ -23,22 +23,7 @@ engine: callback_monitor: step -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 - data: - task: ZERO_SHOT_VISUAL_PROMPTING - data_format: coco_instances - vpm_config: - use_bbox: True - use_point: False - train_subset: - batch_size: 1 - num_workers: 4 - val_subset: - batch_size: 1 - num_workers: 4 - test_subset: - batch_size: 1 - num_workers: 4 diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml index 64a84003374..0fd90018098 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml @@ -23,22 +23,7 @@ engine: callback_monitor: step -data: ../_base_/data/torchvision_base.yaml +data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 - data: - task: ZERO_SHOT_VISUAL_PROMPTING - data_format: coco_instances - vpm_config: - use_bbox: True - use_point: False - train_subset: - batch_size: 1 - num_workers: 4 - val_subset: - batch_size: 1 - num_workers: 4 - test_subset: - batch_size: 1 - num_workers: 4 From 502163bf0cd8ac3cf9de3525ca01021252def855 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 17:46:59 +0900 Subject: [PATCH 12/33] Add model recipe and update det recipes --- src/otx/recipe/_base_/data/detection.yaml | 14 ++++--- 
src/otx/recipe/_base_/model/detection.yaml | 20 ++++++++++ .../recipe/detection/atss_mobilenetv2.yaml | 35 ++++++------------ .../detection/atss_mobilenetv2_tile.yaml | 35 ++++++------------ src/otx/recipe/detection/atss_resnext101.yaml | 34 +++++------------ src/otx/recipe/detection/rtmdet_tiny.yaml | 35 ++++++------------ src/otx/recipe/detection/ssd_mobilenetv2.yaml | 34 +++++------------ .../detection/ssd_mobilenetv2_tile.yaml | 35 ++++++------------ src/otx/recipe/detection/yolox_l.yaml | 34 +++++------------ src/otx/recipe/detection/yolox_l_tile.yaml | 37 ++++++------------- src/otx/recipe/detection/yolox_s.yaml | 34 +++++------------ src/otx/recipe/detection/yolox_s_tile.yaml | 37 ++++++------------- src/otx/recipe/detection/yolox_tiny.yaml | 34 +++++------------ src/otx/recipe/detection/yolox_tiny_tile.yaml | 35 ++++++------------ src/otx/recipe/detection/yolox_x.yaml | 34 +++++------------ src/otx/recipe/detection/yolox_x_tile.yaml | 37 ++++++------------- 16 files changed, 180 insertions(+), 344 deletions(-) create mode 100644 src/otx/recipe/_base_/model/detection.yaml diff --git a/src/otx/recipe/_base_/data/detection.yaml b/src/otx/recipe/_base_/data/detection.yaml index ddc2f28ed7f..61d11758871 100644 --- a/src/otx/recipe/_base_/data/detection.yaml +++ b/src/otx/recipe/_base_/data/detection.yaml @@ -8,30 +8,32 @@ unannotated_items_ratio: 0.0 train_subset: subset_name: train transform_lib_type: TORCHVISION + batch_size: 1 + num_workers: 2 to_tv_image: false transforms: - class_path: torchvision.transforms.v2.ToImage - batch_size: 1 - num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + val_subset: subset_name: val transform_lib_type: TORCHVISION + batch_size: 1 + num_workers: 2 to_tv_image: false transforms: - class_path: torchvision.transforms.v2.ToImage - batch_size: 1 - num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + test_subset: subset_name: test transform_lib_type: TORCHVISION + batch_size: 1 + num_workers: 2 
to_tv_image: false transforms: - class_path: torchvision.transforms.v2.ToImage - batch_size: 1 - num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler diff --git a/src/otx/recipe/_base_/model/detection.yaml b/src/otx/recipe/_base_/model/detection.yaml new file mode 100644 index 00000000000..ff29e7bed39 --- /dev/null +++ b/src/otx/recipe/_base_/model/detection.yaml @@ -0,0 +1,20 @@ +init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index f9785dfd7b3..97d9721cb6a 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -1,42 +1,27 @@ -model: - class_path: otx.algo.detection.atss.MobileNetV2ATSS - init_args: - label_info: 1000 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.004 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.atss.MobileNetV2ATSS + init_args: + optimizer: + init_args: + lr: 0.004 + callbacks: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 5 decay: -0.025 min_lrschedule_patience: 3 + 
data: train_subset: batch_size: 8 @@ -61,6 +46,7 @@ overrides: std: [255.0, 255.0, 255.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 8 transforms: @@ -77,6 +63,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] + test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index 52dd77dde7c..bb83f88b07d 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -1,40 +1,25 @@ -model: - class_path: otx.algo.detection.atss.MobileNetV2ATSS - init_args: - label_info: 1000 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.004 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.atss.MobileNetV2ATSS + init_args: + optimizer: + init_args: + lr: 0.004 + data: tile_config: enable_tiler: true enable_adaptive_tiling: true + train_subset: batch_size: 8 transforms: @@ -58,6 +43,7 @@ overrides: std: [255.0, 255.0, 255.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 8 transforms: @@ -74,6 +60,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] + test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 6911033bb55..613ec3d04d3 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ 
b/src/otx/recipe/detection/atss_resnext101.yaml @@ -1,42 +1,26 @@ -model: - class_path: otx.algo.detection.atss.ResNeXt101ATSS - init_args: - label_info: 1000 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.004 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.atss.ResNeXt101ATSS + optimizer: + init_args: + lr: 0.004 + callbacks: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: max_interval: 5 decay: -0.025 min_lrschedule_patience: 3 + data: train_subset: batch_size: 4 @@ -61,6 +45,7 @@ overrides: std: [255.0, 255.0, 255.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 4 transforms: @@ -77,6 +62,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] + test_subset: batch_size: 4 transforms: diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index c97bfa5c358..515d2cedb74 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -1,35 +1,22 @@ -model: - class_path: otx.algo.detection.rtmdet.RTMDetTiny - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.0007 - weight_decay: 0.05 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - 
monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.rtmdet.RTMDetTiny + init_args: + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.0007 + weight_decay: 0.05 + data: image_color_channel: BGR train_subset: @@ -86,6 +73,7 @@ overrides: init_args: mean: [103.53, 116.28, 123.675] std: [57.375, 57.12, 58.395] + val_subset: batch_size: 8 transforms: @@ -109,6 +97,7 @@ overrides: init_args: mean: [103.53, 116.28, 123.675] std: [57.375, 57.12, 58.395] + test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 81fbd8b3429..5402fbc29bf 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -1,36 +1,20 @@ -model: - class_path: otx.algo.detection.ssd.SSD - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.01 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.ssd.SSD + init_args: + optimizer: + init_args: + lr: 0.01 + data: train_subset: batch_size: 8 @@ -56,6 +40,7 @@ overrides: std: [255.0, 255.0, 255.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 8 transforms: @@ -72,6 +57,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] + 
test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index f52999e8981..53b9c575b1b 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -1,40 +1,25 @@ -model: - class_path: otx.algo.detection.ssd.SSD - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.01 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.ssd.SSD + init_args: + optimizer: + init_args: + lr: 0.01 + data: tile_config: enable_tiler: true enable_adaptive_tiling: true + train_subset: batch_size: 8 transforms: @@ -59,6 +44,7 @@ overrides: std: [255.0, 255.0, 255.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 8 transforms: @@ -75,6 +61,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [255.0, 255.0, 255.0] + test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index 3804cfd75d6..43bbbe1ba1d 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -1,36 +1,20 @@ -model: - class_path: otx.algo.detection.yolox.YOLOXL - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - 
num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.yolox.YOLOXL + init_args: + optimizer: + init_args: + lr: 0.001 + data: image_color_channel: BGR train_subset: @@ -87,6 +71,7 @@ overrides: std: [1.0, 1.0, 1.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 8 transforms: @@ -108,6 +93,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index 1e6441f8069..a5b6dbf0942 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -1,41 +1,26 @@ -model: - class_path: otx.algo.detection.yolox.YOLOXL - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.yolox.YOLOXL + init_args: + optimizer: + init_args: + lr: 0.001 + data: + image_color_channel: BGR tile_config: enable_tiler: true enable_adaptive_tiling: true - image_color_channel: BGR + train_subset: num_workers: 4 batch_size: 8 @@ -62,6 +47,7 @@ overrides: 
init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + val_subset: num_workers: 4 batch_size: 8 @@ -83,6 +69,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + test_subset: num_workers: 4 batch_size: 8 diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index 108684bddeb..6a58457689f 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -1,36 +1,20 @@ -model: - class_path: otx.algo.detection.yolox.YOLOXS - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.yolox.YOLOXS + init_args: + optimizer: + init_args: + lr: 0.001 + data: image_color_channel: BGR train_subset: @@ -87,6 +71,7 @@ overrides: std: [1.0, 1.0, 1.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 8 transforms: @@ -108,6 +93,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 8082aa58167..52cdcc679b6 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -1,41 +1,26 @@ -model: - class_path: otx.algo.detection.yolox.YOLOXS - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - 
scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.yolox.YOLOXS + init_args: + optimizer: + init_args: + lr: 0.001 + data: + image_color_channel: BGR tile_config: enable_tiler: true enable_adaptive_tiling: true - image_color_channel: BGR + train_subset: num_workers: 4 batch_size: 8 @@ -62,6 +47,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + val_subset: num_workers: 4 batch_size: 8 @@ -83,6 +69,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + test_subset: num_workers: 4 batch_size: 8 diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index f8cec8feda8..6cbaa4fe25e 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -1,36 +1,20 @@ -model: - class_path: otx.algo.detection.yolox.YOLOXTINY - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.0002 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.yolox.YOLOXTINY + init_args: + optimizer: + init_args: + lr: 0.0002 + data: train_subset: 
batch_size: 8 @@ -72,6 +56,7 @@ overrides: std: [58.395, 57.12, 57.375] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 8 transforms: @@ -93,6 +78,7 @@ overrides: init_args: mean: [123.675, 116.28, 103.53] std: [58.395, 57.12, 57.375] + test_subset: batch_size: 8 transforms: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index d08012d7233..9bfeb07724b 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -1,40 +1,25 @@ -model: - class_path: otx.algo.detection.yolox.YOLOXTINY - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.0002 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.yolox.YOLOXTINY + init_args: + optimizer: + init_args: + lr: 0.0002 + data: tile_config: enable_tiler: true enable_adaptive_tiling: true + train_subset: num_workers: 4 batch_size: 8 @@ -61,6 +46,7 @@ overrides: init_args: mean: [123.675, 116.28, 103.53] std: [58.395, 57.12, 57.375] + val_subset: num_workers: 4 batch_size: 8 @@ -82,6 +68,7 @@ overrides: init_args: mean: [123.675, 116.28, 103.53] std: [58.395, 57.12, 57.375] + test_subset: num_workers: 4 batch_size: 8 diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index 3d83fcf6092..0d6eebda5d3 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -1,36 +1,20 @@ -model: - 
class_path: otx.algo.detection.yolox.YOLOXX - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: otx.algo.detection.yolox.YOLOXX + init_args: + optimizer: + init_args: + lr: 0.001 + data: image_color_channel: BGR train_subset: @@ -84,6 +68,7 @@ overrides: std: [1.0, 1.0, 1.0] sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler + val_subset: batch_size: 4 transforms: @@ -105,6 +90,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + test_subset: batch_size: 4 transforms: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index f1cec3d2a6e..b2197bc7802 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -1,41 +1,26 @@ -model: - class_path: otx.algo.detection.yolox.YOLOXX - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - engine: task: DETECTION device: auto callback_monitor: val/map_50 +model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 + model: + class_path: 
otx.algo.detection.yolox.YOLOXX + init_args: + optimizer: + init_args: + lr: 0.001 + data: + image_color_channel: BGR tile_config: enable_tiler: true enable_adaptive_tiling: true - image_color_channel: BGR + train_subset: num_workers: 4 batch_size: 4 @@ -62,6 +47,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + val_subset: num_workers: 4 batch_size: 4 @@ -83,6 +69,7 @@ overrides: init_args: mean: [0.0, 0.0, 0.0] std: [1.0, 1.0, 1.0] + test_subset: num_workers: 4 batch_size: 4 From 22d648bcfb06096714551a2f595ca5fcf8337d6e Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Mon, 8 Jul 2024 18:00:57 +0900 Subject: [PATCH 13/33] Add base engine recipe and update recipes for det --- src/otx/recipe/_base_/engine/detection.yaml | 5 +++++ src/otx/recipe/detection/atss_mobilenetv2.yaml | 7 +------ src/otx/recipe/detection/atss_mobilenetv2_tile.yaml | 7 +------ src/otx/recipe/detection/atss_resnext101.yaml | 7 +------ src/otx/recipe/detection/openvino_model.yaml | 10 ++++------ src/otx/recipe/detection/rtmdet_tiny.yaml | 7 +------ src/otx/recipe/detection/ssd_mobilenetv2.yaml | 7 +------ src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml | 7 +------ src/otx/recipe/detection/yolox_l.yaml | 7 +------ src/otx/recipe/detection/yolox_l_tile.yaml | 7 +------ src/otx/recipe/detection/yolox_s.yaml | 7 +------ src/otx/recipe/detection/yolox_s_tile.yaml | 7 +------ src/otx/recipe/detection/yolox_tiny.yaml | 7 +------ src/otx/recipe/detection/yolox_tiny_tile.yaml | 7 +------ src/otx/recipe/detection/yolox_x.yaml | 7 +------ src/otx/recipe/detection/yolox_x_tile.yaml | 7 +------ 16 files changed, 23 insertions(+), 90 deletions(-) create mode 100644 src/otx/recipe/_base_/engine/detection.yaml diff --git a/src/otx/recipe/_base_/engine/detection.yaml b/src/otx/recipe/_base_/engine/detection.yaml new file mode 100644 index 00000000000..5d070989766 --- /dev/null +++ b/src/otx/recipe/_base_/engine/detection.yaml @@ -0,0 +1,5 @@ +engine: + task: DETECTION + device: auto 
+ +callback_monitor: val/map_50 diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index 97d9721cb6a..2e1d065434e 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index bb83f88b07d..01ba4c8c79d 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 613ec3d04d3..7a060d0c61f 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml index 66b7d61144f..e00b63e4780 100644 --- a/src/otx/recipe/detection/openvino_model.yaml +++ b/src/otx/recipe/detection/openvino_model.yaml @@ -7,14 +7,12 @@ model: model_type: "SSD" async_inference: True -engine: - task: DETECTION - device: cpu - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml data: ../_base_/data/detection.yaml overrides: + engine: + device: cpu + data: stack_images: false test_subset: 
diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 515d2cedb74..65d4973ec98 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 5402fbc29bf..1b886e59ff4 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index 53b9c575b1b..ef41cd153ca 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index 43bbbe1ba1d..bb0b88b4512 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index a5b6dbf0942..9c5030a1477 100644 --- 
a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index 6a58457689f..1eeaf35fd5a 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 52cdcc679b6..fa886b63245 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 6cbaa4fe25e..6a3504b77b7 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index 9bfeb07724b..81a3164ac59 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: 
../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index 0d6eebda5d3..f0adec989ec 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index b2197bc7802..ea9193d445d 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -1,9 +1,4 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 - +engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml overrides: From 2c03cb0509b17d96841e5bc703f8ba0f38561637 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Tue, 9 Jul 2024 10:00:39 +0900 Subject: [PATCH 14/33] Fix unit test and make recipes more readable --- src/otx/recipe/_base_/engine/detection.yaml | 7 ++----- src/otx/recipe/_base_/model/detection.yaml | 6 ------ src/otx/recipe/detection/atss_mobilenetv2.yaml | 6 ++++++ src/otx/recipe/detection/atss_mobilenetv2_tile.yaml | 6 ++++++ src/otx/recipe/detection/atss_resnext101.yaml | 13 ++++++++++--- src/otx/recipe/detection/openvino_model.yaml | 3 +++ src/otx/recipe/detection/rtmdet_tiny.yaml | 3 +++ src/otx/recipe/detection/ssd_mobilenetv2.yaml | 6 ++++++ src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml | 6 ++++++ src/otx/recipe/detection/yolox_l.yaml | 6 ++++++ src/otx/recipe/detection/yolox_l_tile.yaml | 6 ++++++ src/otx/recipe/detection/yolox_s.yaml | 6 ++++++ src/otx/recipe/detection/yolox_s_tile.yaml | 6 ++++++ src/otx/recipe/detection/yolox_tiny.yaml | 6 ++++++ 
src/otx/recipe/detection/yolox_tiny_tile.yaml | 6 ++++++ src/otx/recipe/detection/yolox_x.yaml | 6 ++++++ src/otx/recipe/detection/yolox_x_tile.yaml | 6 ++++++ 17 files changed, 90 insertions(+), 14 deletions(-) diff --git a/src/otx/recipe/_base_/engine/detection.yaml b/src/otx/recipe/_base_/engine/detection.yaml index 5d070989766..c1a665241ad 100644 --- a/src/otx/recipe/_base_/engine/detection.yaml +++ b/src/otx/recipe/_base_/engine/detection.yaml @@ -1,5 +1,2 @@ -engine: - task: DETECTION - device: auto - -callback_monitor: val/map_50 +task: DETECTION +device: auto diff --git a/src/otx/recipe/_base_/model/detection.yaml b/src/otx/recipe/_base_/model/detection.yaml index ff29e7bed39..e5e831fe673 100644 --- a/src/otx/recipe/_base_/model/detection.yaml +++ b/src/otx/recipe/_base_/model/detection.yaml @@ -1,12 +1,6 @@ init_args: label_info: 80 - optimizer: - class_path: torch.optim.SGD - init_args: - momentum: 0.9 - weight_decay: 0.0001 - scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index 2e1d065434e..acd63d2886e 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.atss.MobileNetV2ATSS init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.004 + momentum: 0.9 + weight_decay: 0.0001 callbacks: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index 01ba4c8c79d..e9d0ef2fd75 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ 
b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.atss.MobileNetV2ATSS init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.004 + momentum: 0.9 + weight_decay: 0.0001 data: tile_config: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 7a060d0c61f..8a2d62a4836 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -1,13 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.atss.ResNeXt101ATSS - optimizer: - init_args: - lr: 0.004 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.004 + momentum: 0.9 + weight_decay: 0.0001 callbacks: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml index e00b63e4780..dcb8b1606d1 100644 --- a/src/otx/recipe/detection/openvino_model.yaml +++ b/src/otx/recipe/detection/openvino_model.yaml @@ -9,6 +9,9 @@ model: engine: ../_base_/engine/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: engine: device: cpu diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 65d4973ec98..ed5ad069e63 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -1,6 +1,9 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: 
val/map_50 + overrides: gradient_clip_val: 35.0 model: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 1b886e59ff4..ebe42cc526a 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.ssd.SSD init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.01 + momentum: 0.9 + weight_decay: 0.0001 data: train_subset: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index ef41cd153ca..735755352c8 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.ssd.SSD init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.01 + momentum: 0.9 + weight_decay: 0.0001 data: tile_config: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index bb0b88b4512..644188c20b5 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXL init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 data: image_color_channel: BGR diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml 
b/src/otx/recipe/detection/yolox_l_tile.yaml index 9c5030a1477..bd4342f7aea 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXL init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 data: image_color_channel: BGR diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index 1eeaf35fd5a..b742b7f325f 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXS init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 data: image_color_channel: BGR diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index fa886b63245..a49dca23182 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXS init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 data: image_color_channel: BGR diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 6a3504b77b7..3072f423f46 100644 --- 
a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXTINY init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.0002 + momentum: 0.9 + weight_decay: 0.0001 data: train_subset: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index 81a3164ac59..940b9b6a9dc 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXTINY init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.0002 + momentum: 0.9 + weight_decay: 0.0001 data: tile_config: diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index f0adec989ec..fc1a20426b0 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -1,14 +1,20 @@ engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXX init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 data: image_color_channel: BGR diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index ea9193d445d..ae9e6843ad0 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -1,14 +1,20 @@ engine: 
../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml + +callback_monitor: val/map_50 + overrides: gradient_clip_val: 35.0 model: class_path: otx.algo.detection.yolox.YOLOXX init_args: optimizer: + class_path: torch.optim.SGD init_args: lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 data: image_color_channel: BGR From 6cc2a6471892b665a2f6fa865dcbcf44456cef84 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Tue, 9 Jul 2024 11:09:43 +0900 Subject: [PATCH 15/33] Update other tasks' recipes following det --- .../_base_/engine/instance_segmentation.yaml | 2 + .../_base_/engine/rotated_detection.yaml | 2 + .../_base_/engine/semantic_segmentation.yaml | 2 + .../_base_/engine/visual_prompting.yaml | 2 + .../engine/zero_shot_visual_prompting.yaml | 2 + .../_base_/model/instance_segmentation.yaml | 14 + .../_base_/model/rotated_detection.yaml | 14 + .../_base_/model/semantic_segmentation.yaml | 7 + .../recipe/_base_/model/visual_prompting.yaml | 22 ++ .../model/zero_shot_visual_prompting.yaml | 17 ++ src/otx/recipe/detection/openvino_model.yaml | 4 +- .../maskrcnn_efficientnetb2b.yaml | 41 +-- .../maskrcnn_efficientnetb2b_tile.yaml | 41 +-- .../instance_segmentation/maskrcnn_r50.yaml | 41 +-- .../maskrcnn_r50_tile.yaml | 41 +-- .../maskrcnn_r50_tv.yaml | 41 +-- .../maskrcnn_r50_tv_tile.yaml | 41 +-- .../instance_segmentation/maskrcnn_swint.yaml | 39 +-- .../maskrcnn_swint_tile.yaml | 39 +-- .../instance_segmentation/openvino_model.yaml | 13 +- .../rtmdet_inst_tiny.yaml | 50 ++-- .../rtmdet_inst_tiny_tile.yaml | 50 ++-- .../maskrcnn_efficientnetb2b.yaml | 41 +-- .../rotated_detection/maskrcnn_r50.yaml | 41 +-- .../rotated_detection/openvino_model.yaml | 13 +- .../recipe/semantic_segmentation/dino_v2.yaml | 5 +- .../semantic_segmentation/litehrnet_18.yaml | 241 ++++++++------- .../semantic_segmentation/litehrnet_s.yaml | 227 +++++++------- .../semantic_segmentation/litehrnet_x.yaml | 279 +++++++++--------- 
.../semantic_segmentation/openvino_model.yaml | 13 +- .../semantic_segmentation/segnext_b.yaml | 188 ++++++------ .../semantic_segmentation/segnext_s.yaml | 190 ++++++------ .../semantic_segmentation/segnext_t.yaml | 190 ++++++------ .../visual_prompting/openvino_model.yaml | 13 +- .../recipe/visual_prompting/sam_tiny_vit.yaml | 36 +-- .../recipe/visual_prompting/sam_vit_b.yaml | 36 +-- .../openvino_model.yaml | 8 +- .../sam_tiny_vit.yaml | 29 +- .../zero_shot_visual_prompting/sam_vit_b.yaml | 29 +- 39 files changed, 959 insertions(+), 1145 deletions(-) create mode 100644 src/otx/recipe/_base_/engine/instance_segmentation.yaml create mode 100644 src/otx/recipe/_base_/engine/rotated_detection.yaml create mode 100644 src/otx/recipe/_base_/engine/semantic_segmentation.yaml create mode 100644 src/otx/recipe/_base_/engine/visual_prompting.yaml create mode 100644 src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml create mode 100644 src/otx/recipe/_base_/model/instance_segmentation.yaml create mode 100644 src/otx/recipe/_base_/model/rotated_detection.yaml create mode 100644 src/otx/recipe/_base_/model/semantic_segmentation.yaml create mode 100644 src/otx/recipe/_base_/model/visual_prompting.yaml create mode 100644 src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml diff --git a/src/otx/recipe/_base_/engine/instance_segmentation.yaml b/src/otx/recipe/_base_/engine/instance_segmentation.yaml new file mode 100644 index 00000000000..83d0d12356b --- /dev/null +++ b/src/otx/recipe/_base_/engine/instance_segmentation.yaml @@ -0,0 +1,2 @@ +task: INSTANCE_SEGMENTATION +device: auto diff --git a/src/otx/recipe/_base_/engine/rotated_detection.yaml b/src/otx/recipe/_base_/engine/rotated_detection.yaml new file mode 100644 index 00000000000..9712b3795a8 --- /dev/null +++ b/src/otx/recipe/_base_/engine/rotated_detection.yaml @@ -0,0 +1,2 @@ +task: ROTATED_DETECTION +device: auto diff --git a/src/otx/recipe/_base_/engine/semantic_segmentation.yaml 
b/src/otx/recipe/_base_/engine/semantic_segmentation.yaml new file mode 100644 index 00000000000..13918779023 --- /dev/null +++ b/src/otx/recipe/_base_/engine/semantic_segmentation.yaml @@ -0,0 +1,2 @@ +task: SEMANTIC_SEGMENTATION +device: auto diff --git a/src/otx/recipe/_base_/engine/visual_prompting.yaml b/src/otx/recipe/_base_/engine/visual_prompting.yaml new file mode 100644 index 00000000000..a04edad33f6 --- /dev/null +++ b/src/otx/recipe/_base_/engine/visual_prompting.yaml @@ -0,0 +1,2 @@ +task: VISUAL_PROMPTING +device: auto diff --git a/src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml b/src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml new file mode 100644 index 00000000000..194ab50f67b --- /dev/null +++ b/src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml @@ -0,0 +1,2 @@ +task: ZERO_SHOT_VISUAL_PROMPTING +device: auto diff --git a/src/otx/recipe/_base_/model/instance_segmentation.yaml b/src/otx/recipe/_base_/model/instance_segmentation.yaml new file mode 100644 index 00000000000..b27e76b5db0 --- /dev/null +++ b/src/otx/recipe/_base_/model/instance_segmentation.yaml @@ -0,0 +1,14 @@ +init_args: + label_info: 80 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 \ No newline at end of file diff --git a/src/otx/recipe/_base_/model/rotated_detection.yaml b/src/otx/recipe/_base_/model/rotated_detection.yaml new file mode 100644 index 00000000000..1e0f38f44da --- /dev/null +++ b/src/otx/recipe/_base_/model/rotated_detection.yaml @@ -0,0 +1,14 @@ +init_args: + label_info: 80 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + 
patience: 9 + monitor: val/map_50 \ No newline at end of file diff --git a/src/otx/recipe/_base_/model/semantic_segmentation.yaml b/src/otx/recipe/_base_/model/semantic_segmentation.yaml new file mode 100644 index 00000000000..fc401bbc789 --- /dev/null +++ b/src/otx/recipe/_base_/model/semantic_segmentation.yaml @@ -0,0 +1,7 @@ +init_args: + label_info: 2 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 diff --git a/src/otx/recipe/_base_/model/visual_prompting.yaml b/src/otx/recipe/_base_/model/visual_prompting.yaml new file mode 100644 index 00000000000..3aa8bd70c12 --- /dev/null +++ b/src/otx/recipe/_base_/model/visual_prompting.yaml @@ -0,0 +1,22 @@ +class_path: otx.algo.visual_prompting.segment_anything.OTXSegmentAnything +init_args: + label_info: 0 + freeze_image_encoder: true + freeze_prompt_encoder: true + freeze_mask_decoder: false + # options + use_stability_score: false + return_single_mask: true + return_extra_metrics: false + stability_score_offset: 1. 
+ + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.00001 + + scheduler: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 \ No newline at end of file diff --git a/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml b/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml new file mode 100644 index 00000000000..532de6e04df --- /dev/null +++ b/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml @@ -0,0 +1,17 @@ +class_path: otx.algo.visual_prompting.zero_shot_segment_anything.OTXZeroShotSegmentAnything +init_args: + backbone: tiny_vit + label_info: 0 + freeze_image_encoder: true + freeze_prompt_encoder: true + freeze_mask_decoder: true + default_threshold_reference: 0.3 + default_threshold_target: 0.65 + save_outputs: true + reference_info_dir: reference_infos + infer_reference_info_root: ../.latest/train + # options + use_stability_score: false + return_single_mask: false + return_extra_metrics: false + stability_score_offset: 1. 
\ No newline at end of file diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml index dcb8b1606d1..4feb4e4c4bc 100644 --- a/src/otx/recipe/detection/openvino_model.yaml +++ b/src/otx/recipe/detection/openvino_model.yaml @@ -3,9 +3,9 @@ model: init_args: label_info: 80 model_name: ssd-resnet34-1200-onnx - use_throughput_mode: True model_type: "SSD" - async_inference: True + async_inference: true + use_throughput_mode: true engine: ../_base_/engine/detection.yaml data: ../_base_/data/detection.yaml diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml index d968f341375..716df2b11da 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml @@ -1,36 +1,21 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 + model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: train_subset: batch_size: 4 diff --git 
a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml index e7983fc9b0a..f69420a17b5 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml @@ -1,36 +1,21 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 + model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml index 89cccf852a0..82569e633b0 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml @@ -1,37 +1,22 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: 
otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml index a56ea360183..b5a5f9ff15c 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml @@ -1,37 +1,22 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: 
otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index bbc617f8f32..086fff7da0e 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -1,37 +1,22 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index 6cafc6c5e89..f934375ba89 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -1,37 +1,22 @@ -model: - class_path: 
otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index 64b2878cc02..727899e2cf2 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -1,35 +1,20 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.0001 - weight_decay: 0.05 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: 
../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 + model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT + init_args: + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.0001 + weight_decay: 0.05 + data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml index e5d4d3512d0..91304658984 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml @@ -1,35 +1,20 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.0001 - weight_decay: 0.05 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 + model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT + init_args: + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.0001 + weight_decay: 0.05 + data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml index 347c77adc72..e7115725a27 100644 --- a/src/otx/recipe/instance_segmentation/openvino_model.yaml +++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml @@ 
-4,17 +4,18 @@ model: label_info: 80 model_name: openvino.xml model_type: MaskRCNN - async_inference: True - use_throughput_mode: True + async_inference: true + use_throughput_mode: true -engine: - task: INSTANCE_SEGMENTATION - device: cpu +engine: ../_base_/engine/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: + engine: + device: cpu + data: stack_images: false test_subset: diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml index 748bee485c4..33b2d9da6aa 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml @@ -1,39 +1,31 @@ -model: - class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 9 - monitor: val/map_50 - min_lr: 4e-06 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: precision: 16 max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + init_args: + patience: 9 + min_lr: 4e-06 + 
data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml index ba2f6825f0f..ab0448b9cbf 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml @@ -1,39 +1,31 @@ -model: - class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 9 - monitor: val/map_50 - min_lr: 4e-06 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/instance_segmentation.yaml overrides: precision: 16 max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + init_args: + patience: 9 + min_lr: 4e-06 + data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml index e0e82f207c9..9a5d9cdec6d 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml @@ -1,36 +1,21 @@ -model: - class_path: 
otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 9 - monitor: val/map_50 - -engine: - task: ROTATED_DETECTION - device: auto +engine: ../_base_/engine/rotated_detection.yaml +model: ../_base_/model/rotated_detection.yaml +data: ../_base_/data/rotated_detection.yaml callback_monitor: val/map_50 -data: ../_base_/data/rotated_detection.yaml overrides: max_epochs: 100 + model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml index 1ea5d4b1e3b..fc64753527b 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml @@ -1,36 +1,21 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 9 - monitor: val/map_50 - -engine: - task: ROTATED_DETECTION - device: auto +engine: ../_base_/engine/rotated_detection.yaml +model: ../_base_/model/rotated_detection.yaml +data: ../_base_/data/rotated_detection.yaml callback_monitor: 
val/map_50 -data: ../_base_/data/rotated_detection.yaml overrides: max_epochs: 100 + model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/rotated_detection/openvino_model.yaml b/src/otx/recipe/rotated_detection/openvino_model.yaml index d8401af3b36..b11a4d1e285 100644 --- a/src/otx/recipe/rotated_detection/openvino_model.yaml +++ b/src/otx/recipe/rotated_detection/openvino_model.yaml @@ -4,17 +4,18 @@ model: label_info: 80 model_name: openvino.xml model_type: MaskRCNN - async_inference: True - use_throughput_mode: True + async_inference: true + use_throughput_mode: true -engine: - task: ROTATED_DETECTION - device: cpu +engine: ../_base_/engine/rotated_detection.yaml +data: ../_base_/data/rotated_detection.yaml callback_monitor: val/map_50 -data: ../_base_/data/rotated_detection.yaml overrides: + engine: + device: cpu + data: stack_images: false test_subset: diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 234fa1315a6..11f230b5384 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -63,12 +63,9 @@ model: power: 0.9 last_epoch: -1 -engine: - task: SEMANTIC_SEGMENTATION - device: auto - callback_monitor: val/Dice +engine: ../_base_/engine/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml overrides: data: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml index c9565a5e413..f84d4cd2fed 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml @@ -1,136 +1,131 @@ -model: - class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet - init_args: - label_info: 2 - 
name_base_model: LiteHRNet18 +engine: ../_base_/engine/semantic_segmentation.yaml +model: ../_base_/model/semantic_segmentation.yaml +data: ../_base_/data/semantic_segmentation.yaml - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 +callback_monitor: val/Dice - backbone_configuration: - norm_eval: false - extra: - stem: - stem_channels: 32 - out_channels: 32 - expand_ratio: 1 - strides: - - 2 - - 2 - extra_stride: false - input_norm: false - num_stages: 3 - stages_spec: - num_modules: - - 2 - - 4 - - 2 - num_branches: - - 2 - - 3 - - 4 - num_blocks: - - 2 - - 2 - - 2 - module_type: - - LITE - - LITE - - LITE - with_fuse: - - true - - true - - true - reduce_ratios: - - 8 - - 8 - - 8 - num_channels: - - - 40 - - 80 - - - 40 - - 80 - - 160 - - - 40 - - 80 - - 160 - - 320 - out_modules: - conv: - enable: false - channels: 320 - position_att: - enable: false - key_channels: 128 - value_channels: 320 - psp_size: - - 1 +overrides: + model: + class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet + init_args: + name_base_model: LiteHRNet18 + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + backbone_configuration: + norm_eval: false + extra: + stem: + stem_channels: 32 + out_channels: 32 + expand_ratio: 1 + strides: + - 2 + - 2 + extra_stride: false + input_norm: false + num_stages: 3 + stages_spec: + num_modules: + - 2 + - 4 + - 2 + num_branches: + - 2 - 3 - - 6 + - 4 + num_blocks: + - 2 + - 2 + - 2 + module_type: + - LITE + - LITE + - LITE + with_fuse: + - true + - true + - true + reduce_ratios: + - 8 + - 8 - 8 - local_att: + num_channels: + - - 40 + - 80 + - - 40 + - 80 + - 160 + - - 40 + - 80 + - 160 + - 320 + out_modules: + conv: + enable: false + channels: 320 + position_att: + enable: false + key_channels: 128 + value_channels: 320 + psp_size: + - 1 + - 3 + - 6 + - 8 + local_att: + enable: false + out_aggregator: enable: false - out_aggregator: - enable: false - add_input: false - 
pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnet18_imagenet1k_rsc.pth" + add_input: false + pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnet18_imagenet1k_rsc.pth" - decode_head_configuration: - norm_cfg: - type: BN - requires_grad: true - in_channels: - - 40 - - 80 - - 160 - - 320 - in_index: - - 0 - - 1 - - 2 - - 3 - input_transform: "multiple_select" - channels: 40 - enable_aggregator: True - kernel_size: 1 - num_convs: 1 - concat_input: false - dropout_ratio: -1 - align_corners: false + decode_head_configuration: + norm_cfg: + type: BN + requires_grad: true + in_channels: + - 40 + - 80 + - 160 + - 320 + in_index: + - 0 + - 1 + - 2 + - 3 + input_transform: "multiple_select" + channels: 40 + enable_aggregator: True + kernel_size: 1 + num_convs: 1 + concat_input: false + dropout_ratio: -1 + align_corners: false - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.001 - betas: - - 0.9 - - 0.999 - weight_decay: 0.0 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/Dice - -engine: - task: SEMANTIC_SEGMENTATION - device: auto + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.001 + betas: + - 0.9 + - 0.999 + weight_decay: 0.0 -callback_monitor: val/Dice + scheduler: + init_args: + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/Dice -overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: warmup_iters: 100 - -data: ../_base_/data/semantic_segmentation.yaml diff --git 
a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml index 9c1e818e5c5..f9f2df321eb 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml @@ -1,129 +1,124 @@ -model: - class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet - init_args: - label_info: 2 - name_base_model: LiteHRNetS +engine: ../_base_/engine/semantic_segmentation.yaml +model: ../_base_/model/semantic_segmentation.yaml +data: ../_base_/data/semantic_segmentation.yaml - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 +callback_monitor: val/Dice - backbone_configuration: - norm_cfg: - type: BN - requires_grad: true - norm_eval: false - extra: - stem: - stem_channels: 32 - out_channels: 32 - expand_ratio: 1 - strides: - - 2 - - 2 - extra_stride: true - input_norm: false - num_stages: 2 - stages_spec: - num_modules: - - 4 - - 4 - num_branches: - - 2 - - 3 - num_blocks: - - 2 - - 2 - module_type: - - LITE - - LITE - with_fuse: - - true - - true - reduce_ratios: - - 8 - - 8 - num_channels: - - - 60 - - 120 - - - 60 - - 120 - - 240 - out_modules: - conv: - enable: false - channels: 160 - position_att: - enable: false - key_channels: 64 - value_channels: 240 - psp_size: - - 1 +overrides: + model: + class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet + init_args: + name_base_model: LiteHRNetS + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + backbone_configuration: + norm_cfg: + type: BN + requires_grad: true + norm_eval: false + extra: + stem: + stem_channels: 32 + out_channels: 32 + expand_ratio: 1 + strides: + - 2 + - 2 + extra_stride: true + input_norm: false + num_stages: 2 + stages_spec: + num_modules: + - 4 + - 4 + num_branches: + - 2 - 3 - - 6 + num_blocks: + - 2 + - 2 + module_type: + - LITE + - LITE + with_fuse: + - true + - true + reduce_ratios: + - 8 - 8 - local_att: + num_channels: + - - 
60 + - 120 + - - 60 + - 120 + - 240 + out_modules: + conv: + enable: false + channels: 160 + position_att: + enable: false + key_channels: 64 + value_channels: 240 + psp_size: + - 1 + - 3 + - 6 + - 8 + local_att: + enable: false + out_aggregator: enable: false - out_aggregator: - enable: false - add_input: false - pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnetsv2_imagenet1k_rsc.pth" + add_input: false + pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnetsv2_imagenet1k_rsc.pth" - decode_head_configuration: - norm_cfg: - type: BN - requires_grad: true - in_channels: - - 60 - - 120 - - 240 - in_index: - - 0 - - 1 - - 2 - input_transform: "multiple_select" - channels: 60 - kernel_size: 1 - num_convs: 1 - concat_input: false - enable_aggregator: True - aggregator_merge_norm: None - aggregator_use_concat: False - dropout_ratio: -1 - align_corners: false + decode_head_configuration: + norm_cfg: + type: BN + requires_grad: true + in_channels: + - 60 + - 120 + - 240 + in_index: + - 0 + - 1 + - 2 + input_transform: "multiple_select" + channels: 60 + kernel_size: 1 + num_convs: 1 + concat_input: false + enable_aggregator: True + aggregator_merge_norm: None + aggregator_use_concat: False + dropout_ratio: -1 + align_corners: false - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.001 - betas: - - 0.9 - - 0.999 - weight_decay: 0.0 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/Dice - -engine: - task: SEMANTIC_SEGMENTATION - device: auto + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.001 + betas: + - 0.9 + - 0.999 + weight_decay: 0.0 
-callback_monitor: val/Dice + scheduler: + init_args: + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/Dice -overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: warmup_iters: 100 - -data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml index 6ae3d1a4c8d..0b45ef1d5be 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml @@ -1,156 +1,151 @@ -model: - class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet - init_args: - label_info: 2 - name_base_model: LiteHRNetX +engine: ../_base_/engine/semantic_segmentation.yaml +model: ../_base_/model/semantic_segmentation.yaml +data: ../_base_/data/semantic_segmentation.yaml - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 +callback_monitor: val/Dice - backbone_configuration: - norm_cfg: - type: BN - requires_grad: true - norm_eval: false - extra: - stem: - stem_channels: 60 - out_channels: 60 - expand_ratio: 1 - strides: - - 2 - - 1 - extra_stride: false - input_norm: false - num_stages: 4 - stages_spec: - weighting_module_version: v1 - num_modules: - - 2 - - 4 - - 4 - - 2 - num_branches: - - 2 - - 3 - - 4 - - 5 - num_blocks: - - 2 - - 2 - - 2 - - 2 - module_type: - - LITE - - LITE - - LITE - - LITE - with_fuse: - - true - - true - - true - - true - reduce_ratios: - - 2 - - 4 - - 8 - - 8 - num_channels: - - - 18 - - 60 - - - 18 - - 60 - - 80 - - - 18 - - 60 - - 80 - - 160 - - - 18 - - 60 - - 80 - - 160 - - 320 - out_modules: - conv: - enable: false - channels: 320 - position_att: - enable: false - key_channels: 128 - value_channels: 320 - psp_size: +overrides: + model: + class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet + init_args: + 
name_base_model: LiteHRNetX + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + backbone_configuration: + norm_cfg: + type: BN + requires_grad: true + norm_eval: false + extra: + stem: + stem_channels: 60 + out_channels: 60 + expand_ratio: 1 + strides: + - 2 - 1 + extra_stride: false + input_norm: false + num_stages: 4 + stages_spec: + weighting_module_version: v1 + num_modules: + - 2 + - 4 + - 4 + - 2 + num_branches: + - 2 - 3 - - 6 + - 4 + - 5 + num_blocks: + - 2 + - 2 + - 2 + - 2 + module_type: + - LITE + - LITE + - LITE + - LITE + with_fuse: + - true + - true + - true + - true + reduce_ratios: + - 2 + - 4 + - 8 - 8 - local_att: + num_channels: + - - 18 + - 60 + - - 18 + - 60 + - 80 + - - 18 + - 60 + - 80 + - 160 + - - 18 + - 60 + - 80 + - 160 + - 320 + out_modules: + conv: + enable: false + channels: 320 + position_att: + enable: false + key_channels: 128 + value_channels: 320 + psp_size: + - 1 + - 3 + - 6 + - 8 + local_att: + enable: false + out_aggregator: enable: false - out_aggregator: - enable: false - add_input: false - pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnetxv3_imagenet1k_rsc.pth" + add_input: false + pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnetxv3_imagenet1k_rsc.pth" - decode_head_configuration: - norm_cfg: - type: BN - requires_grad: true - in_channels: - - 18 - - 60 - - 80 - - 160 - - 320 - in_index: - - 0 - - 1 - - 2 - - 3 - - 4 - input_transform: "multiple_select" - channels: 60 - kernel_size: 1 - num_convs: 1 - concat_input: false - dropout_ratio: -1 - enable_aggregator: True - aggregator_min_channels: 60 - aggregator_merge_norm: None - aggregator_use_concat: False - align_corners: false + decode_head_configuration: + norm_cfg: + type: BN + requires_grad: true + in_channels: + - 18 + - 60 + - 80 + - 160 + - 
320 + in_index: + - 0 + - 1 + - 2 + - 3 + - 4 + input_transform: "multiple_select" + channels: 60 + kernel_size: 1 + num_convs: 1 + concat_input: false + dropout_ratio: -1 + enable_aggregator: True + aggregator_min_channels: 60 + aggregator_merge_norm: None + aggregator_use_concat: False + align_corners: false - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.001 - betas: - - 0.9 - - 0.999 - weight_decay: 0.0 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/Dice - -engine: - task: SEMANTIC_SEGMENTATION - device: auto + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.001 + betas: + - 0.9 + - 0.999 + weight_decay: 0.0 -callback_monitor: val/Dice + scheduler: + init_args: + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/Dice -overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: warmup_iters: 100 - -data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/openvino_model.yaml b/src/otx/recipe/semantic_segmentation/openvino_model.yaml index f5513f20566..486caa05053 100644 --- a/src/otx/recipe/semantic_segmentation/openvino_model.yaml +++ b/src/otx/recipe/semantic_segmentation/openvino_model.yaml @@ -3,18 +3,19 @@ model: init_args: label_info: 19 model_name: drn-d-38 - async_inference: True - use_throughput_mode: True model_type: "Segmentation" + async_inference: true + use_throughput_mode: true -engine: - task: SEMANTIC_SEGMENTATION - device: cpu +engine: ../_base_/engine/semantic_segmentation.yaml +data: ../_base_/data/semantic_segmentation.yaml callback_monitor: val/Dice -data: ../_base_/data/semantic_segmentation.yaml overrides: 
+ engine: + device: cpu + data: stack_images: false train_subset: diff --git a/src/otx/recipe/semantic_segmentation/segnext_b.yaml b/src/otx/recipe/semantic_segmentation/segnext_b.yaml index 6a1bb620f54..0064ac8b5ec 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_b.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_b.yaml @@ -1,108 +1,104 @@ -model: - class_path: otx.algo.segmentation.segnext.OTXSegNext - init_args: - label_info: 2 - name_base_model: SegNextB +engine: ../_base_/engine/semantic_segmentation.yaml +model: ../_base_/model/semantic_segmentation.yaml +data: ../_base_/data/semantic_segmentation.yaml - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 +callback_monitor: val/Dice - backbone_configuration: - act_cfg: - type: GELU - attention_kernel_paddings: - - 2 - - - 0 - - 3 - - - 0 - - 5 - - - 0 - - 10 - attention_kernel_sizes: - - 5 - - - 1 - - 7 - - - 1 - - 11 - - - 1 - - 21 - depths: - - 3 - - 3 - - 12 - - 3 - drop_path_rate: 0.1 - drop_rate: 0.0 - embed_dims: - - 64 - - 128 - - 320 - - 512 - mlp_ratios: - - 8 - - 8 - - 4 - - 4 - norm_cfg: - requires_grad: true - type: BN - pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_b_20230227-3ab7d230.pth +overrides: + model: + class_path: otx.algo.segmentation.segnext.OTXSegNext + init_args: + name_base_model: SegNextB - decode_head_configuration: - ham_kwargs: - md_r: 16 - md_s: 1 - eval_steps: 7 - train_steps: 6 - in_channels: - - 128 - - 320 - - 512 - in_index: - - 1 - - 2 - - 3 - norm_cfg: - num_groups: 32 - requires_grad: true - type: GN - align_corners: false - channels: 512 - dropout_ratio: 0.1 - ham_channels: 512 + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.00006 - betas: - - 0.9 - - 0.999 - weight_decay: 0.01 + backbone_configuration: + act_cfg: + type: GELU + attention_kernel_paddings: + - 2 + - - 0 + - 3 + 
- - 0 + - 5 + - - 0 + - 10 + attention_kernel_sizes: + - 5 + - - 1 + - 7 + - - 1 + - 11 + - - 1 + - 21 + depths: + - 3 + - 3 + - 12 + - 3 + drop_path_rate: 0.1 + drop_rate: 0.0 + embed_dims: + - 64 + - 128 + - 320 + - 512 + mlp_ratios: + - 8 + - 8 + - 4 + - 4 + norm_cfg: + requires_grad: true + type: BN + pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_b_20230227-3ab7d230.pth - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: torch.optim.lr_scheduler.PolynomialLR - init_args: - total_iters: 100 - power: 0.9 - last_epoch: -1 + decode_head_configuration: + ham_kwargs: + md_r: 16 + md_s: 1 + eval_steps: 7 + train_steps: 6 + in_channels: + - 128 + - 320 + - 512 + in_index: + - 1 + - 2 + - 3 + norm_cfg: + num_groups: 32 + requires_grad: true + type: GN + align_corners: false + channels: 512 + dropout_ratio: 0.1 + ham_channels: 512 -engine: - task: SEMANTIC_SEGMENTATION - device: auto + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.00006 + betas: + - 0.9 + - 0.999 + weight_decay: 0.01 -callback_monitor: val/Dice + scheduler: + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 -overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: warmup_iters: 100 - -data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/segnext_s.yaml b/src/otx/recipe/semantic_segmentation/segnext_s.yaml index 510fdccaf69..3e439ef8931 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_s.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_s.yaml @@ -1,109 +1,105 @@ -model: - class_path: otx.algo.segmentation.segnext.OTXSegNext - init_args: - label_info: 2 - name_base_model: SegNextS +engine: 
../_base_/engine/semantic_segmentation.yaml +model: ../_base_/model/semantic_segmentation.yaml +data: ../_base_/data/semantic_segmentation.yaml - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 +callback_monitor: val/Dice - backbone_configuration: - act_cfg: - type: GELU - attention_kernel_paddings: - - 2 - - - 0 - - 3 - - - 0 - - 5 - - - 0 - - 10 - attention_kernel_sizes: - - 5 - - - 1 - - 7 - - - 1 - - 11 - - - 1 - - 21 - depths: - - 2 - - 2 - - 4 - - 2 - drop_path_rate: 0.1 - drop_rate: 0.0 - embed_dims: - - 64 - - 128 - - 320 - - 512 - mlp_ratios: - - 8 - - 8 - - 4 - - 4 - norm_cfg: - requires_grad: true - type: BN - pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_s_20230227-f33ccdf2.pth +overrides: + model: + class_path: otx.algo.segmentation.segnext.OTXSegNext + init_args: + name_base_model: SegNextS - decode_head_configuration: - norm_cfg: - num_groups: 32 - requires_grad: true - type: GN - ham_kwargs: - md_r: 16 - md_s: 1 - eval_steps: 7 - rand_init: true - train_steps: 6 - in_channels: - - 128 - - 320 - - 512 - in_index: - - 1 - - 2 - - 3 - align_corners: false - channels: 256 - dropout_ratio: 0.1 - ham_channels: 256 + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.00006 - betas: - - 0.9 - - 0.999 - weight_decay: 0.01 + backbone_configuration: + act_cfg: + type: GELU + attention_kernel_paddings: + - 2 + - - 0 + - 3 + - - 0 + - 5 + - - 0 + - 10 + attention_kernel_sizes: + - 5 + - - 1 + - 7 + - - 1 + - 11 + - - 1 + - 21 + depths: + - 2 + - 2 + - 4 + - 2 + drop_path_rate: 0.1 + drop_rate: 0.0 + embed_dims: + - 64 + - 128 + - 320 + - 512 + mlp_ratios: + - 8 + - 8 + - 4 + - 4 + norm_cfg: + requires_grad: true + type: BN + pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_s_20230227-f33ccdf2.pth - scheduler: - class_path: 
otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: torch.optim.lr_scheduler.PolynomialLR - init_args: - total_iters: 100 - power: 0.9 - last_epoch: -1 + decode_head_configuration: + norm_cfg: + num_groups: 32 + requires_grad: true + type: GN + ham_kwargs: + md_r: 16 + md_s: 1 + eval_steps: 7 + rand_init: true + train_steps: 6 + in_channels: + - 128 + - 320 + - 512 + in_index: + - 1 + - 2 + - 3 + align_corners: false + channels: 256 + dropout_ratio: 0.1 + ham_channels: 256 -engine: - task: SEMANTIC_SEGMENTATION - device: auto + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.00006 + betas: + - 0.9 + - 0.999 + weight_decay: 0.01 -callback_monitor: val/Dice + scheduler: + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 -overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: warmup_iters: 100 - -data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/semantic_segmentation/segnext_t.yaml b/src/otx/recipe/semantic_segmentation/segnext_t.yaml index 7f95019174b..6a9bee0124b 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_t.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_t.yaml @@ -1,109 +1,105 @@ -model: - class_path: otx.algo.segmentation.segnext.OTXSegNext - init_args: - label_info: 2 - name_base_model: SegNextT +engine: ../_base_/engine/semantic_segmentation.yaml +model: ../_base_/model/semantic_segmentation.yaml +data: ../_base_/data/semantic_segmentation.yaml - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 +callback_monitor: val/Dice - backbone_configuration: - act_cfg: - type: GELU - attention_kernel_paddings: - - 2 - - - 0 - - 3 - - - 0 - - 5 - - - 0 - - 10 - attention_kernel_sizes: - - 5 - - - 1 - - 7 - - - 1 - - 11 - - 
- 1 - - 21 - depths: - - 3 - - 3 - - 5 - - 2 - drop_path_rate: 0.1 - drop_rate: 0.0 - embed_dims: - - 32 - - 64 - - 160 - - 256 - mlp_ratios: - - 8 - - 8 - - 4 - - 4 - norm_cfg: - requires_grad: true - type: BN - pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_t_20230227-119e8c9f.pth +overrides: + model: + class_path: otx.algo.segmentation.segnext.OTXSegNext + init_args: + name_base_model: SegNextT - decode_head_configuration: - ham_kwargs: - md_r: 16 - md_s: 1 - eval_steps: 7 - rand_init: true - train_steps: 6 - norm_cfg: - num_groups: 32 - requires_grad: true - type: GN - in_channels: - - 64 - - 160 - - 256 - in_index: - - 1 - - 2 - - 3 - align_corners: false - channels: 256 - dropout_ratio: 0.1 - ham_channels: 256 + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.00006 - betas: - - 0.9 - - 0.999 - weight_decay: 0.01 + backbone_configuration: + act_cfg: + type: GELU + attention_kernel_paddings: + - 2 + - - 0 + - 3 + - - 0 + - 5 + - - 0 + - 10 + attention_kernel_sizes: + - 5 + - - 1 + - 7 + - - 1 + - 11 + - - 1 + - 21 + depths: + - 3 + - 3 + - 5 + - 2 + drop_path_rate: 0.1 + drop_rate: 0.0 + embed_dims: + - 32 + - 64 + - 160 + - 256 + mlp_ratios: + - 8 + - 8 + - 4 + - 4 + norm_cfg: + requires_grad: true + type: BN + pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_t_20230227-119e8c9f.pth - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: torch.optim.lr_scheduler.PolynomialLR - init_args: - total_iters: 100 - power: 0.9 - last_epoch: -1 + decode_head_configuration: + ham_kwargs: + md_r: 16 + md_s: 1 + eval_steps: 7 + rand_init: true + train_steps: 6 + norm_cfg: + num_groups: 32 + requires_grad: true + type: GN + in_channels: + - 64 + - 160 + - 256 + in_index: + - 1 + - 2 + - 3 
+ align_corners: false + channels: 256 + dropout_ratio: 0.1 + ham_channels: 256 -engine: - task: SEMANTIC_SEGMENTATION - device: auto + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.00006 + betas: + - 0.9 + - 0.999 + weight_decay: 0.01 -callback_monitor: val/Dice + scheduler: + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 -overrides: callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: warmup_iters: 100 - -data: ../_base_/data/semantic_segmentation.yaml diff --git a/src/otx/recipe/visual_prompting/openvino_model.yaml b/src/otx/recipe/visual_prompting/openvino_model.yaml index 2686044a507..671aa120e53 100644 --- a/src/otx/recipe/visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/visual_prompting/openvino_model.yaml @@ -4,17 +4,18 @@ model: label_info: 0 model_name: segment_anything model_type: Visual_Prompting - async_inference: False - use_throughput_mode: False + async_inference: false + use_throughput_mode: false -engine: - task: VISUAL_PROMPTING - device: cpu +engine: ../_base_/engine/visual_prompting.yaml +data: ../_base_/data/visual_prompting.yaml callback_monitor: val/Dice -data: ../_base_/data/visual_prompting.yaml overrides: + engine: + device: cpu + data: train_subset: batch_size: 1 diff --git a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml index 1dee69aca66..3562ca85446 100644 --- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml @@ -1,37 +1,15 @@ -model: - class_path: otx.algo.visual_prompting.segment_anything.OTXSegmentAnything - init_args: - backbone: tiny_vit - label_info: 0 - freeze_image_encoder: True - freeze_prompt_encoder: True - freeze_mask_decoder: False - # options - use_stability_score: False - return_single_mask: True - 
return_extra_metrics: False - stability_score_offset: 1. - - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.00001 - - scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR - init_args: - factor: 1 - total_iters: -1 - -engine: - task: VISUAL_PROMPTING - device: auto +engine: ../_base_/engine/visual_prompting.yaml +model: ../_base_/model/visual_prompting.yaml +data: ../_base_/data/visual_prompting.yaml callback_monitor: val/f1-score -data: ../_base_/data/visual_prompting.yaml overrides: max_epochs: 100 + model: + init_args: + backbone: tiny_vit + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml index f27cc5984c9..14301e60143 100644 --- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml @@ -1,37 +1,15 @@ -model: - class_path: otx.algo.visual_prompting.segment_anything.OTXSegmentAnything - init_args: - backbone: vit_b - label_info: 0 - freeze_image_encoder: True - freeze_prompt_encoder: True - freeze_mask_decoder: False - # options - use_stability_score: False - return_single_mask: True - return_extra_metrics: False - stability_score_offset: 1. 
- - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.00001 - - scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR - init_args: - factor: 1 - total_iters: -1 - -engine: - task: VISUAL_PROMPTING - device: auto +engine: ../_base_/engine/visual_prompting.yaml +model: ../_base_/model/visual_prompting.yaml +data: ../_base_/data/visual_prompting.yaml callback_monitor: val/f1-score -data: ../_base_/data/visual_prompting.yaml overrides: max_epochs: 100 + model: + init_args: + backbone: vit_b + callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml index 5ceb1838a97..78adbb290cd 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml @@ -10,16 +10,16 @@ model: infer_reference_info_root: ../.latest/train save_outputs: True -engine: - task: ZERO_SHOT_VISUAL_PROMPTING - device: cpu - callback_monitor: step +engine: ../_base_/engine/zero_shot_visual_prompting.yaml data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 + engine: + device: cpu + data: train_subset: num_workers: 0 # TODO (sungchul): CVS-135462 diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml index fd98922a8fd..afd4b7f7a97 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml @@ -1,29 +1,12 @@ -model: - class_path: otx.algo.visual_prompting.zero_shot_segment_anything.OTXZeroShotSegmentAnything - init_args: - backbone: tiny_vit - label_info: 0 - freeze_image_encoder: True - freeze_prompt_encoder: True - freeze_mask_decoder: True - default_threshold_reference: 0.3 - default_threshold_target: 0.65 - save_outputs: 
True - reference_info_dir: reference_infos - infer_reference_info_root: ../.latest/train - # options - use_stability_score: False - return_single_mask: False - return_extra_metrics: False - stability_score_offset: 1. - -engine: - task: ZERO_SHOT_VISUAL_PROMPTING - device: auto +engine: ../_base_/engine/zero_shot_visual_prompting.yaml +model: ../_base_/model/zero_shot_visual_prompting.yaml +data: ../_base_/data/zero_shot_visual_prompting.yaml callback_monitor: step -data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 + model: + init_args: + backbone: tiny_vit diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml index 0fd90018098..928fb81bac6 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml @@ -1,29 +1,12 @@ -model: - class_path: otx.algo.visual_prompting.zero_shot_segment_anything.OTXZeroShotSegmentAnything - init_args: - backbone: vit_b - label_info: 0 - freeze_image_encoder: True - freeze_prompt_encoder: True - freeze_mask_decoder: True - default_threshold_reference: 0.3 - default_threshold_target: 0.65 - save_outputs: True - reference_info_dir: reference_infos - infer_reference_info_root: ../.latest/train - # options - use_stability_score: False - return_single_mask: False - return_extra_metrics: False - stability_score_offset: 1. 
- -engine: - task: ZERO_SHOT_VISUAL_PROMPTING - device: auto +engine: ../_base_/engine/zero_shot_visual_prompting.yaml +model: ../_base_/model/zero_shot_visual_prompting.yaml +data: ../_base_/data/zero_shot_visual_prompting.yaml callback_monitor: step -data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 + model: + init_args: + backbone: vit_b From 2ea2389752cef3cb97eac510cd6740cb0983e285 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Tue, 9 Jul 2024 15:35:28 +0900 Subject: [PATCH 16/33] Remove model components that are redundantly written --- .../semantic_segmentation/litehrnet_18.yaml | 91 -------------- .../semantic_segmentation/litehrnet_s.yaml | 84 ------------- .../semantic_segmentation/litehrnet_x.yaml | 111 ------------------ .../semantic_segmentation/segnext_b.yaml | 64 ---------- .../semantic_segmentation/segnext_s.yaml | 65 ---------- .../semantic_segmentation/segnext_t.yaml | 65 ---------- 6 files changed, 480 deletions(-) diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml index f84d4cd2fed..890a19dae8c 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml @@ -15,97 +15,6 @@ overrides: params: ignore_index: 255 - backbone_configuration: - norm_eval: false - extra: - stem: - stem_channels: 32 - out_channels: 32 - expand_ratio: 1 - strides: - - 2 - - 2 - extra_stride: false - input_norm: false - num_stages: 3 - stages_spec: - num_modules: - - 2 - - 4 - - 2 - num_branches: - - 2 - - 3 - - 4 - num_blocks: - - 2 - - 2 - - 2 - module_type: - - LITE - - LITE - - LITE - with_fuse: - - true - - true - - true - reduce_ratios: - - 8 - - 8 - - 8 - num_channels: - - - 40 - - 80 - - - 40 - - 80 - - 160 - - - 40 - - 80 - - 160 - - 320 - out_modules: - conv: - enable: false - channels: 320 - position_att: - enable: false - key_channels: 128 - value_channels: 320 - 
psp_size: - - 1 - - 3 - - 6 - - 8 - local_att: - enable: false - out_aggregator: - enable: false - add_input: false - pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnet18_imagenet1k_rsc.pth" - - decode_head_configuration: - norm_cfg: - type: BN - requires_grad: true - in_channels: - - 40 - - 80 - - 160 - - 320 - in_index: - - 0 - - 1 - - 2 - - 3 - input_transform: "multiple_select" - channels: 40 - enable_aggregator: True - kernel_size: 1 - num_convs: 1 - concat_input: false - dropout_ratio: -1 - align_corners: false - optimizer: class_path: torch.optim.Adam init_args: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml index f9f2df321eb..fb9f1fe5ab6 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml @@ -15,90 +15,6 @@ overrides: params: ignore_index: 255 - backbone_configuration: - norm_cfg: - type: BN - requires_grad: true - norm_eval: false - extra: - stem: - stem_channels: 32 - out_channels: 32 - expand_ratio: 1 - strides: - - 2 - - 2 - extra_stride: true - input_norm: false - num_stages: 2 - stages_spec: - num_modules: - - 4 - - 4 - num_branches: - - 2 - - 3 - num_blocks: - - 2 - - 2 - module_type: - - LITE - - LITE - with_fuse: - - true - - true - reduce_ratios: - - 8 - - 8 - num_channels: - - - 60 - - 120 - - - 60 - - 120 - - 240 - out_modules: - conv: - enable: false - channels: 160 - position_att: - enable: false - key_channels: 64 - value_channels: 240 - psp_size: - - 1 - - 3 - - 6 - - 8 - local_att: - enable: false - out_aggregator: - enable: false - add_input: false - pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnetsv2_imagenet1k_rsc.pth" - - decode_head_configuration: - norm_cfg: - type: BN - requires_grad: true - 
in_channels: - - 60 - - 120 - - 240 - in_index: - - 0 - - 1 - - 2 - input_transform: "multiple_select" - channels: 60 - kernel_size: 1 - num_convs: 1 - concat_input: false - enable_aggregator: True - aggregator_merge_norm: None - aggregator_use_concat: False - dropout_ratio: -1 - align_corners: false - optimizer: class_path: torch.optim.Adam init_args: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml index 0b45ef1d5be..116bce16343 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml @@ -15,117 +15,6 @@ overrides: params: ignore_index: 255 - backbone_configuration: - norm_cfg: - type: BN - requires_grad: true - norm_eval: false - extra: - stem: - stem_channels: 60 - out_channels: 60 - expand_ratio: 1 - strides: - - 2 - - 1 - extra_stride: false - input_norm: false - num_stages: 4 - stages_spec: - weighting_module_version: v1 - num_modules: - - 2 - - 4 - - 4 - - 2 - num_branches: - - 2 - - 3 - - 4 - - 5 - num_blocks: - - 2 - - 2 - - 2 - - 2 - module_type: - - LITE - - LITE - - LITE - - LITE - with_fuse: - - true - - true - - true - - true - reduce_ratios: - - 2 - - 4 - - 8 - - 8 - num_channels: - - - 18 - - 60 - - - 18 - - 60 - - 80 - - - 18 - - 60 - - 80 - - 160 - - - 18 - - 60 - - 80 - - 160 - - 320 - out_modules: - conv: - enable: false - channels: 320 - position_att: - enable: false - key_channels: 128 - value_channels: 320 - psp_size: - - 1 - - 3 - - 6 - - 8 - local_att: - enable: false - out_aggregator: - enable: false - add_input: false - pretrained_weights: "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/models/custom_semantic_segmentation/litehrnetxv3_imagenet1k_rsc.pth" - - decode_head_configuration: - norm_cfg: - type: BN - requires_grad: true - in_channels: - - 18 - - 60 - - 80 - - 160 - - 320 - in_index: - - 0 - - 1 - - 2 - - 3 - - 4 - input_transform: "multiple_select" - 
channels: 60 - kernel_size: 1 - num_convs: 1 - concat_input: false - dropout_ratio: -1 - enable_aggregator: True - aggregator_min_channels: 60 - aggregator_merge_norm: None - aggregator_use_concat: False - align_corners: false - optimizer: class_path: torch.optim.Adam init_args: diff --git a/src/otx/recipe/semantic_segmentation/segnext_b.yaml b/src/otx/recipe/semantic_segmentation/segnext_b.yaml index 0064ac8b5ec..7214637f540 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_b.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_b.yaml @@ -15,70 +15,6 @@ overrides: params: ignore_index: 255 - backbone_configuration: - act_cfg: - type: GELU - attention_kernel_paddings: - - 2 - - - 0 - - 3 - - - 0 - - 5 - - - 0 - - 10 - attention_kernel_sizes: - - 5 - - - 1 - - 7 - - - 1 - - 11 - - - 1 - - 21 - depths: - - 3 - - 3 - - 12 - - 3 - drop_path_rate: 0.1 - drop_rate: 0.0 - embed_dims: - - 64 - - 128 - - 320 - - 512 - mlp_ratios: - - 8 - - 8 - - 4 - - 4 - norm_cfg: - requires_grad: true - type: BN - pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_b_20230227-3ab7d230.pth - - decode_head_configuration: - ham_kwargs: - md_r: 16 - md_s: 1 - eval_steps: 7 - train_steps: 6 - in_channels: - - 128 - - 320 - - 512 - in_index: - - 1 - - 2 - - 3 - norm_cfg: - num_groups: 32 - requires_grad: true - type: GN - align_corners: false - channels: 512 - dropout_ratio: 0.1 - ham_channels: 512 - optimizer: class_path: torch.optim.AdamW init_args: diff --git a/src/otx/recipe/semantic_segmentation/segnext_s.yaml b/src/otx/recipe/semantic_segmentation/segnext_s.yaml index 3e439ef8931..49064d05016 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_s.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_s.yaml @@ -15,71 +15,6 @@ overrides: params: ignore_index: 255 - backbone_configuration: - act_cfg: - type: GELU - attention_kernel_paddings: - - 2 - - - 0 - - 3 - - - 0 - - 5 - - - 0 - - 10 - attention_kernel_sizes: - - 5 - - - 1 - - 
7 - - - 1 - - 11 - - - 1 - - 21 - depths: - - 2 - - 2 - - 4 - - 2 - drop_path_rate: 0.1 - drop_rate: 0.0 - embed_dims: - - 64 - - 128 - - 320 - - 512 - mlp_ratios: - - 8 - - 8 - - 4 - - 4 - norm_cfg: - requires_grad: true - type: BN - pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_s_20230227-f33ccdf2.pth - - decode_head_configuration: - norm_cfg: - num_groups: 32 - requires_grad: true - type: GN - ham_kwargs: - md_r: 16 - md_s: 1 - eval_steps: 7 - rand_init: true - train_steps: 6 - in_channels: - - 128 - - 320 - - 512 - in_index: - - 1 - - 2 - - 3 - align_corners: false - channels: 256 - dropout_ratio: 0.1 - ham_channels: 256 - optimizer: class_path: torch.optim.AdamW init_args: diff --git a/src/otx/recipe/semantic_segmentation/segnext_t.yaml b/src/otx/recipe/semantic_segmentation/segnext_t.yaml index 6a9bee0124b..5d8e830efac 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_t.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_t.yaml @@ -15,71 +15,6 @@ overrides: params: ignore_index: 255 - backbone_configuration: - act_cfg: - type: GELU - attention_kernel_paddings: - - 2 - - - 0 - - 3 - - - 0 - - 5 - - - 0 - - 10 - attention_kernel_sizes: - - 5 - - - 1 - - 7 - - - 1 - - 11 - - - 1 - - 21 - depths: - - 3 - - 3 - - 5 - - 2 - drop_path_rate: 0.1 - drop_rate: 0.0 - embed_dims: - - 32 - - 64 - - 160 - - 256 - mlp_ratios: - - 8 - - 8 - - 4 - - 4 - norm_cfg: - requires_grad: true - type: BN - pretrained_weights: https://download.openmmlab.com/mmsegmentation/v0.5/pretrain/segnext/mscan_t_20230227-119e8c9f.pth - - decode_head_configuration: - ham_kwargs: - md_r: 16 - md_s: 1 - eval_steps: 7 - rand_init: true - train_steps: 6 - norm_cfg: - num_groups: 32 - requires_grad: true - type: GN - in_channels: - - 64 - - 160 - - 256 - in_index: - - 1 - - 2 - - 3 - align_corners: false - channels: 256 - dropout_ratio: 0.1 - ham_channels: 256 - optimizer: class_path: torch.optim.AdamW init_args: From 
207a0cc7d837730cb3f11e7c1e9fa27afdf1d3d6 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Tue, 9 Jul 2024 16:50:12 +0900 Subject: [PATCH 17/33] pre-commit --- src/otx/recipe/_base_/model/instance_segmentation.yaml | 2 +- src/otx/recipe/_base_/model/rotated_detection.yaml | 2 +- src/otx/recipe/_base_/model/visual_prompting.yaml | 2 +- src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/otx/recipe/_base_/model/instance_segmentation.yaml b/src/otx/recipe/_base_/model/instance_segmentation.yaml index b27e76b5db0..ae2ba57752a 100644 --- a/src/otx/recipe/_base_/model/instance_segmentation.yaml +++ b/src/otx/recipe/_base_/model/instance_segmentation.yaml @@ -11,4 +11,4 @@ init_args: mode: max factor: 0.1 patience: 4 - monitor: val/map_50 \ No newline at end of file + monitor: val/map_50 diff --git a/src/otx/recipe/_base_/model/rotated_detection.yaml b/src/otx/recipe/_base_/model/rotated_detection.yaml index 1e0f38f44da..38a177d42ef 100644 --- a/src/otx/recipe/_base_/model/rotated_detection.yaml +++ b/src/otx/recipe/_base_/model/rotated_detection.yaml @@ -11,4 +11,4 @@ init_args: mode: max factor: 0.1 patience: 9 - monitor: val/map_50 \ No newline at end of file + monitor: val/map_50 diff --git a/src/otx/recipe/_base_/model/visual_prompting.yaml b/src/otx/recipe/_base_/model/visual_prompting.yaml index 3aa8bd70c12..65224ea572c 100644 --- a/src/otx/recipe/_base_/model/visual_prompting.yaml +++ b/src/otx/recipe/_base_/model/visual_prompting.yaml @@ -19,4 +19,4 @@ init_args: class_path: torch.optim.lr_scheduler.ConstantLR init_args: factor: 1 - total_iters: -1 \ No newline at end of file + total_iters: -1 diff --git a/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml b/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml index 532de6e04df..49d9ca22dfa 100644 --- a/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml +++ 
b/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml @@ -14,4 +14,4 @@ init_args: use_stability_score: false return_single_mask: false return_extra_metrics: false - stability_score_offset: 1. \ No newline at end of file + stability_score_offset: 1. From 61572172120c85433a28c2f67bf310df20f42da9 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 10 Jul 2024 09:30:04 +0900 Subject: [PATCH 18/33] Revert `to_tv_image` to True --- src/otx/recipe/_base_/data/instance_segmentation.yaml | 6 +++--- src/otx/recipe/instance_segmentation/openvino_model.yaml | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/otx/recipe/_base_/data/instance_segmentation.yaml b/src/otx/recipe/_base_/data/instance_segmentation.yaml index 7227f8a2f54..91d6cebf185 100644 --- a/src/otx/recipe/_base_/data/instance_segmentation.yaml +++ b/src/otx/recipe/_base_/data/instance_segmentation.yaml @@ -9,7 +9,7 @@ unannotated_items_ratio: 0.0 train_subset: subset_name: train transform_lib_type: TORCHVISION - to_tv_image: false + to_tv_image: true transforms: - class_path: torchvision.transforms.v2.ToImage batch_size: 1 @@ -19,7 +19,7 @@ train_subset: val_subset: subset_name: val transform_lib_type: TORCHVISION - to_tv_image: false + to_tv_image: true transforms: - class_path: torchvision.transforms.v2.ToImage batch_size: 1 @@ -29,7 +29,7 @@ val_subset: test_subset: subset_name: test transform_lib_type: TORCHVISION - to_tv_image: false + to_tv_image: true transforms: - class_path: torchvision.transforms.v2.ToImage batch_size: 1 diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml index e7115725a27..8ed1bcbbe92 100644 --- a/src/otx/recipe/instance_segmentation/openvino_model.yaml +++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml @@ -19,5 +19,4 @@ overrides: data: stack_images: false test_subset: - to_tv_image: true batch_size: 64 From 2ea6ee879ae4cc0387cf842f85e6e36fbc77f64c Mon Sep 17 
00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 10 Jul 2024 10:48:09 +0900 Subject: [PATCH 19/33] Revert maskrcnn_r50_tv due to overflow issue --- .../maskrcnn_r50_tv.yaml | 56 +++++++++++++----- .../maskrcnn_r50_tv_tile.yaml | 59 ++++++++++++++----- 2 files changed, 87 insertions(+), 28 deletions(-) diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index 086fff7da0e..21f10282e1d 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -1,23 +1,42 @@ -engine: ../_base_/engine/instance_segmentation.yaml -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 + +engine: + task: INSTANCE_SEGMENTATION + device: auto callback_monitor: val/map_50 +data: ../_base_/data/torchvision_base.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: + task: INSTANCE_SEGMENTATION + stack_images: true + data_format: coco_instances + include_polygons: true train_subset: batch_size: 4 num_workers: 8 @@ -41,6 +60,7 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} + scale: False - class_path: torchvision.transforms.v2.Normalize init_args: 
mean: [123.675, 116.28, 103.53] @@ -52,16 +72,20 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true + transform_bbox: false + transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true + transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} + scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -73,17 +97,21 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true + transform_bbox: false + transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true + transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} + scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] + std: [58.395, 57.12, 57.375] \ No newline at end of file diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index f934375ba89..3fb32288fb6 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -1,32 +1,52 @@ -engine: ../_base_/engine/instance_segmentation.yaml -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + 
num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 + +engine: + task: INSTANCE_SEGMENTATION + device: auto callback_monitor: val/map_50 +data: ../_base_/data/torchvision_base.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: + task: INSTANCE_SEGMENTATION + stack_images: true tile_config: enable_tiler: true enable_adaptive_tiling: true + data_format: coco_instances + include_polygons: true train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: + keep_ratio: false transform_bbox: true transform_mask: true scale: @@ -43,6 +63,7 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} + scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -53,16 +74,21 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: + keep_ratio: false + transform_bbox: false + transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 + transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} + scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -73,17 +99,22 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: + keep_ratio: false + transform_bbox: false + transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 
32 + transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} + scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] + std: [58.395, 57.12, 57.375] \ No newline at end of file From 9428ce88b3c2937e969fefaea1d1634ad419e183 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 10 Jul 2024 11:00:31 +0900 Subject: [PATCH 20/33] pre-commit --- src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml | 2 +- src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index 21f10282e1d..c5c1e7d707a 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -114,4 +114,4 @@ overrides: - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] \ No newline at end of file + std: [58.395, 57.12, 57.375] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index 3fb32288fb6..6ef16d7df8e 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -117,4 +117,4 @@ overrides: - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] - std: [58.395, 57.12, 57.375] \ No newline at end of file + std: [58.395, 57.12, 57.375] From 71b3cab81e8819cc72dad854121143553b0dc408 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 10 Jul 2024 11:20:24 +0900 Subject: [PATCH 21/33] Revert "Revert maskrcnn_r50_tv due to overflow issue" This reverts commit 
2ea6ee879ae4cc0387cf842f85e6e36fbc77f64c. --- .../maskrcnn_r50_tv.yaml | 54 +++++------------- .../maskrcnn_r50_tv_tile.yaml | 57 +++++-------------- 2 files changed, 26 insertions(+), 85 deletions(-) diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index c5c1e7d707a..086fff7da0e 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -1,42 +1,23 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: ../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: - task: INSTANCE_SEGMENTATION - stack_images: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 @@ -60,7 +41,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -72,20 +52,16 @@ overrides: - class_path: 
otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -97,20 +73,16 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: keep_ratio: true - transform_bbox: false - transform_mask: false scale: - 1024 - 1024 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: pad_to_square: true - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index 6ef16d7df8e..f934375ba89 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -1,52 +1,32 @@ -model: - class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - label_info: 80 - - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 - -engine: - task: INSTANCE_SEGMENTATION - device: auto +engine: ../_base_/engine/instance_segmentation.yaml +model: 
../_base_/model/instance_segmentation.yaml +data: ../_base_/data/instance_segmentation.yaml callback_monitor: val/map_50 -data: ../_base_/data/torchvision_base.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 + model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + data: - task: INSTANCE_SEGMENTATION - stack_images: true tile_config: enable_tiler: true enable_adaptive_tiling: true - data_format: coco_instances - include_polygons: true train_subset: batch_size: 4 num_workers: 8 transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false transform_bbox: true transform_mask: true scale: @@ -63,7 +43,6 @@ overrides: - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -74,21 +53,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] @@ -99,21 +73,16 @@ overrides: transforms: - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - keep_ratio: false - transform_bbox: false - transform_mask: false scale: - 512 - 512 - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: size_divisor: 32 - transform_mask: false is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: 
${as_torch_dtype:torch.float32} - scale: False - class_path: torchvision.transforms.v2.Normalize init_args: mean: [123.675, 116.28, 103.53] From e5104b023a6fd431c671267c1643b544aa0ce213 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 10 Jul 2024 14:23:44 +0900 Subject: [PATCH 22/33] Revert engine --- src/otx/recipe/_base_/engine/detection.yaml | 2 -- .../recipe/_base_/engine/instance_segmentation.yaml | 2 -- src/otx/recipe/_base_/engine/rotated_detection.yaml | 2 -- .../recipe/_base_/engine/semantic_segmentation.yaml | 2 -- src/otx/recipe/_base_/engine/visual_prompting.yaml | 2 -- .../_base_/engine/zero_shot_visual_prompting.yaml | 2 -- src/otx/recipe/detection/atss_mobilenetv2.yaml | 5 ++++- src/otx/recipe/detection/atss_mobilenetv2_tile.yaml | 5 ++++- src/otx/recipe/detection/atss_resnext101.yaml | 5 ++++- src/otx/recipe/detection/openvino_model.yaml | 8 ++++---- src/otx/recipe/detection/rtmdet_tiny.yaml | 5 ++++- src/otx/recipe/detection/ssd_mobilenetv2.yaml | 5 ++++- src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml | 5 ++++- src/otx/recipe/detection/yolox_l.yaml | 5 ++++- src/otx/recipe/detection/yolox_l_tile.yaml | 5 ++++- src/otx/recipe/detection/yolox_s.yaml | 5 ++++- src/otx/recipe/detection/yolox_s_tile.yaml | 5 ++++- src/otx/recipe/detection/yolox_tiny.yaml | 5 ++++- src/otx/recipe/detection/yolox_tiny_tile.yaml | 5 ++++- src/otx/recipe/detection/yolox_x.yaml | 5 ++++- src/otx/recipe/detection/yolox_x_tile.yaml | 5 ++++- .../maskrcnn_efficientnetb2b.yaml | 5 ++++- .../maskrcnn_efficientnetb2b_tile.yaml | 5 ++++- .../recipe/instance_segmentation/maskrcnn_r50.yaml | 5 ++++- .../instance_segmentation/maskrcnn_r50_tile.yaml | 5 ++++- .../recipe/instance_segmentation/maskrcnn_r50_tv.yaml | 5 ++++- .../instance_segmentation/maskrcnn_r50_tv_tile.yaml | 5 ++++- .../recipe/instance_segmentation/maskrcnn_swint.yaml | 5 ++++- .../instance_segmentation/maskrcnn_swint_tile.yaml | 5 ++++- .../recipe/instance_segmentation/openvino_model.yaml | 8 ++++---- 
.../instance_segmentation/rtmdet_inst_tiny.yaml | 5 ++++- .../instance_segmentation/rtmdet_inst_tiny_tile.yaml | 5 ++++- .../rotated_detection/maskrcnn_efficientnetb2b.yaml | 5 ++++- src/otx/recipe/rotated_detection/maskrcnn_r50.yaml | 5 ++++- src/otx/recipe/rotated_detection/openvino_model.yaml | 8 ++++---- src/otx/recipe/semantic_segmentation/dino_v2.yaml | 8 ++++++-- .../recipe/semantic_segmentation/litehrnet_18.yaml | 5 ++++- src/otx/recipe/semantic_segmentation/litehrnet_s.yaml | 5 ++++- src/otx/recipe/semantic_segmentation/litehrnet_x.yaml | 5 ++++- .../recipe/semantic_segmentation/openvino_model.yaml | 8 ++++---- src/otx/recipe/semantic_segmentation/segnext_b.yaml | 5 ++++- src/otx/recipe/semantic_segmentation/segnext_s.yaml | 5 ++++- src/otx/recipe/semantic_segmentation/segnext_t.yaml | 5 ++++- src/otx/recipe/visual_prompting/openvino_model.yaml | 8 ++++---- src/otx/recipe/visual_prompting/sam_tiny_vit.yaml | 5 ++++- src/otx/recipe/visual_prompting/sam_vit_b.yaml | 5 ++++- .../zero_shot_visual_prompting/openvino_model.yaml | 11 ++++++----- .../zero_shot_visual_prompting/sam_tiny_vit.yaml | 5 ++++- .../recipe/zero_shot_visual_prompting/sam_vit_b.yaml | 5 ++++- 49 files changed, 176 insertions(+), 75 deletions(-) delete mode 100644 src/otx/recipe/_base_/engine/detection.yaml delete mode 100644 src/otx/recipe/_base_/engine/instance_segmentation.yaml delete mode 100644 src/otx/recipe/_base_/engine/rotated_detection.yaml delete mode 100644 src/otx/recipe/_base_/engine/semantic_segmentation.yaml delete mode 100644 src/otx/recipe/_base_/engine/visual_prompting.yaml delete mode 100644 src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml diff --git a/src/otx/recipe/_base_/engine/detection.yaml b/src/otx/recipe/_base_/engine/detection.yaml deleted file mode 100644 index c1a665241ad..00000000000 --- a/src/otx/recipe/_base_/engine/detection.yaml +++ /dev/null @@ -1,2 +0,0 @@ -task: DETECTION -device: auto diff --git 
a/src/otx/recipe/_base_/engine/instance_segmentation.yaml b/src/otx/recipe/_base_/engine/instance_segmentation.yaml deleted file mode 100644 index 83d0d12356b..00000000000 --- a/src/otx/recipe/_base_/engine/instance_segmentation.yaml +++ /dev/null @@ -1,2 +0,0 @@ -task: INSTANCE_SEGMENTATION -device: auto diff --git a/src/otx/recipe/_base_/engine/rotated_detection.yaml b/src/otx/recipe/_base_/engine/rotated_detection.yaml deleted file mode 100644 index 9712b3795a8..00000000000 --- a/src/otx/recipe/_base_/engine/rotated_detection.yaml +++ /dev/null @@ -1,2 +0,0 @@ -task: ROTATED_DETECTION -device: auto diff --git a/src/otx/recipe/_base_/engine/semantic_segmentation.yaml b/src/otx/recipe/_base_/engine/semantic_segmentation.yaml deleted file mode 100644 index 13918779023..00000000000 --- a/src/otx/recipe/_base_/engine/semantic_segmentation.yaml +++ /dev/null @@ -1,2 +0,0 @@ -task: SEMANTIC_SEGMENTATION -device: auto diff --git a/src/otx/recipe/_base_/engine/visual_prompting.yaml b/src/otx/recipe/_base_/engine/visual_prompting.yaml deleted file mode 100644 index a04edad33f6..00000000000 --- a/src/otx/recipe/_base_/engine/visual_prompting.yaml +++ /dev/null @@ -1,2 +0,0 @@ -task: VISUAL_PROMPTING -device: auto diff --git a/src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml b/src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml deleted file mode 100644 index 194ab50f67b..00000000000 --- a/src/otx/recipe/_base_/engine/zero_shot_visual_prompting.yaml +++ /dev/null @@ -1,2 +0,0 @@ -task: ZERO_SHOT_VISUAL_PROMPTING -device: auto diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index acd63d2886e..0fb8492df27 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + 
callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index e9d0ef2fd75..e9ae072c83d 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 8a2d62a4836..40f049b61fa 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml index 4feb4e4c4bc..c498cdfd148 100644 --- a/src/otx/recipe/detection/openvino_model.yaml +++ b/src/otx/recipe/detection/openvino_model.yaml @@ -7,15 +7,15 @@ model: async_inference: true use_throughput_mode: true -engine: ../_base_/engine/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: cpu + callback_monitor: val/map_50 overrides: - engine: - device: cpu - data: stack_images: false test_subset: diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index ed5ad069e63..4c3f1dc58f5 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git 
a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index ebe42cc526a..331807851c3 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index 735755352c8..9b2abc48b0a 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index 644188c20b5..b4d2367c725 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index bd4342f7aea..7c377dadfa3 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index b742b7f325f..e3d90fa8082 100644 --- a/src/otx/recipe/detection/yolox_s.yaml 
+++ b/src/otx/recipe/detection/yolox_s.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index a49dca23182..155b08d4bea 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 3072f423f46..428bb621dce 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index 940b9b6a9dc..d0e49b803cc 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index fc1a20426b0..3831a03a102 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION 
+ device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index ae9e6843ad0..cb59e364e69 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/detection.yaml model: ../_base_/model/detection.yaml data: ../_base_/data/detection.yaml +engine: + task: DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml index 716df2b11da..9725702a3b2 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml index f69420a17b5..128c37e86d1 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml index 82569e633b0..6b5bf0c715a 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml +++ 
b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml index b5a5f9ff15c..4a8eca10e00 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index 086fff7da0e..7d9b86c8146 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index f934375ba89..4bc2b183ce3 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + 
callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index 727899e2cf2..1b333368f23 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml index 91304658984..ae4e7c3be5e 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml index 8ed1bcbbe92..bd03ffc4cc9 100644 --- a/src/otx/recipe/instance_segmentation/openvino_model.yaml +++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml @@ -7,15 +7,15 @@ model: async_inference: true use_throughput_mode: true -engine: ../_base_/engine/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: cpu + callback_monitor: val/map_50 overrides: - engine: - device: cpu - data: stack_images: false test_subset: diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml index 33b2d9da6aa..2909e909a42 100644 --- 
a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml index ab0448b9cbf..64377d19f9f 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/instance_segmentation.yaml model: ../_base_/model/instance_segmentation.yaml data: ../_base_/data/instance_segmentation.yaml +engine: + task: INSTANCE_SEGMENTATION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml index 9a5d9cdec6d..dc2ab9ac39b 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/rotated_detection.yaml model: ../_base_/model/rotated_detection.yaml data: ../_base_/data/rotated_detection.yaml +engine: + task: ROTATED_DETECTION + device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml index fc64753527b..fed2fc8e1d4 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/rotated_detection.yaml model: ../_base_/model/rotated_detection.yaml data: ../_base_/data/rotated_detection.yaml +engine: + task: ROTATED_DETECTION 
+ device: auto + callback_monitor: val/map_50 overrides: diff --git a/src/otx/recipe/rotated_detection/openvino_model.yaml b/src/otx/recipe/rotated_detection/openvino_model.yaml index b11a4d1e285..1e4765b3eee 100644 --- a/src/otx/recipe/rotated_detection/openvino_model.yaml +++ b/src/otx/recipe/rotated_detection/openvino_model.yaml @@ -7,15 +7,15 @@ model: async_inference: true use_throughput_mode: true -engine: ../_base_/engine/rotated_detection.yaml data: ../_base_/data/rotated_detection.yaml +engine: + task: ROTATED_DETECTION + device: cpu + callback_monitor: val/map_50 overrides: - engine: - device: cpu - data: stack_images: false test_subset: diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 11f230b5384..258c1f8ffd7 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -63,10 +63,14 @@ model: power: 0.9 last_epoch: -1 +data: ../_base_/data/semantic_segmentation.yaml + +engine: + task: SEMANTIC_SEGMENTATION + device: auto + callback_monitor: val/Dice -engine: ../_base_/engine/semantic_segmentation.yaml -data: ../_base_/data/semantic_segmentation.yaml overrides: data: train_subset: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml index 890a19dae8c..64af9c97cf1 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/semantic_segmentation.yaml model: ../_base_/model/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml +engine: + task: SEMANTIC_SEGMENTATION + device: auto + callback_monitor: val/Dice overrides: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml index fb9f1fe5ab6..a9c9ebfc639 100644 --- 
a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/semantic_segmentation.yaml model: ../_base_/model/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml +engine: + task: SEMANTIC_SEGMENTATION + device: auto + callback_monitor: val/Dice overrides: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml index 116bce16343..5db6ede3f30 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/semantic_segmentation.yaml model: ../_base_/model/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml +engine: + task: SEMANTIC_SEGMENTATION + device: auto + callback_monitor: val/Dice overrides: diff --git a/src/otx/recipe/semantic_segmentation/openvino_model.yaml b/src/otx/recipe/semantic_segmentation/openvino_model.yaml index 486caa05053..aa64cb69de4 100644 --- a/src/otx/recipe/semantic_segmentation/openvino_model.yaml +++ b/src/otx/recipe/semantic_segmentation/openvino_model.yaml @@ -7,15 +7,15 @@ model: async_inference: true use_throughput_mode: true -engine: ../_base_/engine/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml +engine: + task: SEMANTIC_SEGMENTATION + device: cpu + callback_monitor: val/Dice overrides: - engine: - device: cpu - data: stack_images: false train_subset: diff --git a/src/otx/recipe/semantic_segmentation/segnext_b.yaml b/src/otx/recipe/semantic_segmentation/segnext_b.yaml index 7214637f540..bd23f125de8 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_b.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_b.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/semantic_segmentation.yaml model: ../_base_/model/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml +engine: + 
task: SEMANTIC_SEGMENTATION + device: auto + callback_monitor: val/Dice overrides: diff --git a/src/otx/recipe/semantic_segmentation/segnext_s.yaml b/src/otx/recipe/semantic_segmentation/segnext_s.yaml index 49064d05016..43bf46f02f5 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_s.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_s.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/semantic_segmentation.yaml model: ../_base_/model/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml +engine: + task: SEMANTIC_SEGMENTATION + device: auto + callback_monitor: val/Dice overrides: diff --git a/src/otx/recipe/semantic_segmentation/segnext_t.yaml b/src/otx/recipe/semantic_segmentation/segnext_t.yaml index 5d8e830efac..9733b82a2da 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_t.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_t.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/semantic_segmentation.yaml model: ../_base_/model/semantic_segmentation.yaml data: ../_base_/data/semantic_segmentation.yaml +engine: + task: SEMANTIC_SEGMENTATION + device: auto + callback_monitor: val/Dice overrides: diff --git a/src/otx/recipe/visual_prompting/openvino_model.yaml b/src/otx/recipe/visual_prompting/openvino_model.yaml index 671aa120e53..e2854b39b25 100644 --- a/src/otx/recipe/visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/visual_prompting/openvino_model.yaml @@ -7,15 +7,15 @@ model: async_inference: false use_throughput_mode: false -engine: ../_base_/engine/visual_prompting.yaml data: ../_base_/data/visual_prompting.yaml +engine: + task: VISUAL_PROMPTING + device: cpu + callback_monitor: val/Dice overrides: - engine: - device: cpu - data: train_subset: batch_size: 1 diff --git a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml index 3562ca85446..0769857462f 100644 --- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml +++ 
b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/visual_prompting.yaml model: ../_base_/model/visual_prompting.yaml data: ../_base_/data/visual_prompting.yaml +engine: + task: VISUAL_PROMPTING + device: auto + callback_monitor: val/f1-score overrides: diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml index 14301e60143..1674e7d3216 100644 --- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/visual_prompting.yaml model: ../_base_/model/visual_prompting.yaml data: ../_base_/data/visual_prompting.yaml +engine: + task: VISUAL_PROMPTING + device: auto + callback_monitor: val/f1-score overrides: diff --git a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml index 78adbb290cd..1124668d6d1 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml @@ -10,16 +10,17 @@ model: infer_reference_info_root: ../.latest/train save_outputs: True +data: ../_base_/data/zero_shot_visual_prompting.yaml + +engine: + task: ZERO_SHOT_VISUAL_PROMPTING + device: cpu + callback_monitor: step -engine: ../_base_/engine/zero_shot_visual_prompting.yaml -data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 - engine: - device: cpu - data: train_subset: num_workers: 0 # TODO (sungchul): CVS-135462 diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml index afd4b7f7a97..262ccb5ea74 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/zero_shot_visual_prompting.yaml model: 
../_base_/model/zero_shot_visual_prompting.yaml data: ../_base_/data/zero_shot_visual_prompting.yaml +engine: + task: ZERO_SHOT_VISUAL_PROMPTING + device: auto + callback_monitor: step overrides: diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml index 928fb81bac6..f81b95864f2 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml @@ -1,7 +1,10 @@ -engine: ../_base_/engine/zero_shot_visual_prompting.yaml model: ../_base_/model/zero_shot_visual_prompting.yaml data: ../_base_/data/zero_shot_visual_prompting.yaml +engine: + task: ZERO_SHOT_VISUAL_PROMPTING + device: auto + callback_monitor: step overrides: From 6135260ea6ebfcb5f1f32c6aa1e4389671d5bd2e Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 10 Jul 2024 14:57:05 +0900 Subject: [PATCH 23/33] Revert model and update dino_v2_seg --- src/otx/algo/segmentation/dino_v2_seg.py | 2 +- src/otx/recipe/_base_/model/detection.yaml | 14 ----- .../_base_/model/instance_segmentation.yaml | 14 ----- .../_base_/model/rotated_detection.yaml | 14 ----- .../_base_/model/semantic_segmentation.yaml | 7 --- .../recipe/_base_/model/visual_prompting.yaml | 22 ------- .../model/zero_shot_visual_prompting.yaml | 17 ----- .../recipe/detection/atss_mobilenetv2.yaml | 36 +++++++---- .../detection/atss_mobilenetv2_tile.yaml | 36 +++++++---- src/otx/recipe/detection/atss_resnext101.yaml | 36 +++++++---- src/otx/recipe/detection/openvino_model.yaml | 3 +- src/otx/recipe/detection/rtmdet_tiny.yaml | 34 ++++++---- src/otx/recipe/detection/ssd_mobilenetv2.yaml | 36 +++++++---- .../detection/ssd_mobilenetv2_tile.yaml | 36 +++++++---- src/otx/recipe/detection/yolox_l.yaml | 36 +++++++---- src/otx/recipe/detection/yolox_l_tile.yaml | 36 +++++++---- src/otx/recipe/detection/yolox_s.yaml | 36 +++++++---- src/otx/recipe/detection/yolox_s_tile.yaml | 36 +++++++---- 
src/otx/recipe/detection/yolox_tiny.yaml | 36 +++++++---- src/otx/recipe/detection/yolox_tiny_tile.yaml | 36 +++++++---- src/otx/recipe/detection/yolox_x.yaml | 36 +++++++---- src/otx/recipe/detection/yolox_x_tile.yaml | 36 +++++++---- .../maskrcnn_efficientnetb2b.yaml | 36 +++++++---- .../maskrcnn_efficientnetb2b_tile.yaml | 36 +++++++---- .../instance_segmentation/maskrcnn_r50.yaml | 36 +++++++---- .../maskrcnn_r50_tile.yaml | 36 +++++++---- .../maskrcnn_r50_tv.yaml | 36 +++++++---- .../maskrcnn_r50_tv_tile.yaml | 36 +++++++---- .../instance_segmentation/maskrcnn_swint.yaml | 34 ++++++---- .../maskrcnn_swint_tile.yaml | 34 ++++++---- .../instance_segmentation/openvino_model.yaml | 3 +- .../rtmdet_inst_tiny.yaml | 45 +++++++------ .../rtmdet_inst_tiny_tile.yaml | 45 +++++++------ .../maskrcnn_efficientnetb2b.yaml | 36 +++++++---- .../rotated_detection/maskrcnn_r50.yaml | 36 +++++++---- .../rotated_detection/openvino_model.yaml | 3 +- .../recipe/semantic_segmentation/dino_v2.yaml | 35 +---------- .../semantic_segmentation/litehrnet_18.yaml | 63 ++++++++++--------- .../semantic_segmentation/litehrnet_s.yaml | 63 ++++++++++--------- .../semantic_segmentation/litehrnet_x.yaml | 63 ++++++++++--------- .../semantic_segmentation/openvino_model.yaml | 3 +- .../semantic_segmentation/segnext_b.yaml | 62 +++++++++--------- .../semantic_segmentation/segnext_s.yaml | 62 +++++++++--------- .../semantic_segmentation/segnext_t.yaml | 62 +++++++++--------- .../visual_prompting/openvino_model.yaml | 3 +- .../recipe/visual_prompting/sam_tiny_vit.yaml | 31 +++++++-- .../recipe/visual_prompting/sam_vit_b.yaml | 31 +++++++-- .../openvino_model.yaml | 9 ++- .../sam_tiny_vit.yaml | 24 +++++-- .../zero_shot_visual_prompting/sam_vit_b.yaml | 24 +++++-- 50 files changed, 911 insertions(+), 671 deletions(-) delete mode 100644 src/otx/recipe/_base_/model/detection.yaml delete mode 100644 src/otx/recipe/_base_/model/instance_segmentation.yaml delete mode 100644 
src/otx/recipe/_base_/model/rotated_detection.yaml delete mode 100644 src/otx/recipe/_base_/model/semantic_segmentation.yaml delete mode 100644 src/otx/recipe/_base_/model/visual_prompting.yaml delete mode 100644 src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml diff --git a/src/otx/algo/segmentation/dino_v2_seg.py b/src/otx/algo/segmentation/dino_v2_seg.py index b0722643b31..d38001ada88 100644 --- a/src/otx/algo/segmentation/dino_v2_seg.py +++ b/src/otx/algo/segmentation/dino_v2_seg.py @@ -22,7 +22,7 @@ class DinoV2Seg(BaseSegmModel): """DinoV2Seg Model.""" default_backbone_configuration: ClassVar[dict[str, Any]] = { - "name": "dinov2_vits14_reg", + "name": "dinov2_vits14", "freeze_backbone": True, "out_index": [8, 9, 10, 11], } diff --git a/src/otx/recipe/_base_/model/detection.yaml b/src/otx/recipe/_base_/model/detection.yaml deleted file mode 100644 index e5e831fe673..00000000000 --- a/src/otx/recipe/_base_/model/detection.yaml +++ /dev/null @@ -1,14 +0,0 @@ -init_args: - label_info: 80 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 3 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 diff --git a/src/otx/recipe/_base_/model/instance_segmentation.yaml b/src/otx/recipe/_base_/model/instance_segmentation.yaml deleted file mode 100644 index ae2ba57752a..00000000000 --- a/src/otx/recipe/_base_/model/instance_segmentation.yaml +++ /dev/null @@ -1,14 +0,0 @@ -init_args: - label_info: 80 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/map_50 diff --git a/src/otx/recipe/_base_/model/rotated_detection.yaml b/src/otx/recipe/_base_/model/rotated_detection.yaml deleted file mode 
100644 index 38a177d42ef..00000000000 --- a/src/otx/recipe/_base_/model/rotated_detection.yaml +++ /dev/null @@ -1,14 +0,0 @@ -init_args: - label_info: 80 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 9 - monitor: val/map_50 diff --git a/src/otx/recipe/_base_/model/semantic_segmentation.yaml b/src/otx/recipe/_base_/model/semantic_segmentation.yaml deleted file mode 100644 index fc401bbc789..00000000000 --- a/src/otx/recipe/_base_/model/semantic_segmentation.yaml +++ /dev/null @@ -1,7 +0,0 @@ -init_args: - label_info: 2 - - scheduler: - class_path: otx.core.schedulers.LinearWarmupSchedulerCallable - init_args: - num_warmup_steps: 100 diff --git a/src/otx/recipe/_base_/model/visual_prompting.yaml b/src/otx/recipe/_base_/model/visual_prompting.yaml deleted file mode 100644 index 65224ea572c..00000000000 --- a/src/otx/recipe/_base_/model/visual_prompting.yaml +++ /dev/null @@ -1,22 +0,0 @@ -class_path: otx.algo.visual_prompting.segment_anything.OTXSegmentAnything -init_args: - label_info: 0 - freeze_image_encoder: true - freeze_prompt_encoder: true - freeze_mask_decoder: false - # options - use_stability_score: false - return_single_mask: true - return_extra_metrics: false - stability_score_offset: 1. 
- - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.00001 - - scheduler: - class_path: torch.optim.lr_scheduler.ConstantLR - init_args: - factor: 1 - total_iters: -1 diff --git a/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml b/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml deleted file mode 100644 index 49d9ca22dfa..00000000000 --- a/src/otx/recipe/_base_/model/zero_shot_visual_prompting.yaml +++ /dev/null @@ -1,17 +0,0 @@ -class_path: otx.algo.visual_prompting.zero_shot_segment_anything.OTXZeroShotSegmentAnything -init_args: - backbone: tiny_vit - label_info: 0 - freeze_image_encoder: true - freeze_prompt_encoder: true - freeze_mask_decoder: true - default_threshold_reference: 0.3 - default_threshold_target: 0.65 - save_outputs: true - reference_info_dir: reference_infos - infer_reference_info_root: ../.latest/train - # options - use_stability_score: false - return_single_mask: false - return_extra_metrics: false - stability_score_offset: 1. diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index 0fb8492df27..b682ff47385 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.atss.MobileNetV2ATSS + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.004 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: 
otx.algo.detection.atss.MobileNetV2ATSS - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.004 - momentum: 0.9 - weight_decay: 0.0001 - callbacks: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index e9ae072c83d..f1400a086b0 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.atss.MobileNetV2ATSS + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.004 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.atss.MobileNetV2ATSS - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.004 - momentum: 0.9 - weight_decay: 0.0001 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 40f049b61fa..2fefe3111e2 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.atss.ResNeXt101ATSS + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.004 + momentum: 0.9 + 
weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.atss.ResNeXt101ATSS - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.004 - momentum: 0.9 - weight_decay: 0.0001 - callbacks: - class_path: otx.algo.callbacks.adaptive_train_scheduling.AdaptiveTrainScheduling init_args: diff --git a/src/otx/recipe/detection/openvino_model.yaml b/src/otx/recipe/detection/openvino_model.yaml index c498cdfd148..dbfd5d63826 100644 --- a/src/otx/recipe/detection/openvino_model.yaml +++ b/src/otx/recipe/detection/openvino_model.yaml @@ -7,14 +7,13 @@ model: async_inference: true use_throughput_mode: true -data: ../_base_/data/detection.yaml - engine: task: DETECTION device: cpu callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: data: stack_images: false diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 4c3f1dc58f5..8f73712f0c9 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -1,5 +1,25 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.rtmdet.RTMDetTiny + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.0007 + weight_decay: 0.05 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: 
val/map_50 engine: task: DETECTION @@ -7,17 +27,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.rtmdet.RTMDetTiny - init_args: - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.0007 - weight_decay: 0.05 - data: image_color_channel: BGR train_subset: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index 331807851c3..a0278adae5e 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.ssd.SSD + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.01 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.ssd.SSD - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.01 - momentum: 0.9 - weight_decay: 0.0001 - data: train_subset: batch_size: 8 diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index 9b2abc48b0a..b6580970f20 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.ssd.SSD + init_args: + label_info: 80 + + optimizer: + class_path: 
torch.optim.SGD + init_args: + lr: 0.01 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.ssd.SSD - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.01 - momentum: 0.9 - weight_decay: 0.0001 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index b4d2367c725..3e76975edb2 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXL + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.yolox.YOLOXL - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - data: image_color_channel: BGR train_subset: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index 7c377dadfa3..fe80768ec5a 100644 --- 
a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXL + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.yolox.YOLOXL - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - data: image_color_channel: BGR tile_config: diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index e3d90fa8082..bfca1779313 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXS + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: 
otx.algo.detection.yolox.YOLOXS - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - data: image_color_channel: BGR train_subset: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index 155b08d4bea..c03953d0f0a 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXS + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.yolox.YOLOXS - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - data: image_color_channel: BGR tile_config: diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 428bb621dce..4362e9c38de 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXTINY + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.0002 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + 
main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.yolox.YOLOXTINY - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.0002 - momentum: 0.9 - weight_decay: 0.0001 - data: train_subset: batch_size: 8 diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index d0e49b803cc..0b60238a116 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXTINY + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.0002 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.yolox.YOLOXTINY - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.0002 - momentum: 0.9 - weight_decay: 0.0001 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index 3831a03a102..ecfb300e326 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: 
../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXX + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.yolox.YOLOXX - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - data: image_color_channel: BGR train_subset: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index cb59e364e69..9758f771a83 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/detection.yaml -data: ../_base_/data/detection.yaml +model: + class_path: otx.algo.detection.yolox.YOLOXX + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 3 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/detection.yaml overrides: gradient_clip_val: 35.0 - model: - class_path: otx.algo.detection.yolox.YOLOXX - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - data: 
image_color_channel: BGR tile_config: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml index 9725702a3b2..6fbee8c7202 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml index 128c37e86d1..64f11a4fc7c 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet + init_args: + label_info: 80 + + optimizer: + class_path: 
torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml index 6b5bf0c715a..c493f81b9a2 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,19 +28,10 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 
0.007 - momentum: 0.9 - weight_decay: 0.001 - data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml index 4a8eca10e00..c2961fec183 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,19 +28,10 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index 7d9b86c8146..7283c4df30d 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + 
init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,19 +28,10 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index 4bc2b183ce3..a1afa0ac64c 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,19 +28,10 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 - init_args: - optimizer: - class_path: 
torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index 1b333368f23..6d3cc888ab2 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -1,5 +1,25 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.0001 + weight_decay: 0.05 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,17 +27,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT - init_args: - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.0001 - weight_decay: 0.05 - data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml index ae4e7c3be5e..51c10961eaa 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml @@ -1,5 +1,25 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.0001 + 
weight_decay: 0.05 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/map_50 engine: task: INSTANCE_SEGMENTATION @@ -7,17 +27,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: max_epochs: 100 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT - init_args: - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.0001 - weight_decay: 0.05 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/instance_segmentation/openvino_model.yaml b/src/otx/recipe/instance_segmentation/openvino_model.yaml index bd03ffc4cc9..b1d0d719704 100644 --- a/src/otx/recipe/instance_segmentation/openvino_model.yaml +++ b/src/otx/recipe/instance_segmentation/openvino_model.yaml @@ -7,14 +7,13 @@ model: async_inference: true use_throughput_mode: true -data: ../_base_/data/instance_segmentation.yaml - engine: task: INSTANCE_SEGMENTATION device: cpu callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: data: stack_images: false diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml index 2909e909a42..748bee485c4 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny.yaml @@ -1,5 +1,27 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + 
num_warmup_steps: 20 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 9 + monitor: val/map_50 + min_lr: 4e-06 engine: task: INSTANCE_SEGMENTATION @@ -7,28 +29,11 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: precision: 16 max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - init_args: - patience: 9 - min_lr: 4e-06 - data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml index 64377d19f9f..ba2f6825f0f 100644 --- a/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml +++ b/src/otx/recipe/instance_segmentation/rtmdet_inst_tiny_tile.yaml @@ -1,5 +1,27 @@ -model: ../_base_/model/instance_segmentation.yaml -data: ../_base_/data/instance_segmentation.yaml +model: + class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.001 + momentum: 0.9 + weight_decay: 0.0001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 9 + monitor: val/map_50 + min_lr: 4e-06 engine: task: INSTANCE_SEGMENTATION @@ -7,28 +29,11 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/instance_segmentation.yaml overrides: precision: 16 max_epochs: 100 gradient_clip_val: 35.0 - model: - class_path: otx.algo.instance_segmentation.rtmdet_inst.RTMDetInstTiny - 
init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.001 - momentum: 0.9 - weight_decay: 0.0001 - - scheduler: - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - init_args: - patience: 9 - min_lr: 4e-06 - data: tile_config: enable_tiler: true diff --git a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml index dc2ab9ac39b..e0e82f207c9 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_efficientnetb2b.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/rotated_detection.yaml -data: ../_base_/data/rotated_detection.yaml +model: + class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 9 + monitor: val/map_50 engine: task: ROTATED_DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/rotated_detection.yaml overrides: max_epochs: 100 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml index fed2fc8e1d4..1ea5d4b1e3b 100644 --- a/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml +++ b/src/otx/recipe/rotated_detection/maskrcnn_r50.yaml @@ -1,5 +1,26 @@ -model: ../_base_/model/rotated_detection.yaml -data: ../_base_/data/rotated_detection.yaml +model: + class_path: 
otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 + init_args: + label_info: 80 + + optimizer: + class_path: torch.optim.SGD + init_args: + lr: 0.007 + momentum: 0.9 + weight_decay: 0.001 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 9 + monitor: val/map_50 engine: task: ROTATED_DETECTION @@ -7,18 +28,9 @@ engine: callback_monitor: val/map_50 +data: ../_base_/data/rotated_detection.yaml overrides: max_epochs: 100 - model: - class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 - init_args: - optimizer: - class_path: torch.optim.SGD - init_args: - lr: 0.007 - momentum: 0.9 - weight_decay: 0.001 - data: train_subset: batch_size: 4 diff --git a/src/otx/recipe/rotated_detection/openvino_model.yaml b/src/otx/recipe/rotated_detection/openvino_model.yaml index 1e4765b3eee..927763df66d 100644 --- a/src/otx/recipe/rotated_detection/openvino_model.yaml +++ b/src/otx/recipe/rotated_detection/openvino_model.yaml @@ -7,14 +7,13 @@ model: async_inference: true use_throughput_mode: true -data: ../_base_/data/rotated_detection.yaml - engine: task: ROTATED_DETECTION device: cpu callback_monitor: val/map_50 +data: ../_base_/data/rotated_detection.yaml overrides: data: stack_images: false diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 258c1f8ffd7..d9b630183c2 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -8,38 +8,6 @@ model: params: ignore_index: 255 - backbone_configuration: - name: dinov2_vits14 - freeze_backbone: true - out_index: - - 8 - - 9 - - 10 - - 11 - - decode_head_configuration: - norm_cfg: - type: SyncBN - requires_grad: true - in_channels: - - 384 - - 384 - - 384 - - 384 - in_index: - - 0 - - 1 - - 2 - - 
3 - input_transform: resize_concat - channels: 1536 - kernel_size: 1 - num_convs: 1 - concat_input: false - dropout_ratio: -1 - align_corners: false - pretrained_weights: https://dl.fbaipublicfiles.com/dinov2/dinov2_vits14/dinov2_vits14_ade20k_linear_head.pth - optimizer: class_path: torch.optim.AdamW init_args: @@ -63,14 +31,13 @@ model: power: 0.9 last_epoch: -1 -data: ../_base_/data/semantic_segmentation.yaml - engine: task: SEMANTIC_SEGMENTATION device: auto callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: data: train_subset: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml index 64af9c97cf1..9156bd38a11 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_18.yaml @@ -1,5 +1,34 @@ -model: ../_base_/model/semantic_segmentation.yaml -data: ../_base_/data/semantic_segmentation.yaml +model: + class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet + init_args: + label_info: 2 + name_base_model: LiteHRNet18 + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.001 + betas: + - 0.9 + - 0.999 + weight_decay: 0.0 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION @@ -7,36 +36,8 @@ engine: callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: - model: - class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet - init_args: - name_base_model: LiteHRNet18 - - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 - - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.001 - 
betas: - - 0.9 - - 0.999 - weight_decay: 0.0 - - scheduler: - init_args: - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/Dice - callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml index a9c9ebfc639..a62938480bd 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_s.yaml @@ -1,5 +1,34 @@ -model: ../_base_/model/semantic_segmentation.yaml -data: ../_base_/data/semantic_segmentation.yaml +model: + class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet + init_args: + label_info: 2 + name_base_model: LiteHRNetS + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.001 + betas: + - 0.9 + - 0.999 + weight_decay: 0.0 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION @@ -7,36 +36,8 @@ engine: callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: - model: - class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet - init_args: - name_base_model: LiteHRNetS - - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 - - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.001 - betas: - - 0.9 - - 0.999 - weight_decay: 0.0 - - scheduler: - init_args: - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/Dice - callbacks: - 
class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml index 5db6ede3f30..100edf1d8b2 100644 --- a/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml +++ b/src/otx/recipe/semantic_segmentation/litehrnet_x.yaml @@ -1,5 +1,34 @@ -model: ../_base_/model/semantic_segmentation.yaml -data: ../_base_/data/semantic_segmentation.yaml +model: + class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet + init_args: + label_info: 2 + name_base_model: LiteHRNetX + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.001 + betas: + - 0.9 + - 0.999 + weight_decay: 0.0 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 100 + main_scheduler_callable: + class_path: lightning.pytorch.cli.ReduceLROnPlateau + init_args: + mode: max + factor: 0.1 + patience: 4 + monitor: val/Dice engine: task: SEMANTIC_SEGMENTATION @@ -7,36 +36,8 @@ engine: callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: - model: - class_path: otx.algo.segmentation.litehrnet.OTXLiteHRNet - init_args: - name_base_model: LiteHRNetX - - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 - - optimizer: - class_path: torch.optim.Adam - init_args: - lr: 0.001 - betas: - - 0.9 - - 0.999 - weight_decay: 0.0 - - scheduler: - init_args: - main_scheduler_callable: - class_path: lightning.pytorch.cli.ReduceLROnPlateau - init_args: - mode: max - factor: 0.1 - patience: 4 - monitor: val/Dice - callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/semantic_segmentation/openvino_model.yaml b/src/otx/recipe/semantic_segmentation/openvino_model.yaml index aa64cb69de4..e4827322b82 
100644 --- a/src/otx/recipe/semantic_segmentation/openvino_model.yaml +++ b/src/otx/recipe/semantic_segmentation/openvino_model.yaml @@ -7,14 +7,13 @@ model: async_inference: true use_throughput_mode: true -data: ../_base_/data/semantic_segmentation.yaml - engine: task: SEMANTIC_SEGMENTATION device: cpu callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: data: stack_images: false diff --git a/src/otx/recipe/semantic_segmentation/segnext_b.yaml b/src/otx/recipe/semantic_segmentation/segnext_b.yaml index bd23f125de8..62cced98c53 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_b.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_b.yaml @@ -1,5 +1,33 @@ -model: ../_base_/model/semantic_segmentation.yaml -data: ../_base_/data/semantic_segmentation.yaml +model: + class_path: otx.algo.segmentation.segnext.OTXSegNext + init_args: + label_info: 2 + name_base_model: SegNextB + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.00006 + betas: + - 0.9 + - 0.999 + weight_decay: 0.01 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -7,36 +35,8 @@ engine: callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: - model: - class_path: otx.algo.segmentation.segnext.OTXSegNext - init_args: - name_base_model: SegNextB - - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 - - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.00006 - betas: - - 0.9 - - 0.999 - weight_decay: 0.01 - - scheduler: - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: torch.optim.lr_scheduler.PolynomialLR - init_args: - 
total_iters: 100 - power: 0.9 - last_epoch: -1 - callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/semantic_segmentation/segnext_s.yaml b/src/otx/recipe/semantic_segmentation/segnext_s.yaml index 43bf46f02f5..8686002822c 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_s.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_s.yaml @@ -1,5 +1,33 @@ -model: ../_base_/model/semantic_segmentation.yaml -data: ../_base_/data/semantic_segmentation.yaml +model: + class_path: otx.algo.segmentation.segnext.OTXSegNext + init_args: + label_info: 2 + name_base_model: SegNextS + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.00006 + betas: + - 0.9 + - 0.999 + weight_decay: 0.01 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -7,36 +35,8 @@ engine: callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: - model: - class_path: otx.algo.segmentation.segnext.OTXSegNext - init_args: - name_base_model: SegNextS - - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 - - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.00006 - betas: - - 0.9 - - 0.999 - weight_decay: 0.01 - - scheduler: - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: torch.optim.lr_scheduler.PolynomialLR - init_args: - total_iters: 100 - power: 0.9 - last_epoch: -1 - callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/semantic_segmentation/segnext_t.yaml 
b/src/otx/recipe/semantic_segmentation/segnext_t.yaml index 9733b82a2da..621c827f334 100644 --- a/src/otx/recipe/semantic_segmentation/segnext_t.yaml +++ b/src/otx/recipe/semantic_segmentation/segnext_t.yaml @@ -1,5 +1,33 @@ -model: ../_base_/model/semantic_segmentation.yaml -data: ../_base_/data/semantic_segmentation.yaml +model: + class_path: otx.algo.segmentation.segnext.OTXSegNext + init_args: + label_info: 2 + name_base_model: SegNextT + + criterion_configuration: + - type: CrossEntropyLoss + params: + ignore_index: 255 + + optimizer: + class_path: torch.optim.AdamW + init_args: + lr: 0.00006 + betas: + - 0.9 + - 0.999 + weight_decay: 0.01 + + scheduler: + class_path: otx.core.schedulers.LinearWarmupSchedulerCallable + init_args: + num_warmup_steps: 20 + main_scheduler_callable: + class_path: torch.optim.lr_scheduler.PolynomialLR + init_args: + total_iters: 100 + power: 0.9 + last_epoch: -1 engine: task: SEMANTIC_SEGMENTATION @@ -7,36 +35,8 @@ engine: callback_monitor: val/Dice +data: ../_base_/data/semantic_segmentation.yaml overrides: - model: - class_path: otx.algo.segmentation.segnext.OTXSegNext - init_args: - name_base_model: SegNextT - - criterion_configuration: - - type: CrossEntropyLoss - params: - ignore_index: 255 - - optimizer: - class_path: torch.optim.AdamW - init_args: - lr: 0.00006 - betas: - - 0.9 - - 0.999 - weight_decay: 0.01 - - scheduler: - init_args: - num_warmup_steps: 20 - main_scheduler_callable: - class_path: torch.optim.lr_scheduler.PolynomialLR - init_args: - total_iters: 100 - power: 0.9 - last_epoch: -1 - callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/visual_prompting/openvino_model.yaml b/src/otx/recipe/visual_prompting/openvino_model.yaml index e2854b39b25..c76abd1e60f 100644 --- a/src/otx/recipe/visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/visual_prompting/openvino_model.yaml @@ -7,14 +7,13 @@ model: async_inference: false 
use_throughput_mode: false -data: ../_base_/data/visual_prompting.yaml - engine: task: VISUAL_PROMPTING device: cpu callback_monitor: val/Dice +data: ../_base_/data/visual_prompting.yaml overrides: data: train_subset: diff --git a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml index 0769857462f..b329cdd74b9 100644 --- a/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/visual_prompting/sam_tiny_vit.yaml @@ -1,5 +1,27 @@ -model: ../_base_/model/visual_prompting.yaml -data: ../_base_/data/visual_prompting.yaml +model: + class_path: otx.algo.visual_prompting.segment_anything.OTXSegmentAnything + init_args: + backbone: tiny_vit + label_info: 0 + freeze_image_encoder: true + freeze_prompt_encoder: true + freeze_mask_decoder: false + # options + use_stability_score: false + return_single_mask: true + return_extra_metrics: false + stability_score_offset: 1. + + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.00001 + + scheduler: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -7,12 +29,9 @@ engine: callback_monitor: val/f1-score +data: ../_base_/data/visual_prompting.yaml overrides: max_epochs: 100 - model: - init_args: - backbone: tiny_vit - callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/visual_prompting/sam_vit_b.yaml b/src/otx/recipe/visual_prompting/sam_vit_b.yaml index 1674e7d3216..da58ad228d8 100644 --- a/src/otx/recipe/visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/visual_prompting/sam_vit_b.yaml @@ -1,5 +1,27 @@ -model: ../_base_/model/visual_prompting.yaml -data: ../_base_/data/visual_prompting.yaml +model: + class_path: otx.algo.visual_prompting.segment_anything.OTXSegmentAnything + init_args: + backbone: vit_b + label_info: 0 + freeze_image_encoder: true + freeze_prompt_encoder: true + freeze_mask_decoder: 
false + # options + use_stability_score: false + return_single_mask: true + return_extra_metrics: false + stability_score_offset: 1. + + optimizer: + class_path: torch.optim.Adam + init_args: + lr: 0.00001 + + scheduler: + class_path: torch.optim.lr_scheduler.ConstantLR + init_args: + factor: 1 + total_iters: -1 engine: task: VISUAL_PROMPTING @@ -7,12 +29,9 @@ engine: callback_monitor: val/f1-score +data: ../_base_/data/visual_prompting.yaml overrides: max_epochs: 100 - model: - init_args: - backbone: vit_b - callbacks: - class_path: otx.algo.callbacks.adaptive_early_stopping.EarlyStoppingWithWarmup init_args: diff --git a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml index 1124668d6d1..e580f00996a 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/openvino_model.yaml @@ -4,13 +4,11 @@ model: label_info: 0 model_name: segment_anything model_type: Zero_Shot_Visual_Prompting - async_inference: False - use_throughput_mode: True + async_inference: false + use_throughput_mode: true reference_info_dir: reference_infos infer_reference_info_root: ../.latest/train - save_outputs: True - -data: ../_base_/data/zero_shot_visual_prompting.yaml + save_outputs: true engine: task: ZERO_SHOT_VISUAL_PROMPTING @@ -18,6 +16,7 @@ engine: callback_monitor: step +data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml index 262ccb5ea74..09cf6d468db 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_tiny_vit.yaml @@ -1,5 +1,21 @@ -model: ../_base_/model/zero_shot_visual_prompting.yaml -data: ../_base_/data/zero_shot_visual_prompting.yaml +model: + class_path: 
otx.algo.visual_prompting.zero_shot_segment_anything.OTXZeroShotSegmentAnything + init_args: + backbone: tiny_vit + label_info: 0 + freeze_image_encoder: true + freeze_prompt_encoder: true + freeze_mask_decoder: true + default_threshold_reference: 0.3 + default_threshold_target: 0.65 + save_outputs: true + reference_info_dir: reference_infos + infer_reference_info_root: ../.latest/train + # options + use_stability_score: false + return_single_mask: false + return_extra_metrics: false + stability_score_offset: 1. engine: task: ZERO_SHOT_VISUAL_PROMPTING @@ -7,9 +23,7 @@ engine: callback_monitor: step +data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 - model: - init_args: - backbone: tiny_vit diff --git a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml index f81b95864f2..9722e9f1a08 100644 --- a/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml +++ b/src/otx/recipe/zero_shot_visual_prompting/sam_vit_b.yaml @@ -1,5 +1,21 @@ -model: ../_base_/model/zero_shot_visual_prompting.yaml -data: ../_base_/data/zero_shot_visual_prompting.yaml +model: + class_path: otx.algo.visual_prompting.zero_shot_segment_anything.OTXZeroShotSegmentAnything + init_args: + backbone: vit_b + label_info: 0 + freeze_image_encoder: true + freeze_prompt_encoder: true + freeze_mask_decoder: true + default_threshold_reference: 0.3 + default_threshold_target: 0.65 + save_outputs: true + reference_info_dir: reference_infos + infer_reference_info_root: ../.latest/train + # options + use_stability_score: false + return_single_mask: false + return_extra_metrics: false + stability_score_offset: 1. 
engine: task: ZERO_SHOT_VISUAL_PROMPTING @@ -7,9 +23,7 @@ engine: callback_monitor: step +data: ../_base_/data/zero_shot_visual_prompting.yaml overrides: max_epochs: 1 limit_val_batches: 0 - model: - init_args: - backbone: vit_b From 7849186aa46a353d3294796c5dedca1dd200f531 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Wed, 10 Jul 2024 14:57:41 +0900 Subject: [PATCH 24/33] pre-commit --- src/otx/recipe/detection/atss_mobilenetv2.yaml | 2 +- src/otx/recipe/detection/atss_mobilenetv2_tile.yaml | 2 +- src/otx/recipe/detection/atss_resnext101.yaml | 2 +- src/otx/recipe/detection/rtmdet_tiny.yaml | 2 +- src/otx/recipe/detection/ssd_mobilenetv2.yaml | 2 +- src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml | 2 +- src/otx/recipe/detection/yolox_l.yaml | 2 +- src/otx/recipe/detection/yolox_l_tile.yaml | 2 +- src/otx/recipe/detection/yolox_s.yaml | 2 +- src/otx/recipe/detection/yolox_s_tile.yaml | 2 +- src/otx/recipe/detection/yolox_tiny.yaml | 2 +- src/otx/recipe/detection/yolox_tiny_tile.yaml | 2 +- src/otx/recipe/detection/yolox_x.yaml | 2 +- src/otx/recipe/detection/yolox_x_tile.yaml | 2 +- .../instance_segmentation/maskrcnn_efficientnetb2b.yaml | 2 +- .../instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml | 2 +- src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml | 2 +- src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml | 2 +- src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml | 2 +- .../recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml | 4 ++-- src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml | 2 +- src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml | 2 +- 22 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/otx/recipe/detection/atss_mobilenetv2.yaml b/src/otx/recipe/detection/atss_mobilenetv2.yaml index b682ff47385..a93a6a0e50e 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2.yaml @@ -2,7 +2,7 @@ model: class_path: 
otx.algo.detection.atss.MobileNetV2ATSS init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml index f1400a086b0..012e68f7922 100644 --- a/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/atss_mobilenetv2_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.atss.MobileNetV2ATSS init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/atss_resnext101.yaml b/src/otx/recipe/detection/atss_resnext101.yaml index 2fefe3111e2..e58fe810047 100644 --- a/src/otx/recipe/detection/atss_resnext101.yaml +++ b/src/otx/recipe/detection/atss_resnext101.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.atss.ResNeXt101ATSS init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/rtmdet_tiny.yaml b/src/otx/recipe/detection/rtmdet_tiny.yaml index 8f73712f0c9..10a77fd5873 100644 --- a/src/otx/recipe/detection/rtmdet_tiny.yaml +++ b/src/otx/recipe/detection/rtmdet_tiny.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.rtmdet.RTMDetTiny init_args: label_info: 80 - + optimizer: class_path: torch.optim.AdamW init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2.yaml b/src/otx/recipe/detection/ssd_mobilenetv2.yaml index a0278adae5e..0c09769be73 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.ssd.SSD init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml index b6580970f20..455891ebbd4 100644 --- a/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml +++ b/src/otx/recipe/detection/ssd_mobilenetv2_tile.yaml 
@@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.ssd.SSD init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_l.yaml b/src/otx/recipe/detection/yolox_l.yaml index 3e76975edb2..9693a82841f 100644 --- a/src/otx/recipe/detection/yolox_l.yaml +++ b/src/otx/recipe/detection/yolox_l.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXL init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_l_tile.yaml b/src/otx/recipe/detection/yolox_l_tile.yaml index fe80768ec5a..35f345c25ca 100644 --- a/src/otx/recipe/detection/yolox_l_tile.yaml +++ b/src/otx/recipe/detection/yolox_l_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXL init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_s.yaml b/src/otx/recipe/detection/yolox_s.yaml index bfca1779313..e861931527c 100644 --- a/src/otx/recipe/detection/yolox_s.yaml +++ b/src/otx/recipe/detection/yolox_s.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXS init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_s_tile.yaml b/src/otx/recipe/detection/yolox_s_tile.yaml index c03953d0f0a..82400614ef8 100644 --- a/src/otx/recipe/detection/yolox_s_tile.yaml +++ b/src/otx/recipe/detection/yolox_s_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXS init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_tiny.yaml b/src/otx/recipe/detection/yolox_tiny.yaml index 4362e9c38de..655a3d5e08d 100644 --- a/src/otx/recipe/detection/yolox_tiny.yaml +++ b/src/otx/recipe/detection/yolox_tiny.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXTINY init_args: label_info: 80 - + optimizer: class_path: 
torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_tiny_tile.yaml b/src/otx/recipe/detection/yolox_tiny_tile.yaml index 0b60238a116..45fa55fe3da 100644 --- a/src/otx/recipe/detection/yolox_tiny_tile.yaml +++ b/src/otx/recipe/detection/yolox_tiny_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXTINY init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_x.yaml b/src/otx/recipe/detection/yolox_x.yaml index ecfb300e326..ef16e8314b1 100644 --- a/src/otx/recipe/detection/yolox_x.yaml +++ b/src/otx/recipe/detection/yolox_x.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXX init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/detection/yolox_x_tile.yaml b/src/otx/recipe/detection/yolox_x_tile.yaml index 9758f771a83..54a9d73909e 100644 --- a/src/otx/recipe/detection/yolox_x_tile.yaml +++ b/src/otx/recipe/detection/yolox_x_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.detection.yolox.YOLOXX init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml index 6fbee8c7202..d968f341375 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml index 64f11a4fc7c..e7983fc9b0a 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml +++ 
b/src/otx/recipe/instance_segmentation/maskrcnn_efficientnetb2b_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNEfficientNet init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml index c493f81b9a2..89cccf852a0 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml index c2961fec183..a56ea360183 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNResNet50 init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml index 7283c4df30d..bbc617f8f32 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml index a1afa0ac64c..6cafc6c5e89 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_r50_tv_tile.yaml @@ -2,14 +2,14 @@ model: 
class_path: otx.algo.instance_segmentation.maskrcnn_tv.TVMaskRCNNR50 init_args: label_info: 80 - + optimizer: class_path: torch.optim.SGD init_args: lr: 0.007 momentum: 0.9 weight_decay: 0.001 - + scheduler: class_path: otx.core.schedulers.LinearWarmupSchedulerCallable init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml index 6d3cc888ab2..64b2878cc02 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT init_args: label_info: 80 - + optimizer: class_path: torch.optim.AdamW init_args: diff --git a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml index 51c10961eaa..e5d4d3512d0 100644 --- a/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml +++ b/src/otx/recipe/instance_segmentation/maskrcnn_swint_tile.yaml @@ -2,7 +2,7 @@ model: class_path: otx.algo.instance_segmentation.maskrcnn.MaskRCNNSwinT init_args: label_info: 80 - + optimizer: class_path: torch.optim.AdamW init_args: From 0eab3c47d236a62dc4e75b96f5abd67b4897a011 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 11 Jul 2024 14:45:23 +0900 Subject: [PATCH 25/33] Update transforms for sseg and edit entity name of mask to `masks` for compatibility with other task (iseg) --- src/otx/core/data/dataset/segmentation.py | 7 +- src/otx/core/data/entity/segmentation.py | 6 +- src/otx/core/data/transform_libs/mmseg.py | 4 +- .../core/data/transform_libs/torchvision.py | 130 +++++++++++------- 4 files changed, 92 insertions(+), 55 deletions(-) diff --git a/src/otx/core/data/dataset/segmentation.py b/src/otx/core/data/dataset/segmentation.py index b9328509db7..3651e961d23 100644 --- a/src/otx/core/data/dataset/segmentation.py +++ b/src/otx/core/data/dataset/segmentation.py @@ 
-213,11 +213,10 @@ def _get_item_impl(self, index: int) -> SegDataEntity | None: image_color_channel=self.image_color_channel, ignored_labels=ignored_labels, ), - gt_seg_map=tv_tensors.Mask( - mask, - ), + masks=tv_tensors.Mask(mask[None]), ) - return self._apply_transforms(entity) + transformed_entity = self._apply_transforms(entity) + return transformed_entity.wrap(masks=transformed_entity.masks[0]) if transformed_entity else None @property def collate_fn(self) -> Callable: diff --git a/src/otx/core/data/entity/segmentation.py b/src/otx/core/data/entity/segmentation.py index 638461fb209..84cb85aab62 100644 --- a/src/otx/core/data/entity/segmentation.py +++ b/src/otx/core/data/entity/segmentation.py @@ -24,7 +24,7 @@ class SegDataEntity(OTXDataEntity): """Data entity for segmentation task. - :param gt_seg_map: mask annotations + :param masks: mask annotations """ @property @@ -32,7 +32,7 @@ def task(self) -> OTXTaskType: """OTX Task type definition.""" return OTXTaskType.SEMANTIC_SEGMENTATION - gt_seg_map: tv_tensors.Mask + masks: tv_tensors.Mask @dataclass @@ -66,7 +66,7 @@ def collate_fn( batch_size=batch_data.batch_size, images=batch_data.images, imgs_info=batch_data.imgs_info, - masks=[entity.gt_seg_map for entity in entities], + masks=[entity.masks for entity in entities], ) def pin_memory(self) -> SegBatchDataEntity: diff --git a/src/otx/core/data/transform_libs/mmseg.py b/src/otx/core/data/transform_libs/mmseg.py index 2c167ebbc2d..98557dff2c7 100644 --- a/src/otx/core/data/transform_libs/mmseg.py +++ b/src/otx/core/data/transform_libs/mmseg.py @@ -37,7 +37,7 @@ def transform(self, results: dict) -> dict: msg = "__otx__ key should be passed from the previous pipeline (LoadImageFromFile)" raise RuntimeError(msg) if isinstance(otx_data_entity, SegDataEntity): - gt_masks = otx_data_entity.gt_seg_map.numpy() + gt_masks = otx_data_entity.masks.numpy() results["gt_seg_map"] = gt_masks # we need this to properly handle seg maps during transforms 
results["seg_fields"] = ["gt_seg_map"] @@ -69,7 +69,7 @@ def transform(self, results: dict) -> SegDataEntity: return SegDataEntity( image=image, img_info=image_info, - gt_seg_map=masks, + masks=masks, ) diff --git a/src/otx/core/data/transform_libs/torchvision.py b/src/otx/core/data/transform_libs/torchvision.py index 90cf4c4a4f8..0bc05c390c3 100644 --- a/src/otx/core/data/transform_libs/torchvision.py +++ b/src/otx/core/data/transform_libs/torchvision.py @@ -358,7 +358,7 @@ class MinIoURandomCrop(tvt_v2.Transform, NumpytoTVTensorMixin): min_ious (Sequence[float]): minimum IoU threshold for all intersections with bounding boxes. min_crop_size (float): minimum crop's size (i.e. h,w := a*h, a*w, where a >= min_crop_size). bbox_clip_border (bool, optional): Whether clip the objects outside the border of the image. Defaults to True. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -482,7 +482,7 @@ class Resize(tvt_v2.Transform, NumpytoTVTensorMixin): interpolation_mask (str): Interpolation method for mask. Defaults to 'nearest'. transform_bbox (bool): Whether to transform bounding boxes. Defaults to False. transform_mask (bool): Whether to transform masks. Defaults to False. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -632,7 +632,7 @@ class RandomResizedCrop(tvt_v2.Transform, NumpytoTVTensorMixin): 'bilinear'. backend (str): The image resize backend type, accepted values are 'cv2' and 'pillow'. Defaults to 'cv2'. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. 
""" def __init__( @@ -1023,7 +1023,7 @@ class RandomFlip(tvt_v2.Transform, NumpytoTVTensorMixin): If input is a list, the length must equal ``prob``. Each element in ``prob`` indicates the flip probability of corresponding direction. Defaults to 'horizontal'. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -1143,7 +1143,7 @@ class PhotoMetricDistortion(tvt_v2.Transform, NumpytoTVTensorMixin): contrast_range (sequence): range of contrast. saturation_range (sequence): range of saturation. hue_delta (int): delta of hue. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -1293,7 +1293,7 @@ class RandomAffine(tvt_v2.Transform, NumpytoTVTensorMixin): the border of the image. In some dataset like MOT17, the gt bboxes are allowed to cross the border of images. Therefore, we don't need to clip the gt bboxes in these cases. Defaults to True. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -1420,13 +1420,13 @@ class CachedMosaic(tvt_v2.Transform, NumpytoTVTensorMixin): img_scale (Sequence[int]): Image size before mosaic pipeline of single image. The shape order should be (height, width). Defaults to (640, 640). - center_ratio_range (Sequence[float]): Center ratio range of mosaic + center_ratio_range (tuple[float]): Center ratio range of mosaic output. Defaults to (0.5, 1.5). bbox_clip_border (bool, optional): Whether to clip the objects outside the border of the image. In some dataset like MOT17, the gt bboxes are allowed to cross the border of images. Therefore, we don't need to clip the gt bboxes in these cases. Defaults to True. - pad_val (int): Pad value. 
Defaults to 114. + pad_val (float): Pad value. Defaults to 114.0. prob (float): Probability of applying this transformation. Defaults to 1.0. max_cached_images (int): The maximum length of the cache. The larger @@ -1436,7 +1436,7 @@ class CachedMosaic(tvt_v2.Transform, NumpytoTVTensorMixin): random_pop (bool): Whether to randomly pop a result from the cache when the cache is full. If set to False, use FIFO popping method. Defaults to True. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -1723,7 +1723,7 @@ class CachedMixUp(tvt_v2.Transform, NumpytoTVTensorMixin): Defaults to (0.5, 1.5). flip_ratio (float): Horizontal flip ratio of mixup image. Defaults to 0.5. - pad_val (int): Pad value. Defaults to 114. + pad_val (float): Pad value. Defaults to 114.0. max_iters (int): The maximum number of iterations. If the number of iterations is greater than `max_iters`, but gt_bbox is still empty, then the iteration is terminated. Defaults to 15. @@ -1740,7 +1740,7 @@ class CachedMixUp(tvt_v2.Transform, NumpytoTVTensorMixin): Defaults to True. prob (float): Probability of applying this transformation. Defaults to 1.0. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -1989,7 +1989,7 @@ class YOLOXHSVRandomAug(tvt_v2.Transform, NumpytoTVTensorMixin): hue_delta (int): delta of hue. Defaults to 5. saturation_delta (int): delta of saturation. Defaults to 30. value_delta (int): delat of value. Defaults to 30. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( @@ -2059,7 +2059,7 @@ class Pad(tvt_v2.Transform, NumpytoTVTensorMixin): None. 
pad_to_square (bool): Whether to pad the image into a square. Currently only used for YOLOX. Defaults to False. - pad_val (Number | dict[str, Number], optional) - Padding value for if + pad_val (int | float | dict[str, int | float], optional) - Padding value for if the pad_mode is "constant". If it is a single number, the value to pad the image is the number and to pad the semantic segmentation map is 255. If it is a dict, it should have the @@ -2082,7 +2082,7 @@ class Pad(tvt_v2.Transform, NumpytoTVTensorMixin): on the edge. For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode will result in [2, 1, 1, 2, 3, 4, 4, 3] - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ border_type: ClassVar = { @@ -2106,9 +2106,9 @@ def __init__( self.size = size self.size_divisor = size_divisor - pad_val = pad_val or {"img": 0, "seg": 255} + pad_val = pad_val or {"img": 0, "mask": 0} if isinstance(pad_val, int): - pad_val = {"img": pad_val, "seg": 255} + pad_val = {"img": pad_val, "mask": 0} assert isinstance(pad_val, dict), "pad_val " # noqa: S101 self.pad_val = pad_val self.pad_to_square = pad_to_square @@ -2168,7 +2168,7 @@ def _pad_masks(self, inputs: T_OTXDataEntity) -> T_OTXDataEntity: if (masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0: masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks - pad_val = self.pad_val.get("masks", 0) + pad_val = self.pad_val.get("mask", 0) padding = inputs.img_info.padding padded_masks = np.stack( @@ -2210,7 +2210,7 @@ class RandomResize(tvt_v2.Transform, NumpytoTVTensorMixin): Args: scale (Sequence): Images scales for resizing with (height, width). Defaults to None. ratio_range (tuple[float], optional): (min_ratio, max_ratio). Defaults to None. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. 
+ is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. **resize_kwargs: Other keyword arguments for the ``resize_type``. """ @@ -2329,22 +2329,26 @@ class RandomCrop(tvt_v2.Transform, NumpytoTVTensorMixin): crop_h in range [crop_size[0], min(h, crop_size[1])] and crop_w in range [crop_size[0], min(w, crop_size[1])]. Defaults to "absolute". + cat_max_ratio (float): The maximum ratio that single category could occupy. allow_negative_crop (bool, optional): Whether to allow a crop that does not contain any bbox area. Defaults to False. recompute_bbox (bool, optional): Whether to re-compute the boxes based on cropped instance masks. Defaults to False. bbox_clip_border (bool, optional): Whether clip the objects outside the border of the image. Defaults to True. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + ignore_index (int): The label index to be ignored. Defaults to 255. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( self, crop_size: tuple, # (H, W) crop_type: str = "absolute", + cat_max_ratio: float = 1, allow_negative_crop: bool = False, recompute_bbox: bool = False, bbox_clip_border: bool = True, + ignore_index: int = 255, is_numpy_to_tvtensor: bool = False, ) -> None: super().__init__() @@ -2363,11 +2367,35 @@ def __init__( assert 0 < crop_size[1] <= 1 # noqa: S101 self.crop_size = crop_size # (H, W) self.crop_type = crop_type + self.cat_max_ratio = cat_max_ratio self.allow_negative_crop = allow_negative_crop self.bbox_clip_border = bbox_clip_border self.recompute_bbox = recompute_bbox + self.ignore_index = ignore_index self.is_numpy_to_tvtensor = is_numpy_to_tvtensor + def _generate_crop_bbox( + self, + orig_shape: tuple[int, int], + crop_size: tuple[int, int], + ) -> tuple: + """Randomly get a crop bounding box. + + Args: + orig_shape (tuple): The original shape of the image. + crop_size (tuple): The size of the crop. 
+ + Returns: + tuple: Coordinates of the cropped image. + """ + margin_h = max(orig_shape[0] - crop_size[0], 0) + margin_w = max(orig_shape[1] - crop_size[1], 0) + offset_h, offset_w = self._rand_offset((margin_h, margin_w)) + crop_y1, crop_y2 = offset_h, offset_h + crop_size[0] + crop_x1, crop_x2 = offset_w, offset_w + crop_size[1] + + return (crop_x1, crop_y1, crop_x2, crop_y2), offset_h, offset_w + def _crop_data( self, inputs: T_OTXDataEntity, @@ -2380,19 +2408,29 @@ def _crop_data( img: np.ndarray = to_np_image(inputs.image) orig_shape = inputs.img_info.img_shape - margin_h = max(orig_shape[0] - crop_size[0], 0) - margin_w = max(orig_shape[1] - crop_size[1], 0) - offset_h, offset_w = self._rand_offset((margin_h, margin_w)) - crop_y1, crop_y2 = offset_h, offset_h + crop_size[0] - crop_x1, crop_x2 = offset_w, offset_w + crop_size[1] + crop_bbox, offset_h, offset_w = self._generate_crop_bbox(orig_shape, crop_size) + + # for semantic segmentation + # reference : https://github.com/open-mmlab/mmsegmentation/blob/v1.2.1/mmseg/datasets/transforms/transforms.py#L281-L290 + if (self.cat_max_ratio < 1.0) and ((masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0): + # Repeat 10 times + for _ in range(10): + seg_temp = crop_masks(masks, np.array(crop_bbox)) + labels, cnt = np.unique(seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_index] + if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.cat_max_ratio: + break + crop_bbox, offset_h, offset_w = self._generate_crop_bbox(orig_shape, crop_size) # crop the image + crop_x1, crop_y1, crop_x2, crop_y2 = crop_bbox img = img[crop_y1:crop_y2, crop_x1:crop_x2, ...] 
cropped_img_shape = img.shape[:2] inputs.image = img inputs.img_info = _crop_image_info(inputs.img_info, *cropped_img_shape) + valid_inds: np.ndarray = np.array([1]) # for semantic segmentation # crop bboxes accordingly and clip to the image boundary if (bboxes := getattr(inputs, "bboxes", None)) is not None: bboxes = translate_bboxes(bboxes, [-offset_w, -offset_h]) @@ -2410,32 +2448,32 @@ def _crop_data( if (labels := getattr(inputs, "labels", None)) is not None: inputs.labels = labels[valid_inds] - if (masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0: - masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks - inputs.masks = crop_masks( - masks[valid_inds.nonzero()[0]], - np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]), + if (masks := getattr(inputs, "masks", None)) is not None and len(masks) > 0: + masks = masks.numpy() if not isinstance(masks, np.ndarray) else masks + inputs.masks = crop_masks( + masks[valid_inds.nonzero()[0]], + np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]), + ) + + if self.recompute_bbox: + inputs.bboxes = tv_tensors.wrap( + torch.as_tensor(get_bboxes_from_masks(inputs.masks)), + like=inputs.bboxes, ) - if self.recompute_bbox: - inputs.bboxes = tv_tensors.wrap( - torch.as_tensor(get_bboxes_from_masks(inputs.masks)), - like=inputs.bboxes, - ) + if (polygons := getattr(inputs, "polygons", None)) is not None and len(polygons) > 0: + inputs.polygons = crop_polygons( + [polygons[i] for i in valid_inds.nonzero()[0]], + np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]), + *orig_shape, + ) - if (polygons := getattr(inputs, "polygons", None)) is not None and len(polygons) > 0: - inputs.polygons = crop_polygons( - [polygons[i] for i in valid_inds.nonzero()[0]], - np.asarray([crop_x1, crop_y1, crop_x2, crop_y2]), - *orig_shape, + if self.recompute_bbox: + inputs.bboxes = tv_tensors.wrap( + torch.as_tensor(get_bboxes_from_polygons(inputs.polygons, *cropped_img_shape)), + like=inputs.bboxes, ) - if 
self.recompute_bbox: - inputs.bboxes = tv_tensors.wrap( - torch.as_tensor(get_bboxes_from_polygons(inputs.polygons, *cropped_img_shape)), - like=inputs.bboxes, - ) - return inputs @cache_randomness @@ -2522,7 +2560,7 @@ class FilterAnnotations(tvt_v2.Transform, NumpytoTVTensorMixin): min_gt_mask_area threshold. Default: False keep_empty (bool): Whether to return DataEntity as it is when it becomes an empty bbox after filtering. Defaults to True. - is_numpy_to_tvtensor(bool): Whether convert outputs to tensor. Defaults to False. + is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. """ def __init__( From c17cfb5bc8d1224bb6b5cf0b926233c00f54fffd Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 11 Jul 2024 14:45:36 +0900 Subject: [PATCH 26/33] Update recipe to use OTX's --- .../_base_/data/semantic_segmentation.yaml | 44 +++++++++++++------ 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml index 32161c47b88..3a2e35e8e89 100644 --- a/src/otx/recipe/_base_/data/semantic_segmentation.yaml +++ b/src/otx/recipe/_base_/data/semantic_segmentation.yaml @@ -13,22 +13,36 @@ train_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: torchvision.transforms.v2.RandomResizedCrop + - class_path: otx.core.data.transform_libs.torchvision.RandomResize init_args: - size: - - 512 - - 512 scale: - - 0.2 - - 1.0 - ratio: + - 544 + - 544 + ratio_range: - 0.5 - 2.0 - antialias: true + keep_ratio: false + transform_mask: true + - class_path: otx.core.data.transform_libs.torchvision.RandomCrop + init_args: + crop_size: + - 512 + - 512 + cat_max_ratio: 0.75 + - class_path: otx.core.data.transform_libs.torchvision.RandomFlip + init_args: + prob: 0.5 - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion + - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: + size: 
+ - 512 + - 512 + pad_val: + img: 0 + mask: 255 + transform_mask: true is_numpy_to_tvtensor: true - - class_path: torchvision.transforms.v2.RandomHorizontalFlip - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} @@ -45,11 +59,13 @@ val_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: torchvision.transforms.v2.Resize + - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - size: + scale: - 512 - 512 + transform_mask: true + is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} @@ -66,11 +82,13 @@ test_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: torchvision.transforms.v2.Resize + - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - size: + scale: - 512 - 512 + transform_mask: true + is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} From b8e4bf32e85276b6b79c509860ae137658f792be Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 11 Jul 2024 15:24:19 +0900 Subject: [PATCH 27/33] Update `dino_v2` data config --- .../recipe/semantic_segmentation/dino_v2.yaml | 43 +++++++++++++------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index d9b630183c2..04519baf5f6 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -42,22 +42,35 @@ overrides: data: train_subset: transforms: - - class_path: torchvision.transforms.v2.RandomResizedCrop + - class_path: otx.core.data.transform_libs.torchvision.RandomResize init_args: - size: - - 560 - - 560 scale: - - 0.2 - - 1.0 - ratio: + - 640 + - 640 + ratio_range: - 0.5 - 2.0 - antialias: true + transform_mask: true + - class_path: 
otx.core.data.transform_libs.torchvision.RandomCrop + init_args: + crop_size: + - 560 + - 560 + cat_max_ratio: 0.75 + - class_path: otx.core.data.transform_libs.torchvision.RandomFlip + init_args: + prob: 0.5 - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion + - class_path: otx.core.data.transform_libs.torchvision.Pad init_args: + size: + - 560 + - 560 + pad_val: + img: 0 + mask: 255 + transform_mask: true is_numpy_to_tvtensor: true - - class_path: torchvision.transforms.v2.RandomHorizontalFlip - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} @@ -68,11 +81,13 @@ overrides: val_subset: transforms: - - class_path: torchvision.transforms.v2.Resize + - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - size: + scale: - 560 - 560 + transform_mask: true + is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} @@ -83,11 +98,13 @@ overrides: test_subset: transforms: - - class_path: torchvision.transforms.v2.Resize + - class_path: otx.core.data.transform_libs.torchvision.Resize init_args: - size: + scale: - 560 - 560 + transform_mask: true + is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: dtype: ${as_torch_dtype:torch.float32} From cdff34115036db6b159d12c47832e336f246e6e2 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 11 Jul 2024 15:24:24 +0900 Subject: [PATCH 28/33] Update --- src/otx/recipe/_base_/data/instance_segmentation.yaml | 2 ++ src/otx/recipe/_base_/data/rotated_detection.yaml | 2 ++ src/otx/recipe/_base_/data/semantic_segmentation.yaml | 3 ++- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/otx/recipe/_base_/data/instance_segmentation.yaml b/src/otx/recipe/_base_/data/instance_segmentation.yaml index 91d6cebf185..ef96e88d0af 100644 --- a/src/otx/recipe/_base_/data/instance_segmentation.yaml +++ 
b/src/otx/recipe/_base_/data/instance_segmentation.yaml @@ -16,6 +16,7 @@ train_subset: num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + val_subset: subset_name: val transform_lib_type: TORCHVISION @@ -26,6 +27,7 @@ val_subset: num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + test_subset: subset_name: test transform_lib_type: TORCHVISION diff --git a/src/otx/recipe/_base_/data/rotated_detection.yaml b/src/otx/recipe/_base_/data/rotated_detection.yaml index 0a4854646b8..3108fcbe8c6 100644 --- a/src/otx/recipe/_base_/data/rotated_detection.yaml +++ b/src/otx/recipe/_base_/data/rotated_detection.yaml @@ -16,6 +16,7 @@ train_subset: num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + val_subset: subset_name: val transform_lib_type: TORCHVISION @@ -26,6 +27,7 @@ val_subset: num_workers: 2 sampler: class_path: torch.utils.data.RandomSampler + test_subset: subset_name: test transform_lib_type: TORCHVISION diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml index 3a2e35e8e89..afd81ee34d5 100644 --- a/src/otx/recipe/_base_/data/semantic_segmentation.yaml +++ b/src/otx/recipe/_base_/data/semantic_segmentation.yaml @@ -21,7 +21,6 @@ train_subset: ratio_range: - 0.5 - 2.0 - keep_ratio: false transform_mask: true - class_path: otx.core.data.transform_libs.torchvision.RandomCrop init_args: @@ -52,6 +51,7 @@ train_subset: std: [58.395, 57.12, 57.375] sampler: class_path: torch.utils.data.RandomSampler + val_subset: subset_name: val batch_size: 8 @@ -75,6 +75,7 @@ val_subset: std: [58.395, 57.12, 57.375] sampler: class_path: torch.utils.data.RandomSampler + test_subset: subset_name: test num_workers: 4 From 4711dbbb0a5f3b249cc2b3b83ab19ac2887f4e35 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Thu, 11 Jul 2024 16:05:50 +0900 Subject: [PATCH 29/33] Fix unit test --- tests/unit/core/data/test_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/tests/unit/core/data/test_dataset.py b/tests/unit/core/data/test_dataset.py index fb55a133c55..44031b96a46 100644 --- a/tests/unit/core/data/test_dataset.py +++ b/tests/unit/core/data/test_dataset.py @@ -102,8 +102,8 @@ def test_ignore_index(self, fxt_mock_dm_subset): # The mask is np.eye(10) with label_id = 0, # so that the diagonal is filled with zero # and others are filled with ignore_index. - gt_seg_map = next(iter(dataset)).gt_seg_map - assert gt_seg_map.sum() == (10 * 10 - 10) * 100 + masks = next(iter(dataset)).masks + assert masks.sum() == (10 * 10 - 10) * 100 def test_overflown_ignore_index(self, fxt_mock_dm_subset): dataset = OTXSegmentationDataset( From 61e8550eadc57503abb13e8fa38ecf33f2df7426 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Fri, 12 Jul 2024 14:07:45 +0900 Subject: [PATCH 30/33] Update `RandomResizedCrop` for masks --- .../core/data/transform_libs/torchvision.py | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/src/otx/core/data/transform_libs/torchvision.py b/src/otx/core/data/transform_libs/torchvision.py index 0bc05c390c3..703053b0b40 100644 --- a/src/otx/core/data/transform_libs/torchvision.py +++ b/src/otx/core/data/transform_libs/torchvision.py @@ -630,8 +630,7 @@ class RandomResizedCrop(tvt_v2.Transform, NumpytoTVTensorMixin): interpolation (str): Interpolation method, accepted values are 'nearest', 'bilinear', 'bicubic', 'area', 'lanczos'. Defaults to 'bilinear'. - backend (str): The image resize backend type, accepted values are - 'cv2' and 'pillow'. Defaults to 'cv2'. + transform_mask (bool): Whether to transform masks. Defaults to False. is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. 
""" @@ -642,7 +641,7 @@ def __init__( aspect_ratio_range: tuple[float, float] = (3.0 / 4.0, 4.0 / 3.0), max_attempts: int = 10, interpolation: str = "bilinear", - backend: str = "cv2", + transform_mask: bool = False, is_numpy_to_tvtensor: bool = False, ) -> None: super().__init__() @@ -675,7 +674,7 @@ def __init__( self.aspect_ratio_range = aspect_ratio_range self.max_attempts = max_attempts self.interpolation = interpolation - self.backend = backend + self.transform_mask = transform_mask self.is_numpy_to_tvtensor = is_numpy_to_tvtensor @cache_randomness @@ -817,15 +816,7 @@ def _crop_img( return patches def forward(self, *_inputs: T_OTXDataEntity) -> T_OTXDataEntity | None: - """Transform function to randomly resized crop images. - - Args: - results (dict): Result dict from loading pipeline. - - Returns: - dict: Randomly resized cropped results, 'img_shape' - key in result dict is updated according to crop size. - """ + """Transform function to randomly resized crop images and masks.""" inputs = _inputs[0] if (img := getattr(inputs, "image", None)) is not None: img = to_np_image(img) @@ -840,15 +831,17 @@ def forward(self, *_inputs: T_OTXDataEntity) -> T_OTXDataEntity | None: ) img = self._crop_img(img, bboxes=bboxes) inputs.img_info = _crop_image_info(inputs.img_info, *img.shape[:2]) - img = cv2.resize( img, tuple(self.scale[::-1]), dst=None, interpolation=CV2_INTERP_CODES[self.interpolation], ) - if (masks := getattr(inputs, "gt_seg_map", None)) is not None: - masks = masks.numpy() + inputs.image = img + inputs.img_info = _resize_image_info(inputs.img_info, img.shape[:2]) + + if self.transform_mask and (masks := getattr(inputs, "masks", None)) is not None: + masks = to_np_image(masks) masks = self._crop_img(masks, bboxes=bboxes) masks = cv2.resize( masks, @@ -856,10 +849,10 @@ def forward(self, *_inputs: T_OTXDataEntity) -> T_OTXDataEntity | None: dst=None, interpolation=CV2_INTERP_CODES["nearest"], ) - inputs.gt_seg_map = torch.from_numpy(masks) # type: 
ignore[attr-defined] + if masks.ndim == 2: + masks = masks[None] + inputs.masks = tv_tensors.Mask(masks) # type: ignore[attr-defined] - inputs.image = img - inputs.img_info = _resize_image_info(inputs.img_info, img.shape[:2]) return self.convert(inputs) def __repr__(self): @@ -875,7 +868,8 @@ def __repr__(self): repr_str += f"{tuple(round(r, 4) for r in self.aspect_ratio_range)}" repr_str += f", max_attempts={self.max_attempts}" repr_str += f", interpolation={self.interpolation}" - repr_str += f", backend={self.backend})" + repr_str += f", transform_mask={self.transform_mask}" + repr_str += f", is_numpy_to_tvtensor={self.is_numpy_to_tvtensor})" return repr_str @@ -2082,6 +2076,7 @@ class Pad(tvt_v2.Transform, NumpytoTVTensorMixin): on the edge. For example, padding [1, 2, 3, 4] with 2 elements on both sides in symmetric mode will result in [2, 1, 1, 2, 3, 4, 4, 3] + transform_mask (bool): Whether to transform masks. Defaults to False. is_numpy_to_tvtensor (bool): Whether convert outputs to tensor. Defaults to False. 
""" From 2e32819856b5a084f97374fff17a4a20f9c972aa Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Fri, 12 Jul 2024 14:09:17 +0900 Subject: [PATCH 31/33] Update recipes to use `RandomResizedCrop` instead of `RandomResize` and `RandomCrop` --- .../_base_/data/semantic_segmentation.yaml | 28 ++++++------------- .../recipe/semantic_segmentation/dino_v2.yaml | 28 ++++++------------- 2 files changed, 16 insertions(+), 40 deletions(-) diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml index afd81ee34d5..df4c750b99b 100644 --- a/src/otx/recipe/_base_/data/semantic_segmentation.yaml +++ b/src/otx/recipe/_base_/data/semantic_segmentation.yaml @@ -13,34 +13,22 @@ train_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: otx.core.data.transform_libs.torchvision.RandomResize + - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: - - 544 - - 544 - ratio_range: + - 512 + - 512 + crop_ratio_range: + - 0.2 + - 1.0 + aspect_ratio_range: - 0.5 - 2.0 transform_mask: true - - class_path: otx.core.data.transform_libs.torchvision.RandomCrop - init_args: - crop_size: - - 512 - - 512 - cat_max_ratio: 0.75 + - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 - - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - - class_path: otx.core.data.transform_libs.torchvision.Pad - init_args: - size: - - 512 - - 512 - pad_val: - img: 0 - mask: 255 - transform_mask: true is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 04519baf5f6..f51a9033e78 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ 
-42,34 +42,22 @@ overrides: data: train_subset: transforms: - - class_path: otx.core.data.transform_libs.torchvision.RandomResize + - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: - - 640 - - 640 - ratio_range: + - 560 + - 560 + crop_ratio_range: + - 0.2 + - 1.0 + aspect_ratio_range: - 0.5 - 2.0 transform_mask: true - - class_path: otx.core.data.transform_libs.torchvision.RandomCrop - init_args: - crop_size: - - 560 - - 560 - cat_max_ratio: 0.75 + - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 - - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - - class_path: otx.core.data.transform_libs.torchvision.Pad - init_args: - size: - - 560 - - 560 - pad_val: - img: 0 - mask: 255 - transform_mask: true is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: From b8307a6be314352ba2b90aa73acb09f09b131af5 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Fri, 12 Jul 2024 14:57:46 +0900 Subject: [PATCH 32/33] Remove unused argument in recipes --- src/otx/recipe/classification/h_label_cls/deit_tiny.yaml | 1 - src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml | 1 - src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml | 1 - .../recipe/classification/h_label_cls/mobilenet_v3_large.yaml | 1 - .../recipe/classification/h_label_cls/tv_efficientnet_b3.yaml | 1 - .../recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml | 1 - .../recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml | 1 - src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml | 1 - src/otx/recipe/classification/multi_class_cls/dino_v2.yaml | 1 - .../recipe/classification/multi_class_cls/efficientnet_b0.yaml | 1 - .../recipe/classification/multi_class_cls/efficientnet_v2.yaml | 1 - .../classification/multi_class_cls/mobilenet_v3_large.yaml | 1 - 
src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml | 1 - .../recipe/classification/multi_label_cls/efficientnet_b0.yaml | 1 - .../recipe/classification/multi_label_cls/efficientnet_v2.yaml | 1 - .../classification/multi_label_cls/mobilenet_v3_large.yaml | 1 - .../classification/multi_label_cls/tv_efficientnet_b3.yaml | 1 - .../classification/multi_label_cls/tv_efficientnet_v2_l.yaml | 1 - .../classification/multi_label_cls/tv_mobilenet_v3_small.yaml | 1 - 19 files changed, 19 deletions(-) diff --git a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml index b2bc79fef45..9469563c59c 100644 --- a/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/h_label_cls/deit_tiny.yaml @@ -44,7 +44,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml index 5c006188c63..c5cc51445a8 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_b0.yaml @@ -43,7 +43,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml index 471b2baabff..981c0a5f300 100644 --- a/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/h_label_cls/efficientnet_v2.yaml @@ -43,7 +43,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop 
init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml index 6d99e936385..cce11e2a3b7 100644 --- a/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/h_label_cls/mobilenet_v3_large.yaml @@ -49,7 +49,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml index 9a3f3d0b420..ce6165b9481 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_b3.yaml @@ -45,7 +45,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml index a1215c0c600..b370c8c641f 100644 --- a/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_efficientnet_v2_l.yaml @@ -45,7 +45,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml index 6ca3c17f354..625c168a425 100644 
--- a/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/h_label_cls/tv_mobilenet_v3_small.yaml @@ -45,7 +45,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml index 96d0f1dc84a..a343f92ce32 100644 --- a/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_class_cls/deit_tiny.yaml @@ -43,7 +43,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml index 8a15d393136..82992523510 100644 --- a/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/dino_v2.yaml @@ -50,7 +50,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 is_numpy_to_tvtensor: true sampler: class_path: otx.algo.samplers.balanced_sampler.BalancedSampler diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml index 4cae16271cf..b0a75b3acdf 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_b0.yaml @@ -44,7 +44,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip 
init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml index ac69fb14801..fd8c8fb7a37 100644 --- a/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_class_cls/efficientnet_v2.yaml @@ -43,7 +43,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml index 7b693c633d4..b0f8b8be3d9 100644 --- a/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_class_cls/mobilenet_v3_large.yaml @@ -48,7 +48,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml index b65e3f17c28..e1807f459fb 100644 --- a/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml +++ b/src/otx/recipe/classification/multi_label_cls/deit_tiny.yaml @@ -49,7 +49,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml index 6f5e1f31e87..31cf496e5ed 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml +++ 
b/src/otx/recipe/classification/multi_label_cls/efficientnet_b0.yaml @@ -46,7 +46,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml index 78173e84f85..081de4d4597 100644 --- a/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml +++ b/src/otx/recipe/classification/multi_label_cls/efficientnet_v2.yaml @@ -49,7 +49,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml index b40c65e435d..ac00415e09f 100644 --- a/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml +++ b/src/otx/recipe/classification/multi_label_cls/mobilenet_v3_large.yaml @@ -50,7 +50,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml index 6eb76223f79..a22867ecb1c 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_b3.yaml @@ -45,7 +45,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip 
init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml index 7f8bf59b11e..542b7e94a7c 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_efficientnet_v2_l.yaml @@ -49,7 +49,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.EfficientNetRandomCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 diff --git a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml index b0885b7cf56..5c51a4e8bd9 100644 --- a/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml +++ b/src/otx/recipe/classification/multi_label_cls/tv_mobilenet_v3_small.yaml @@ -45,7 +45,6 @@ overrides: - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: 224 - backend: cv2 - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 From a7fdfe0cfe14a32cb065b64fcb9604b2684f1ea0 Mon Sep 17 00:00:00 2001 From: "Kim, Sungchul" Date: Fri, 12 Jul 2024 17:49:37 +0900 Subject: [PATCH 33/33] Update type annotation Co-authored-by: Eunwoo Shin --- src/otx/core/data/transform_libs/torchvision.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/otx/core/data/transform_libs/torchvision.py b/src/otx/core/data/transform_libs/torchvision.py index 703053b0b40..9997b9e76e2 100644 --- a/src/otx/core/data/transform_libs/torchvision.py +++ b/src/otx/core/data/transform_libs/torchvision.py @@ -2339,7 +2339,7 @@ def __init__( self, crop_size: tuple, # (H, W) crop_type: str = "absolute", - cat_max_ratio: float = 1, + cat_max_ratio: int | float = 1, allow_negative_crop: bool = False, recompute_bbox: bool = False, bbox_clip_border: 
bool = True,