Recipe refinement (det, iseg, r-det, sseg, vpm, zsl) (#3712)
* Create base data recipe for each task
* Remove configs which have default value
* Performance check w/ medium dataset for reproducibility
sungchul2 authored Jul 11, 2024
1 parent 2adf5b1 commit 746d07e
Showing 52 changed files with 493 additions and 1,256 deletions.
2 changes: 1 addition & 1 deletion src/otx/algo/segmentation/dino_v2_seg.py
@@ -22,7 +22,7 @@ class DinoV2Seg(BaseSegmModel):
     """DinoV2Seg Model."""
 
     default_backbone_configuration: ClassVar[dict[str, Any]] = {
-        "name": "dinov2_vits14_reg",
+        "name": "dinov2_vits14",
         "freeze_backbone": True,
         "out_index": [8, 9, 10, 11],
     }
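For reference, the backbone `name` maps to an entry point in the DINOv2 torch.hub model zoo; `dinov2_vits14` is the ViT-S/14 variant without register tokens, while the removed `dinov2_vits14_reg` is the variant with them. A minimal sketch of how a frozen multi-level backbone could be built from a configuration like this, assuming the public `facebookresearch/dinov2` hub entry point (illustrative only, not the OTX implementation):

import torch

# Hypothetical stand-alone rebuild of default_backbone_configuration above.
cfg = {"name": "dinov2_vits14", "freeze_backbone": True, "out_index": [8, 9, 10, 11]}

backbone = torch.hub.load("facebookresearch/dinov2", cfg["name"])
if cfg["freeze_backbone"]:
    for p in backbone.parameters():
        p.requires_grad = False  # backbone stays fixed; only the seg head would train

# DINOv2 ViTs expose get_intermediate_layers(); blocks 8-11 become the
# multi-scale feature maps consumed by the segmentation decoder.
x = torch.randn(1, 3, 518, 518)  # H, W must be multiples of the 14-px patch size
feats = backbone.get_intermediate_layers(x, n=cfg["out_index"], reshape=True)
print([tuple(f.shape) for f in feats])  # 4 x (1, 384, 37, 37) for ViT-S/14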
39 changes: 39 additions & 0 deletions src/otx/recipe/_base_/data/detection.yaml
@@ -0,0 +1,39 @@
task: DETECTION
mem_cache_size: 1GB
mem_cache_img_max_size: null
image_color_channel: RGB
stack_images: true
data_format: coco_instances
unannotated_items_ratio: 0.0
train_subset:
  subset_name: train
  transform_lib_type: TORCHVISION
  batch_size: 1
  num_workers: 2
  to_tv_image: false
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  sampler:
    class_path: torch.utils.data.RandomSampler

val_subset:
  subset_name: val
  transform_lib_type: TORCHVISION
  batch_size: 1
  num_workers: 2
  to_tv_image: false
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  sampler:
    class_path: torch.utils.data.RandomSampler

test_subset:
  subset_name: test
  transform_lib_type: TORCHVISION
  batch_size: 1
  num_workers: 2
  to_tv_image: false
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  sampler:
    class_path: torch.utils.data.RandomSampler
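Every recipe in this PR declares objects with the same `class_path`/`init_args` convention, which a jsonargparse-style loader turns into live Python objects. A rough sketch of that resolution with just the standard library and PyYAML, assuming the file above sits at its repository path (the actual OTX config loader adds validation and resolver handling, e.g. for `${as_torch_dtype:...}`, on top):

import importlib
import yaml  # PyYAML

def instantiate(spec: dict):
    """Resolve one {class_path, init_args} mapping into an instance."""
    module_name, _, class_name = spec["class_path"].rpartition(".")
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**spec.get("init_args", {}))

with open("src/otx/recipe/_base_/data/detection.yaml") as f:
    cfg = yaml.safe_load(f)

# Build the declared train-time transform pipeline: here a single ToImage().
transforms = [instantiate(t) for t in cfg["train_subset"]["transforms"]]
print(transforms)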
38 changes: 38 additions & 0 deletions src/otx/recipe/_base_/data/instance_segmentation.yaml
@@ -0,0 +1,38 @@
task: INSTANCE_SEGMENTATION
mem_cache_size: 1GB
mem_cache_img_max_size: null
image_color_channel: RGB
stack_images: true
data_format: coco_instances
include_polygons: true
unannotated_items_ratio: 0.0
train_subset:
  subset_name: train
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 2
  sampler:
    class_path: torch.utils.data.RandomSampler
val_subset:
  subset_name: val
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 2
  sampler:
    class_path: torch.utils.data.RandomSampler
test_subset:
  subset_name: test
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 2
  sampler:
    class_path: torch.utils.data.RandomSampler
38 changes: 38 additions & 0 deletions src/otx/recipe/_base_/data/rotated_detection.yaml
@@ -0,0 +1,38 @@
task: ROTATED_DETECTION
mem_cache_size: 1GB
mem_cache_img_max_size: null
image_color_channel: RGB
stack_images: true
data_format: coco_instances
include_polygons: true
unannotated_items_ratio: 0.0
train_subset:
  subset_name: train
  transform_lib_type: TORCHVISION
  to_tv_image: false
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 2
  sampler:
    class_path: torch.utils.data.RandomSampler
val_subset:
  subset_name: val
  transform_lib_type: TORCHVISION
  to_tv_image: false
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 2
  sampler:
    class_path: torch.utils.data.RandomSampler
test_subset:
  subset_name: test
  transform_lib_type: TORCHVISION
  to_tv_image: false
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 2
  sampler:
    class_path: torch.utils.data.RandomSampler
82 changes: 82 additions & 0 deletions src/otx/recipe/_base_/data/semantic_segmentation.yaml
@@ -0,0 +1,82 @@
task: SEMANTIC_SEGMENTATION
mem_cache_size: 1GB
mem_cache_img_max_size: null
image_color_channel: RGB
data_format: common_semantic_segmentation_with_subset_dirs
include_polygons: true
unannotated_items_ratio: 0.0
ignore_index: 255
train_subset:
  subset_name: train
  batch_size: 8
  num_workers: 4
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.RandomResizedCrop
      init_args:
        size:
          - 512
          - 512
        scale:
          - 0.2
          - 1.0
        ratio:
          - 0.5
          - 2.0
        antialias: true
    - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion
      init_args:
        is_numpy_to_tvtensor: true
    - class_path: torchvision.transforms.v2.RandomHorizontalFlip
    - class_path: torchvision.transforms.v2.ToDtype
      init_args:
        dtype: ${as_torch_dtype:torch.float32}
    - class_path: torchvision.transforms.v2.Normalize
      init_args:
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
  sampler:
    class_path: torch.utils.data.RandomSampler
val_subset:
  subset_name: val
  batch_size: 8
  num_workers: 4
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.Resize
      init_args:
        size:
          - 512
          - 512
    - class_path: torchvision.transforms.v2.ToDtype
      init_args:
        dtype: ${as_torch_dtype:torch.float32}
    - class_path: torchvision.transforms.v2.Normalize
      init_args:
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
  sampler:
    class_path: torch.utils.data.RandomSampler
test_subset:
  subset_name: test
  num_workers: 4
  batch_size: 8
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.Resize
      init_args:
        size:
          - 512
          - 512
    - class_path: torchvision.transforms.v2.ToDtype
      init_args:
        dtype: ${as_torch_dtype:torch.float32}
    - class_path: torchvision.transforms.v2.Normalize
      init_args:
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
  sampler:
    class_path: torch.utils.data.RandomSampler
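The train pipeline above is a conventional crop / photometric-distort / flip / normalize recipe; the mean and std are the standard ImageNet statistics on the 0-255 scale, which is why `ToDtype` casts to float32 without rescaling beforehand. A torchvision-only sketch of the same pipeline, with `ColorJitter` standing in for OTX's `PhotoMetricDistortion` (so the color augmentation is approximate, not numerically identical):

import torch
from torchvision.transforms import v2

train_tf = v2.Compose([
    v2.ToImage(),
    v2.RandomResizedCrop(size=(512, 512), scale=(0.2, 1.0), ratio=(0.5, 2.0), antialias=True),
    v2.ColorJitter(brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05),  # rough stand-in
    v2.RandomHorizontalFlip(),
    v2.ToDtype(torch.float32),  # cast only; pixel values remain in 0-255
    v2.Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375]),
])

img = torch.randint(0, 256, (3, 600, 800), dtype=torch.uint8)
out = train_tf(img)
print(out.shape, out.dtype)  # torch.Size([3, 512, 512]) torch.float32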
8 changes: 4 additions & 4 deletions src/otx/recipe/_base_/data/torchvision_base.yaml
@@ -2,13 +2,13 @@ task: MULTI_CLASS_CLS
 mem_cache_size: 1GB
 mem_cache_img_max_size: null
 image_color_channel: RGB
-stack_images: False
+stack_images: false
 data_format: imagenet_with_subset_dirs
 unannotated_items_ratio: 0.0
 train_subset:
   subset_name: train
   transform_lib_type: TORCHVISION
-  to_tv_image: True
+  to_tv_image: true
   transforms:
     - class_path: torchvision.transforms.v2.ToImage
   batch_size: 1
@@ -18,7 +18,7 @@ train_subset:
 val_subset:
   subset_name: val
   transform_lib_type: TORCHVISION
-  to_tv_image: True
+  to_tv_image: true
   transforms:
     - class_path: torchvision.transforms.v2.ToImage
   batch_size: 1
@@ -28,7 +28,7 @@ val_subset:
 test_subset:
   subset_name: test
   transform_lib_type: TORCHVISION
-  to_tv_image: True
+  to_tv_image: true
   transforms:
     - class_path: torchvision.transforms.v2.ToImage
   batch_size: 1
76 changes: 76 additions & 0 deletions src/otx/recipe/_base_/data/visual_prompting.yaml
@@ -0,0 +1,76 @@
task: VISUAL_PROMPTING
mem_cache_size: 1GB
mem_cache_img_max_size: null
image_color_channel: RGB
stack_images: false
data_format: coco_instances
unannotated_items_ratio: 0.0
vpm_config:
  use_bbox: true
  use_point: false

train_subset:
  subset_name: train
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge
      init_args:
        size: 1024
        antialias: true
    - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare
    - class_path: torchvision.transforms.v2.ToDtype
      init_args:
        dtype: ${as_torch_dtype:torch.float32}
    - class_path: torchvision.transforms.v2.Normalize
      init_args:
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
  batch_size: 2
  num_workers: 4
  sampler:
    class_path: torch.utils.data.RandomSampler

val_subset:
  subset_name: val
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge
      init_args:
        size: 1024
        antialias: true
    - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare
    - class_path: torchvision.transforms.v2.ToDtype
      init_args:
        dtype: ${as_torch_dtype:torch.float32}
    - class_path: torchvision.transforms.v2.Normalize
      init_args:
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
  batch_size: 1
  num_workers: 4
  sampler:
    class_path: torch.utils.data.RandomSampler

test_subset:
  subset_name: test
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: otx.core.data.transform_libs.torchvision.ResizetoLongestEdge
      init_args:
        size: 1024
        antialias: true
    - class_path: otx.core.data.transform_libs.torchvision.PadtoSquare
    - class_path: torchvision.transforms.v2.ToDtype
      init_args:
        dtype: ${as_torch_dtype:torch.float32}
    - class_path: torchvision.transforms.v2.Normalize
      init_args:
        mean: [123.675, 116.28, 103.53]
        std: [58.395, 57.12, 57.375]
  batch_size: 1
  num_workers: 4
  sampler:
    class_path: torch.utils.data.RandomSampler
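`ResizetoLongestEdge` followed by `PadtoSquare` is the SAM-style preprocessing: scale the image so its longer side equals 1024, then pad the bottom and right edges out to a square 1024x1024 input. A self-contained sketch of that geometry with plain torch ops, assuming a CHW float image (the real OTX transforms also remap boxes, points, and masks, which this omits):

import torch
import torch.nn.functional as F

def resize_longest_then_pad(img: torch.Tensor, target: int = 1024) -> torch.Tensor:
    """Resize a CHW image so max(H, W) == target, then zero-pad to target x target."""
    _, h, w = img.shape
    scale = target / max(h, w)
    new_h, new_w = round(h * scale), round(w * scale)
    img = F.interpolate(img[None], size=(new_h, new_w), mode="bilinear", antialias=True)[0]
    # Pad right/bottom only, so prompt coordinates need the same scale and no offset.
    return F.pad(img, (0, target - new_w, 0, target - new_h))

out = resize_longest_then_pad(torch.rand(3, 480, 640))
print(out.shape)  # torch.Size([3, 1024, 1024])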
44 changes: 44 additions & 0 deletions src/otx/recipe/_base_/data/zero_shot_visual_prompting.yaml
@@ -0,0 +1,44 @@
task: ZERO_SHOT_VISUAL_PROMPTING
mem_cache_size: 1GB
mem_cache_img_max_size: null
image_color_channel: RGB
stack_images: false
data_format: coco_instances
unannotated_items_ratio: 0.0

vpm_config:
  use_bbox: True
  use_point: False

train_subset:
  subset_name: train
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 4
  sampler:
    class_path: torch.utils.data.RandomSampler

val_subset:
  subset_name: val
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 4
  sampler:
    class_path: torch.utils.data.RandomSampler

test_subset:
  subset_name: test
  transform_lib_type: TORCHVISION
  to_tv_image: true
  transforms:
    - class_path: torchvision.transforms.v2.ToImage
  batch_size: 1
  num_workers: 4
  sampler:
    class_path: torch.utils.data.RandomSampler