diff --git a/src/otx/recipe/_base_/data/semantic_segmentation.yaml b/src/otx/recipe/_base_/data/semantic_segmentation.yaml index afd81ee34d5..df4c750b99b 100644 --- a/src/otx/recipe/_base_/data/semantic_segmentation.yaml +++ b/src/otx/recipe/_base_/data/semantic_segmentation.yaml @@ -13,34 +13,22 @@ train_subset: transform_lib_type: TORCHVISION to_tv_image: true transforms: - - class_path: otx.core.data.transform_libs.torchvision.RandomResize + - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: - - 544 - - 544 - ratio_range: + - 512 + - 512 + crop_ratio_range: + - 0.2 + - 1.0 + aspect_ratio_range: - 0.5 - 2.0 transform_mask: true - - class_path: otx.core.data.transform_libs.torchvision.RandomCrop - init_args: - crop_size: - - 512 - - 512 - cat_max_ratio: 0.75 + - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 - - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - - class_path: otx.core.data.transform_libs.torchvision.Pad - init_args: - size: - - 512 - - 512 - pad_val: - img: 0 - mask: 255 - transform_mask: true is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: diff --git a/src/otx/recipe/semantic_segmentation/dino_v2.yaml b/src/otx/recipe/semantic_segmentation/dino_v2.yaml index 04519baf5f6..f51a9033e78 100644 --- a/src/otx/recipe/semantic_segmentation/dino_v2.yaml +++ b/src/otx/recipe/semantic_segmentation/dino_v2.yaml @@ -42,34 +42,22 @@ overrides: data: train_subset: transforms: - - class_path: otx.core.data.transform_libs.torchvision.RandomResize + - class_path: otx.core.data.transform_libs.torchvision.RandomResizedCrop init_args: scale: - - 640 - - 640 - ratio_range: + - 560 + - 560 + crop_ratio_range: + - 0.2 + - 1.0 + aspect_ratio_range: - 0.5 - 2.0 transform_mask: true - - class_path: otx.core.data.transform_libs.torchvision.RandomCrop - init_args: - crop_size: - - 560 - - 560 - cat_max_ratio: 0.75 + - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - class_path: otx.core.data.transform_libs.torchvision.RandomFlip init_args: prob: 0.5 - - class_path: otx.core.data.transform_libs.torchvision.PhotoMetricDistortion - - class_path: otx.core.data.transform_libs.torchvision.Pad - init_args: - size: - - 560 - - 560 - pad_val: - img: 0 - mask: 255 - transform_mask: true is_numpy_to_tvtensor: true - class_path: torchvision.transforms.v2.ToDtype init_args: