openvinotoolkit · jaegukhyun · Apr 11, 2024 · Apr 8, 2024 · Apr 8, 2024 · Apr 8, 2024
@@ -9,6 +9,8 @@ All notable changes to this project will be documented in this file.
 ### New features
 
 - Add zero-shot visual prompting (<https://github.com/openvinotoolkit/training_extensions/pull/2616>, <https://github.com/openvinotoolkit/training_extensions/pull/2706>, <https://github.com/openvinotoolkit/training_extensions/pull/2753>)
+- Add support for the training and validation on the XPU devices (https://github.com/openvinotoolkit/training_extensions/pull/3058)
+- Add a new configurable parameter nms_iou_threshold to control the IoU threshold of NMS post-processing (https://github.com/openvinotoolkit/training_extensions/pull/3287)
 - Add support for the training and validation on the Intel Max GPU devices (https://github.com/openvinotoolkit/training_extensions/pull/3058)
 
 ### Enhancements

@@ -231,6 +231,20 @@ class BasePostprocessing(ParameterGroup):
             affects_outcome_of=ModelLifecycle.INFERENCE,
         )
 
+        nms_iou_threshold = configurable_float(  # user-editable IoU threshold used by NMS post-processing
+            default_value=0.5,
+            min_value=0,
+            max_value=1,
+            header="NMS IoU Threshold",
+            description="IoU Threshold for NMS Postprocessing. "
+            "Intersection over Union (IoU) threshold is set to remove overlapping predictions. "
+            "If the IoU between two predictions is greater than or equal to the IoU threshold, "
+            "they are considered overlapping and will be discarded.",
+            affects_outcome_of=ModelLifecycle.INFERENCE,
+            warning="If you want to change the value of IoU Threshold of model, "
+            "then you need to re-train model with new IoU threshold.",
+        )
+
         max_num_detections = configurable_integer(
             header="Maximum number of detection per image",
             description="Extra detection outputs will be discared in non-maximum suppression process. "

@@ -42,6 +42,7 @@ def configure_model(self, cfg, data_classes, model_classes, ir_options, **kwargs
         super().configure_model(cfg, data_classes, model_classes, ir_options, **kwargs)
         self.configure_regularization(cfg)
         self.configure_max_num_detections(cfg, kwargs.get("max_num_detections", 0))
+        self.configure_nms_iou_threshold(cfg, kwargs.get("nms_iou_threshold", 0.5))
 
     def configure_max_num_detections(self, cfg, max_num_detections):
         """Patch config for maximum number of detections."""
@@ -61,6 +62,29 @@ def configure_max_num_detections(self, cfg, max_num_detections):
                 train_cfg.rpn_proposal.nms_pre = max_num_detections * 20
                 train_cfg.rpn_proposal.max_per_img = max_num_detections * 10
 
+    def configure_nms_iou_threshold(self, cfg, nms_iou_threshold):
+        """Patch the NMS IoU threshold in model test_cfg (nms or rcnn.nms) and in tile_cfg, when present."""
+        if "test_cfg" in cfg.model and "nms" in cfg.model.test_cfg:
+            logger.info(
+                "IoU NMS Threshold will be updated from "
+                f"{cfg.model.test_cfg.nms.iou_threshold} --> {nms_iou_threshold}"
+            )
+            cfg.model.test_cfg.nms.iou_threshold = nms_iou_threshold
+        elif "test_cfg" in cfg.model and "rcnn" in cfg.model.test_cfg and "nms" in cfg.model.test_cfg.rcnn:
+            logger.info(
+                "IoU NMS Threshold will be updated from "
+                f"{cfg.model.test_cfg.rcnn.nms.iou_threshold} --> {nms_iou_threshold}"
+            )
+            cfg.model.test_cfg.rcnn.nms.iou_threshold = nms_iou_threshold
+        else:
+            logger.warning("Detector do not have nms postprocessing, user specified nms threshold will be omitted")
+        if "tile_cfg" in cfg:
+            logger.info(
+                "IoU NMS Threshold for tiling will be updated from "
+                f"{cfg.tile_cfg.iou_threshold} --> {nms_iou_threshold}"
+            )
+            cfg.tile_cfg.iou_threshold = nms_iou_threshold  # assign after logging so the old value is reported
+
     def configure_regularization(self, cfg):  # noqa: C901
         """Patch regularization parameters."""
         if self.training:

@@ -180,6 +180,7 @@ def configure(self, training=True, ir_options=None, train_dataset=None, export=F
             self._input_size,
             train_dataset=train_dataset,
             max_num_detections=self.max_num_detections,
+            nms_iou_threshold=self.nms_iou_threshold,
         )
         if should_cluster_anchors(self._recipe_cfg):
             if train_dataset is not None:
@@ -494,9 +495,11 @@ def _export_model(
         assert len(self._precision) == 1
         export_options["precision"] = str(self._precision[0])
         export_options["type"] = str(export_format)
+        logger.info(f"Export nms_iou_threshold: {self.nms_iou_threshold}")
+        post_proc_cfg = export_options["deploy_cfg"]["codebase_config"]["post_processing"]
+        post_proc_cfg["iou_threshold"] = self.nms_iou_threshold
         if self.max_num_detections > 0:
             logger.info(f"Export max_num_detections: {self.max_num_detections}")
-            post_proc_cfg = export_options["deploy_cfg"]["codebase_config"]["post_processing"]
             post_proc_cfg["max_output_boxes_per_class"] = self.max_num_detections
             post_proc_cfg["keep_top_k"] = self.max_num_detections
             post_proc_cfg["pre_top_k"] = self.max_num_detections * 10

@@ -8,7 +8,7 @@
 img_size = (992, 736)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True)

@@ -8,7 +8,7 @@
 img_size = (512, 512)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

@@ -8,7 +8,7 @@
 img_size = (512, 512)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=True)

@@ -10,7 +10,7 @@
 img_scale = (640, 640)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=False)

@@ -34,7 +34,7 @@
         debug=False,
     ),
     test_cfg=dict(
-        nms=dict(type="nms", iou_threshold=0.45),
+        nms=dict(type="nms", iou_threshold=0.5),
         min_bbox_size=0,
         score_thr=0.02,
         max_per_img=200,

@@ -294,6 +294,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  nms_iou_threshold:
+    affects_outcome_of: INFERENCE
+    default_value: 0.5
+    description:
+      IoU Threshold for NMS Postprocessing. Intersection over Union (IoU) threshold is set to remove overlapping predictions.
+      If the IoU between two predictions is greater than or equal to the IoU threshold, they are considered overlapping and will be discarded.
+    editable: true
+    header: NMS IoU Threshold
+    max_value: 1
+    min_value: 0
+    type: FLOAT
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0.5 # initial value; matches default_value
+    visible_in_ui: true
+    warning: If you want to change the value of IoU Threshold of model, then you need to re-train model with new IoU threshold.
   max_num_detections:
     affects_outcome_of: INFERENCE
     default_value: 0

@@ -15,7 +15,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
 )
 load_from = "https://download.openmmlab.com/mmdetection/v2.0/yolox/\
 yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth"

@@ -15,7 +15,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
     size_multiplier=160,
     random_size_range=(3, 5),
 )

@@ -15,7 +15,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
 )
 load_from = "https://download.openmmlab.com/mmdetection/v2.0/yolox/\
 yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth"

@@ -15,7 +15,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
     size_multiplier=160,
     random_size_range=(3, 5),
 )

@@ -26,7 +26,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
 )
 load_from = "https://storage.openvinotoolkit.org/repositories/openvino_training_extensions\
 /models/object_detection/v2/yolox_tiny_8x8.pth"

@@ -26,7 +26,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
     size_multiplier=160,
     random_size_range=(3, 5),
 )

@@ -23,7 +23,7 @@
 img_scale = (640, 640)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

@@ -15,7 +15,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
 )
 load_from = "https://download.openmmlab.com/mmdetection/v2.0/yolox\
 /yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth"

@@ -15,7 +15,7 @@
     train_cfg=dict(assigner=dict(type="SimOTAAssigner", center_radius=2.5)),
     # In order to align the source code, the threshold of the val phase is
     # 0.01, and the threshold of the test phase is 0.001.
-    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.65), max_per_img=100),
+    test_cfg=dict(score_thr=0.01, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100),
     size_multiplier=160,
     random_size_range=(3, 5),
 )

@@ -72,7 +72,7 @@
         nms_pre=1000,
         min_bbox_size=0,
         score_thr=0.05,
-        nms=dict(type="nms", iou_threshold=0.6),
+        nms=dict(type="nms", iou_threshold=0.5),
         max_per_img=100,
     ),
     backbone=dict(

@@ -71,7 +71,7 @@
         nms_pre=1000,
         min_bbox_size=0,
         score_thr=0.05,
-        nms=dict(type="nms", iou_threshold=0.6),
+        nms=dict(type="nms", iou_threshold=0.5),
         max_per_img=100,
     ),
     backbone=dict(

@@ -19,7 +19,7 @@
 img_size = (864, 864)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=[0, 0, 0], std=[255, 255, 255], to_rgb=True)

@@ -72,7 +72,7 @@
         nms_pre=1000,
         min_bbox_size=0,
         score_thr=0.05,
-        nms=dict(type="nms", iou_threshold=0.6),
+        nms=dict(type="nms", iou_threshold=0.5),
         max_per_img=100,
     ),
 )

@@ -76,7 +76,7 @@
         nms_pre=1000,
         min_bbox_size=0,
         score_thr=0.05,
-        nms=dict(type="nms", iou_threshold=0.6),
+        nms=dict(type="nms", iou_threshold=0.5),
         max_per_img=100,
     ),
 )

@@ -294,6 +294,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  nms_iou_threshold:
+    affects_outcome_of: INFERENCE
+    default_value: 0.5
+    description:
+      IoU Threshold for NMS Postprocessing. Intersection over Union (IoU) threshold is set to remove overlapping predictions.
+      If the IoU between two predictions is greater than or equal to the IoU threshold, they are considered overlapping and will be discarded.
+    editable: true
+    header: NMS IoU Threshold
+    max_value: 1
+    min_value: 0
+    type: FLOAT
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0.5 # initial value; matches default_value
+    visible_in_ui: true
+    warning: If you want to change the value of IoU Threshold of model, then you need to re-train model with new IoU threshold.
   max_num_detections:
     affects_outcome_of: INFERENCE
     default_value: 0

@@ -119,7 +119,7 @@
             nms=dict(type="nms", iou_threshold=0.8),
             min_bbox_size=0,
         ),
-        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.7), max_per_img=500, mask_thr_binary=0.5),
+        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=500, mask_thr_binary=0.5),
     ),
 )
 load_from = "https://storage.openvinotoolkit.org/repositories/\

@@ -111,7 +111,7 @@
             nms=dict(type="nms", iou_threshold=0.8),
             min_bbox_size=0,
         ),
-        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.7), max_per_img=500, mask_thr_binary=0.5),
+        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=500, mask_thr_binary=0.5),
     ),
 )
 load_from = "https://storage.openvinotoolkit.org/repositories/\

@@ -126,10 +126,10 @@
             nms_across_levels=False,
             nms_pre=1000,
             max_per_img=1000,
-            nms=dict(type="nms", iou_threshold=0.8),
+            nms=dict(type="nms", iou_threshold=0.7),
             min_bbox_size=0,
         ),
-        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.7), max_per_img=500, mask_thr_binary=0.5),
+        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=500, mask_thr_binary=0.5),
     ),
 )
 

@@ -294,6 +294,25 @@ postprocessing:
     value: 0.01
     visible_in_ui: true
     warning: null
+  nms_iou_threshold:
+    affects_outcome_of: INFERENCE
+    default_value: 0.5
+    description:
+      IoU Threshold for NMS Postprocessing. Intersection over Union (IoU) threshold is set to remove overlapping predictions.
+      If the IoU between two predictions is greater than or equal to the IoU threshold, they are considered overlapping and will be discarded.
+    editable: true
+    header: NMS IoU Threshold
+    max_value: 1
+    min_value: 0
+    type: FLOAT
+    ui_rules:
+      action: DISABLE_EDITING
+      operator: AND
+      rules: []
+      type: UI_RULES
+    value: 0.5 # initial value; matches default_value
+    visible_in_ui: true
+    warning: If you want to change the value of IoU Threshold of model, then you need to re-train model with new IoU threshold.
   description: Postprocessing
   header: Postprocessing
   result_based_confidence_threshold:

@@ -109,7 +109,7 @@
             nms=dict(type="nms", iou_threshold=0.8),
             min_bbox_size=0,
         ),
-        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.7), max_per_img=500, mask_thr_binary=0.5),
+        rcnn=dict(score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=500, mask_thr_binary=0.5),
     ),
 )
 load_from = "https://storage.openvinotoolkit.org/repositories/\

@@ -8,7 +8,7 @@
 img_size = (512, 512)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=(103.53, 116.28, 123.675), std=(1.0, 1.0, 1.0), to_rgb=True)

@@ -8,7 +8,7 @@
 img_size = (512, 512)
 
 tile_cfg = dict(
-    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.45, max_per_img=1500, filter_empty_gt=True
+    tile_size=400, min_area_ratio=0.9, overlap_ratio=0.2, iou_threshold=0.5, max_per_img=1500, filter_empty_gt=True
 )
 
 img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)