From 5dfe5181ecd2e6e38d4b309ab0b4c4e02e5be04e Mon Sep 17 00:00:00 2001 From: kprokofi Date: Tue, 31 Oct 2023 16:04:08 +0000 Subject: [PATCH 01/16] added support for OD on habana --- .../common/adapters/mmcv/configurer.py | 17 ++++ .../common/adapters/mmcv/hooks/__init__.py | 3 + .../utils/_builder_build_data_parallel.py | 85 +++++++++++++++++++ src/otx/algorithms/common/utils/__init__.py | 1 + src/otx/algorithms/common/utils/utils.py | 1 + .../detection/adapters/mmdet/apis/train.py | 40 ++++++++- .../adapters/mmdet/evaluation/evaluator.py | 4 +- .../detectors/custom_single_stage_detector.py | 38 ++++++++- .../mmdet/models/heads/custom_ssd_head.py | 34 ++++++++ .../configs/detection/configuration.yaml | 2 +- .../detection/cspdarknet_yolox_x/model.py | 2 +- .../cspdarknet_yolox_x/template.yaml | 4 +- .../detection/mobilenetv2_ssd/model.py | 1 + .../detection/mobilenetv2_ssd/template.yaml | 2 +- .../recipes/stages/detection/incremental.py | 34 ++++---- 15 files changed, 241 insertions(+), 27 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/configurer.py b/src/otx/algorithms/common/adapters/mmcv/configurer.py index 54ee9326b80..3d0e936aef2 100644 --- a/src/otx/algorithms/common/adapters/mmcv/configurer.py +++ b/src/otx/algorithms/common/adapters/mmcv/configurer.py @@ -176,7 +176,11 @@ def configure_device(self, cfg): elif "gpu_ids" not in cfg: cfg.gpu_ids = range(1) +<<<<<<< HEAD # consider "cuda", "hpu" and "cpu" device only +======= + # consider "cuda", "xpu", "hpu" and "cpu" device only +>>>>>>> added support for OD on habana if is_hpu_available(): cfg.device = "hpu" elif torch.cuda.is_available(): @@ -184,7 +188,10 @@ def configure_device(self, cfg): elif is_xpu_available(): try: import intel_extension_for_pytorch as ipex # noqa: F401 +<<<<<<< HEAD +======= +>>>>>>> added support for OD on habana cfg.device = "xpu" except ModuleNotFoundError: cfg.device = "cpu" @@ -263,6 +270,7 @@ def configure_fp16(cfg: Config): distributed = getattr(cfg, "distributed", False) opts: Dict[str, Any] = {} if fp16_config is not None: +<<<<<<< HEAD if is_hpu_available(): if optim_type == "SAMOptimizerHook": # TODO (sungchul): consider SAM optimizer @@ -270,6 +278,9 @@ def configure_fp16(cfg: Config): opts["type"] = "HPUOptimizerHook" cfg.optimizer_config.update(opts) elif torch.cuda.is_available() or is_xpu_available(): +======= + if torch.cuda.is_available() or is_xpu_available(): +>>>>>>> added support for OD on habana opts.update({"distributed": distributed, **fp16_config}) if optim_type == "SAMOptimizerHook": opts["type"] = "Fp16SAMOptimizerHook" @@ -281,6 +292,12 @@ def configure_fp16(cfg: Config): cfg.fp16 = fp16_config opts = dict() cfg.optimizer_config.update(opts) + elif is_hpu_available(): + if optim_type == "SAMOptimizerHook": + # TODO (sungchul): consider SAM optimizer + logger.warning("SAMOptimizerHook is not supported on HPU. 
Changed to OptimizerHook.") + opts["type"] = "HPUOptimizerHook" + cfg.optimizer_config.update(opts) else: logger.info("Revert FP16 to FP32 on CPU device") diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py index a7c41d80fee..a6dd4ea965c 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py @@ -52,6 +52,7 @@ from .semisl_cls_hook import SemiSLClsHook from .task_adapt_hook import TaskAdaptHook from .two_crop_transform_hook import TwoCropTransformHook +from .hpu_optimizer_hook import HPUOptimizerHook, HPUDistOptimizerHook __all__ = [ "AdaptiveRepeatDataHook", @@ -90,6 +91,8 @@ "MeanTeacherHook", "MemCacheHook", "LossDynamicsTrackingHook", + "HPUOptimizerHook", + "HPUDistOptimizerHook", ] try: diff --git a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py index 39c9bf5f7b3..206316db800 100644 --- a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py +++ b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py @@ -12,7 +12,13 @@ from mmcv import Config from mmcv.parallel import MMDataParallel, MMDistributedDataParallel +<<<<<<< HEAD from otx.algorithms.common.utils import is_hpu_available, is_xpu_available +======= +from otx.algorithms.common.utils import is_xpu_available, is_hpu_available +import habana_frameworks.torch as htorch +from torch._utils import _get_device_index +>>>>>>> added support for OD on habana @overload @@ -138,12 +144,63 @@ def val_step(self, *inputs, **kwargs): with torch.autocast(device_type="xpu", dtype=torch.bfloat16, enabled=self.enable_autocast): return super().val_step(*inputs, **kwargs) +<<<<<<< HEAD class HPUDataParallel(MMDataParallel): def __init__(self, *args, enable_autocast: bool = False, **kwargs): super().__init__(*args, **kwargs) self.enable_autocast = enable_autocast self.src_device_obj = torch.device("hpu", self.device_ids[0]) +======= +def _get_available_device_type(): + if torch.cuda.is_available(): + return "cuda" + if hasattr(torch, "xpu") and torch.xpu.is_available(): # type: ignore[attr-defined] + return "xpu" + if is_hpu_available(): + return "hpu" + # add more available device types here + return None + + +def _get_device_attr(get_member): + device_type = _get_available_device_type() + if device_type and device_type.lower() == "cuda": + return get_member(torch.cuda) + if device_type and device_type.lower() == "xpu": + return get_member(torch.xpu) # type: ignore[attr-defined] + if device_type and device_type.lower() == "hpu": + return get_member(htorch.hpu) + # add more available device types here + return None + + +def _get_all_device_indices(): + # all device index + return _get_device_attr(lambda m: list(range(m.device_count()))) + + +class HPUDataParallel(MMDataParallel): + def __init__(self, module, device_ids=None, output_device=None, dim=0, is_autocast=True): + super().__init__(module=module) + device_type = _get_available_device_type() + if device_type is None: + self.module = module + self.device_ids = [] + return + + if device_ids is None: + device_ids = _get_all_device_indices() + + if output_device is None: + output_device = device_ids[0] + + self.dim = dim + self.device_ids = [_get_device_index(x, True) for x in device_ids] + self.output_device = _get_device_index(output_device, True) + self.src_device_obj = 
torch.device(device_type, self.device_ids[0]) + self.is_autocast = is_autocast +>>>>>>> added support for OD on habana def scatter(self, inputs, kwargs, device_ids): inputs, kwargs = super().scatter(inputs, kwargs, [-1]) @@ -154,25 +211,42 @@ def scatter(self, inputs, kwargs, device_ids): if isinstance(val, dict): for k in val: if isinstance(val[k], torch.Tensor): +<<<<<<< HEAD val[k] = val[k].to(self.src_device_obj) elif isinstance(val[k], list): for i, item in enumerate(val[k]): if isinstance(item, torch.Tensor): val[k][i] = item.to(self.src_device_obj) +======= + val[k] = val[k].to(torch.device(f"hpu:{device_ids[0]}")) + elif isinstance(val[k], list): + for i, item in enumerate(val[k]): + if isinstance(item, torch.Tensor): + val[k][i] = item.to(torch.device(f"hpu:{device_ids[0]}")) +>>>>>>> added support for OD on habana for x in kwargs: if isinstance(x, dict): for k in x: if isinstance(x[k], torch.Tensor): +<<<<<<< HEAD x[k] = x[k].to(f"hpu:{device_ids[0]}") elif isinstance(x[k], list): for i, item in enumerate(x[k]): if isinstance(item, torch.Tensor): x[k][i] = item.to(self.src_device_obj) +======= + x[k] = x[k].to("hpu") + elif isinstance(x[k], list): + for i, item in enumerate(x[k]): + if isinstance(item, torch.Tensor): + x[k][i] = item.to(torch.device(f"hpu:{device_ids[0]}")) +>>>>>>> added support for OD on habana return inputs, kwargs def forward(self, *inputs, **kwargs): +<<<<<<< HEAD with torch.cuda.amp.autocast(dtype=torch.bfloat16, enabled=self.enable_autocast): return super().forward(*inputs, **kwargs) @@ -182,4 +256,15 @@ def train_step(self, *inputs, **kwargs): def val_step(self, *inputs, **kwargs): with torch.cuda.amp.autocast(dtype=torch.bfloat16, enabled=self.enable_autocast): +======= + with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.is_autocast): + return super().forward(*inputs, **kwargs) + + def train_step(self, *inputs, **kwargs): + with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.is_autocast): + return super().train_step(*inputs, **kwargs) + + def val_step(self, *inputs, **kwargs): + with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.is_autocast): +>>>>>>> added support for OD on habana return super().val_step(*inputs, **kwargs) diff --git a/src/otx/algorithms/common/utils/__init__.py b/src/otx/algorithms/common/utils/__init__.py index 6395bd6e60d..23bb01ed20f 100644 --- a/src/otx/algorithms/common/utils/__init__.py +++ b/src/otx/algorithms/common/utils/__init__.py @@ -30,6 +30,7 @@ get_task_class, is_hpu_available, is_xpu_available, + is_hpu_available, load_template, read_py_config, set_random_seed, diff --git a/src/otx/algorithms/common/utils/utils.py b/src/otx/algorithms/common/utils/utils.py index 92e1b2f0853..c9b651244a2 100644 --- a/src/otx/algorithms/common/utils/utils.py +++ b/src/otx/algorithms/common/utils/utils.py @@ -17,6 +17,7 @@ import torch import yaml from addict import Dict as adict +import habana_frameworks.torch as htorch HPU_AVAILABLE = None try: diff --git a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py index caf8720b59a..d3f49219a17 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py +++ b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py @@ -23,11 +23,15 @@ from mmdet.utils.util_distribution import build_dp, dp_factory from torchvision.ops import nms as tv_nms from torchvision.ops import roi_align as tv_roi_align +from torch.profiler import profile, record_function, 
ProfilerActivity -from otx.algorithms.common.adapters.mmcv.utils import XPUDataParallel +from habana_frameworks.torch.utils.library_loader import load_habana_module +from otx.algorithms.common.adapters.mmcv.utils import XPUDataParallel, HPUDataParallel ext_module = ext_loader.load_ext("_ext", ["nms", "softnms", "nms_match", "nms_rotated", "nms_quadri"]) dp_factory["xpu"] = XPUDataParallel +dp_factory["hpu"] = HPUDataParallel +load_habana_module() def auto_scale_lr(cfg, distributed, logger): @@ -119,6 +123,15 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times elif cfg.device == "xpu": model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, enable_autocast=bool(fp16_cfg)) model.to(f"xpu:{cfg.gpu_ids[0]}") + elif cfg.device == "hpu": + import habana_frameworks.torch.core as htcore + os.environ["PT_HPU_LAZY_MODE"] = "1" + assert len(cfg.gpu_ids) == 1 + # CHECK IT + model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, dim=0, is_autocast=bool(fp16_cfg)) + # model = HPUDataParallel(model, dim=0, device_ids=cfg.gpu_ids, is_autocast=bool(fp16_cfg)) + model.to(f"hpu:{cfg.gpu_ids[0]}") + htcore.mark_step() else: model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) @@ -137,6 +150,30 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times model.train() model, optimizer = torch.xpu.optimize(model, optimizer=optimizer, dtype=dtype) + if cfg.device == "hpu": + NMSop.forward = monkey_patched_xpu_nms + RoIAlign.forward = monkey_patched_xpu_roi_align + from otx.algorithms.common.adapters.mmcv.optimizer.hpu_optimizer import register_habana_optimizers + habana_optimizers = register_habana_optimizers() + if (new_type := "Fused" + cfg.optimizer.get("type", "SGD")) in habana_optimizers: + cfg.optimizer["type"] = new_type + # activities = [torch.profiler.ProfilerActivity.CPU] + # activities.append(torch.profiler.ProfilerActivity.HPU) + # for epoch in range(10): + # for det_out in data_loaders[0]: + # img = det_out["img"].data[-1].to(torch.device("hpu")) + # img_metas = det_out["img_metas"].data[-1] + # gt_bboxes = [bbox.to(torch.device("hpu")) for bbox in det_out["gt_bboxes"].data[-1]] + # gt_labels = [label.to(torch.device("hpu")) for label in det_out["gt_labels"].data[-1]] + # with torch.profiler.profile( + # # schedule=torch.profiler.schedule(wait=0, warmup=20, active=5, repeat=1), + # activities=activities, + # on_trace_ready=torch.profiler.tensorboard_trace_handler('logs')) as profiler: + # model.module.forward_train(img, img_metas, gt_bboxes, gt_labels) + # print(profiler.key_averages().table()) + # # print(losses) + # breakpoint() + runner = build_runner( cfg.runner, default_args=dict(model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta) ) @@ -220,7 +257,6 @@ def monkey_patched_xpu_nms(ctx, bboxes, scores, iou_threshold, offset, score_thr inds = ext_module.nms(bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) bboxes = bboxes.to(device) scores = scores.to(device) - if max_num > 0: inds = inds[:max_num] if is_filtering_by_score: diff --git a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py index b6e5e6ab2dd..4fbf6a9da7f 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py +++ b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py @@ -60,7 +60,6 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches assert len(scale_ranges) == 
num_scales num_classes = len(results) - recalls = np.zeros((num_scales, num_classes), dtype=np.float32) aps = np.zeros((num_scales, num_classes), dtype=np.float32) num_gts = np.zeros((num_scales, num_classes), dtype=int) @@ -376,7 +375,7 @@ def evaluate(self, results, logger, iou_thr, scale_ranges): metric: mAP and mIoU metric """ if self.domain == Domain.DETECTION: - return eval_map( + output = eval_map( results, self.annotation, scale_ranges=scale_ranges, @@ -384,4 +383,5 @@ def evaluate(self, results, logger, iou_thr, scale_ranges): dataset=self.classes, logger=logger, ) + return output return self.evaluate_mask(results, logger, iou_thr) diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py index f690c38d86b..702da56c428 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py @@ -6,8 +6,10 @@ import functools import torch +import time from mmdet.models.builder import DETECTORS from mmdet.models.detectors.single_stage import SingleStageDetector +from mmdet.core import bbox2result from otx.algorithms.common.adapters.mmcv.hooks.recording_forward_hook import ( FeatureVectorHook, @@ -34,7 +36,7 @@ class CustomSingleStageDetector(SAMDetectorMixin, DetLossDynamicsTrackingMixin, L2SPDetectorMixin, SingleStageDetector): """SAM optimizer & L2SP regularizer enabled custom SSD.""" - TRACKING_LOSS_TYPE = (TrackingLossType.cls, TrackingLossType.bbox) + # TRACKING_LOSS_TYPE = (TrackingLossType.cls, TrackingLossType.bbox) def __init__(self, *args, task_adapt=None, **kwargs): super().__init__(*args, **kwargs) @@ -74,10 +76,44 @@ def forward_train(self, img, img_metas, gt_bboxes, gt_labels, gt_bboxes_ignore=N batch_input_shape = tuple(img[0].size()[-2:]) for img_meta in img_metas: img_meta["batch_input_shape"] = batch_input_shape + ttt = time.time() x = self.extract_feat(img) + print("extract_feat", time.time() - ttt) losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, gt_labels, gt_bboxes_ignore, **kwargs) return losses + def simple_test(self, img, img_metas, rescale=False): + """Test function without test-time augmentation. + + Args: + img (torch.Tensor): Images with shape (N, C, H, W). + img_metas (list[dict]): List of image information. + rescale (bool, optional): Whether to rescale the results. + Defaults to False. + + Returns: + list[list[np.ndarray]]: BBox results of each image and classes. + The outer list corresponds to each image. The inner list + corresponds to each class. 
+ """ + feat = self.extract_feat(img) + results_list = self.bbox_head.simple_test( + feat, img_metas, rescale=rescale) + + # bbox_results = [] + # for det_bboxes, det_labels in results_list: + # if det_bboxes.dtype == torch.bfloat16: + # det_bboxes = det_bboxes.to(torch.float32) + # det_labels = det_labels.to(torch.float32) + # bbox_results.append(bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)) + # else: + # bbox_results.append(bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)) + bbox_results = [ + bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) + for det_bboxes, det_labels in results_list + ] + return bbox_results + @staticmethod def load_state_dict_pre_hook(model, model_classes, chkpt_classes, chkpt_dict, prefix, *args, **kwargs): """Modify input state_dict according to class name matching before weight loading.""" diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py index 6d5f1ce8427..42f37571457 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py @@ -13,6 +13,7 @@ from mmdet.models.dense_heads.ssd_head import SSDHead from mmdet.models.losses import smooth_l1_loss from torch import nn +import time from otx.algorithms.detection.adapters.mmdet.models.heads.cross_dataset_detector_head import TrackingLossDynamicsMixIn from otx.algorithms.detection.adapters.mmdet.models.loss_dyns import ( @@ -81,6 +82,32 @@ def _init_layers(self): nn.Conv2d(in_channel, num_base_priors * self.cls_out_channels, kernel_size=3, padding=1) ) + def forward(self, feats): + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: + cls_scores (list[Tensor]): Classification scores for all scale + levels, each is a 4D-tensor, the channels number is + num_anchors * num_classes. + bbox_preds (list[Tensor]): Box energies / deltas for all scale + levels, each is a 4D-tensor, the channels number is + num_anchors * 4. + """ + cls_scores = [] + bbox_preds = [] + start = time.time() + for feat, reg_conv, cls_conv in zip(feats, self.reg_convs, + self.cls_convs): + cls_scores.append(cls_conv(feat)) + bbox_preds.append(reg_conv(feat)) + print("bbox_head_forward: ", time.time() - start) + return cls_scores, bbox_preds + def loss_single( self, cls_score, @@ -118,6 +145,7 @@ def loss_single( """ # Re-weigting BG loss + start1 = time.time() label_weights = label_weights.reshape(-1) if self.bg_loss_weight >= 0.0: neg_indices = labels == self.num_classes @@ -125,6 +153,7 @@ def loss_single( label_weights[neg_indices] = self.bg_loss_weight loss_cls_all = self.loss_cls(cls_score, labels, label_weights) + print("loss_cls_all: ", time.time() - start1) if len(loss_cls_all.shape) > 1: loss_cls_all = loss_cls_all.sum(-1) # FG cat_id: [0, num_classes -1], BG cat_id: num_classes @@ -146,7 +175,10 @@ def loss_single( # TODO: We need to verify that this is working properly. 
# pylint: disable=redundant-keyword-arg + start = time.time() loss_bbox = self._get_loss_bbox(bbox_pred, bbox_targets, bbox_weights, num_total_samples) + print("loss_bbox: ", time.time() - start) + print("loss_single: ", time.time() - start1) return loss_cls[None], loss_bbox def _get_pos_inds(self, labels): @@ -172,7 +204,9 @@ def _get_loss_cls(self, num_total_samples, loss_cls_all, pos_inds, topk_loss_cls def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore=None): """Loss function.""" + start = time.time() losses = super().loss(cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore) + print("loss_ALL: ", time.time() - start) losses_cls = losses["loss_cls"] losses_bbox = losses["loss_bbox"] diff --git a/src/otx/algorithms/detection/configs/detection/configuration.yaml b/src/otx/algorithms/detection/configs/detection/configuration.yaml index d36b0d941bc..7fb866e16df 100644 --- a/src/otx/algorithms/detection/configs/detection/configuration.yaml +++ b/src/otx/algorithms/detection/configs/detection/configuration.yaml @@ -129,7 +129,7 @@ learning_parameters: warning: null enable_early_stopping: affects_outcome_of: TRAINING - default_value: true + default_value: false description: Early exit from training when validation accuracy isn't changed or decreased for several epochs. editable: true header: Enable early stopping of the training diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py index 857021810d1..e734996e4b1 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py @@ -20,5 +20,5 @@ load_from = "https://download.openmmlab.com/mmdetection/v2.0/yolox\ /yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth" -fp16 = dict(loss_scale=512.0) +fp16 = None ignore = False diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml index 50e07835a96..f59b60c12b1 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml @@ -26,10 +26,10 @@ hyper_parameters: parameter_overrides: learning_parameters: batch_size: - default_value: 4 + default_value: 16 auto_hpo_state: POSSIBLE inference_batch_size: - default_value: 4 + default_value: 16 learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py index 45847b0b80c..bc4eff90aa2 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py @@ -96,4 +96,5 @@ /models/object_detection/v2/mobilenet_v2-2s_ssd-992x736.pth" fp16 = dict(loss_scale=512.0) +# fp16 = None ignore = False diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml index 7b517542b35..3e8768caad4 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml @@ -36,7 +36,7 @@ 
hyper_parameters: learning_rate_warmup_iters: default_value: 3 num_iters: - default_value: 200 + default_value: 50 nncf_optimization: enable_quantization: default_value: true diff --git a/src/otx/recipes/stages/detection/incremental.py b/src/otx/recipes/stages/detection/incremental.py index 9ddd2e28e55..a78cea85e93 100644 --- a/src/otx/recipes/stages/detection/incremental.py +++ b/src/otx/recipes/stages/detection/incremental.py @@ -21,28 +21,28 @@ interval=1, priority=75, ), - dict( - type="EMAHook", - priority="ABOVE_NORMAL", - momentum=0.1, - ), + # dict( + # type="EMAHook", + # priority="ABOVE_NORMAL", + # momentum=0.1, + # ), ] -lr_config = dict( - policy="ReduceLROnPlateau", - metric="mAP", - patience=5, - iteration_patience=0, - interval=1, - min_lr=1e-06, - warmup="linear", - warmup_iters=200, - warmup_ratio=0.3333333333333333, -) +# lr_config = dict( +# policy="ReduceLROnPlateau", +# metric="mAP", +# patience=5, +# iteration_patience=0, +# interval=1, +# min_lr=1e-06, +# warmup="linear", +# warmup_iters=200, +# warmup_ratio=0.3333333333333333, +# ) ignore = True adaptive_validation_interval = dict( max_interval=5, - enable_adaptive_interval_hook=True, + enable_adaptive_interval_hook=False, enable_eval_before_run=True, ) From 1f04dc0b56a883f4ae00156d849fe3f17b6f3fa6 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Tue, 31 Oct 2023 16:05:36 +0000 Subject: [PATCH 02/16] added hpu_opt --- .../common/adapters/mmcv/hooks/hpu_optimizer_hook.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py index f5e26c49083..13eef26de2d 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py @@ -4,8 +4,15 @@ # SPDX-License-Identifier: Apache-2.0 # +<<<<<<< HEAD import habana_frameworks.torch.core as htcore from mmcv.runner import HOOKS, OptimizerHook +======= +from mmcv.runner import HOOKS, OptimizerHook +from mmcls.core import DistOptimizerHook +import time +import habana_frameworks.torch.core as htcore +>>>>>>> added hpu_opt @HOOKS.register_module() From b61cea7f1a98998d418c3859db9636a3fee89e16 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Mon, 6 Nov 2023 14:57:13 +0000 Subject: [PATCH 03/16] added OD support. 
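
For context, a rough sketch of how the HPU path wired up in these patches is expected
to be driven end to end: device registration, lazy mode, data-parallel wrapping and the
fused-optimizer lookup. The names build_dp, dp_factory, HPUDataParallel,
register_habana_optimizers and the PT_HPU_LAZY_MODE flag come from the diffs in this
series; the wrapper function itself is illustrative only and not part of the patches:

    import os

    from mmdet.utils.util_distribution import build_dp, dp_factory

    from otx.algorithms.common.adapters.mmcv.optimizer.hpu_optimizer import (
        register_habana_optimizers,
    )
    from otx.algorithms.common.adapters.mmcv.utils import HPUDataParallel


    def wrap_detector_for_hpu(model, cfg, fp16_cfg=None):
        """Single-card HPU setup (sketch; assumes cfg.gpu_ids == [0])."""
        import habana_frameworks.torch.core as htcore

        os.environ["PT_HPU_LAZY_MODE"] = "1"   # lazy execution mode used throughout this series
        dp_factory["hpu"] = HPUDataParallel    # make build_dp aware of the HPU wrapper
        model = build_dp(model, "hpu", device_ids=cfg.gpu_ids, dim=0,
                         is_autocast=bool(fp16_cfg))
        model.to(f"hpu:{cfg.gpu_ids[0]}")
        htcore.mark_step()                     # flush the lazily accumulated graph

        # prefer the Habana fused optimizer when one is registered for this type
        habana_optimizers = register_habana_optimizers()
        fused_type = "Fused" + cfg.optimizer.get("type", "SGD")
        if fused_type in habana_optimizers:
            cfg.optimizer["type"] = fused_type
        return model

Keeping the setup behind a single helper mirrors the cfg.device == "hpu" branch that
train_detector gains above; on non-HPU builds the branch is simply never taken.
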
--- .../detection/adapters/mmdet/apis/train.py | 20 +------------------ .../mmdet/models/heads/custom_yolox_head.py | 13 +++++++++++- .../cspdarknet_yolox_x/template.yaml | 2 +- 3 files changed, 14 insertions(+), 21 deletions(-) diff --git a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py index d3f49219a17..d1767756d99 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py +++ b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py @@ -23,7 +23,6 @@ from mmdet.utils.util_distribution import build_dp, dp_factory from torchvision.ops import nms as tv_nms from torchvision.ops import roi_align as tv_roi_align -from torch.profiler import profile, record_function, ProfilerActivity from habana_frameworks.torch.utils.library_loader import load_habana_module from otx.algorithms.common.adapters.mmcv.utils import XPUDataParallel, HPUDataParallel @@ -127,11 +126,10 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times import habana_frameworks.torch.core as htcore os.environ["PT_HPU_LAZY_MODE"] = "1" assert len(cfg.gpu_ids) == 1 - # CHECK IT model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, dim=0, is_autocast=bool(fp16_cfg)) - # model = HPUDataParallel(model, dim=0, device_ids=cfg.gpu_ids, is_autocast=bool(fp16_cfg)) model.to(f"hpu:{cfg.gpu_ids[0]}") htcore.mark_step() + model.zero_grad() else: model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) @@ -157,22 +155,6 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times habana_optimizers = register_habana_optimizers() if (new_type := "Fused" + cfg.optimizer.get("type", "SGD")) in habana_optimizers: cfg.optimizer["type"] = new_type - # activities = [torch.profiler.ProfilerActivity.CPU] - # activities.append(torch.profiler.ProfilerActivity.HPU) - # for epoch in range(10): - # for det_out in data_loaders[0]: - # img = det_out["img"].data[-1].to(torch.device("hpu")) - # img_metas = det_out["img_metas"].data[-1] - # gt_bboxes = [bbox.to(torch.device("hpu")) for bbox in det_out["gt_bboxes"].data[-1]] - # gt_labels = [label.to(torch.device("hpu")) for label in det_out["gt_labels"].data[-1]] - # with torch.profiler.profile( - # # schedule=torch.profiler.schedule(wait=0, warmup=20, active=5, repeat=1), - # activities=activities, - # on_trace_ready=torch.profiler.tensorboard_trace_handler('logs')) as profiler: - # model.module.forward_train(img, img_metas, gt_bboxes, gt_labels) - # print(profiler.key_averages().table()) - # # print(losses) - # breakpoint() runner = build_runner( cfg.runner, default_args=dict(model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta) diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py index 5de9fc272ff..8e04b37d16a 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py @@ -43,6 +43,7 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m gt_bboxes_ignore (None | list[Tensor]): specify which bounding boxes can be ignored when computing the loss. 
""" + num_imgs = len(img_metas) featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] mlvl_priors = self.prior_generator.grid_priors( @@ -151,6 +152,14 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m flatten_priors = torch.cat(mlvl_priors) flatten_bboxes = self._bbox_decode(flatten_priors, flatten_bbox_preds) + if "hpu" in flatten_cls_preds.device: + # put loss computastion on CPU -> faster, avoid errors + flatten_cls_preds = flatten_cls_preds.cpu() + flatten_bbox_preds = flatten_bbox_preds.cpu() + flatten_objectness = flatten_objectness.cpu() + flatten_priors = flatten_priors.cpu() + flatten_bboxes = flatten_bboxes.cpu() + # Init variables for loss dynamics tracking self.cur_batch_idx = 0 self.max_gt_bboxes_len = max([len(gt_bbox) for gt_bbox in gt_bboxes]) @@ -219,7 +228,6 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m if self.use_l1: loss_l1 = self.loss_l1(flatten_bbox_preds.view(-1, 4)[pos_masks], l1_targets) / num_total_samples loss_dict.update(loss_l1=loss_l1) - return loss_dict @torch.no_grad() @@ -245,6 +253,9 @@ def _get_target_single(self, cls_preds, objectness, priors, decoded_bboxes, gt_b num_priors = priors.size(0) num_gts = gt_labels.size(0) gt_bboxes = gt_bboxes.to(decoded_bboxes.dtype) + if "hpu" in gt_bboxes.device: + gt_bboxes = gt_bboxes.cpu() + gt_labels = gt_labels.cpu() # No target if num_gts == 0: cls_target = cls_preds.new_zeros((0, self.num_classes)) diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml index f59b60c12b1..ca1d67ef754 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml @@ -36,7 +36,7 @@ hyper_parameters: learning_rate_warmup_iters: default_value: 3 num_iters: - default_value: 200 + default_value: 20 nncf_optimization: enable_quantization: default_value: true From bcc2408753f18f44f050fd496f2a248a2a8dcfd9 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Tue, 7 Nov 2023 12:42:03 +0000 Subject: [PATCH 04/16] optimize a bit YOLOX. Now, inference is fast. 
Training still freezes --- .../adapters/mmcv/hooks/hpu_optimizer_hook.py | 7 -- .../utils/_builder_build_data_parallel.py | 85 ------------------- .../detection/adapters/mmdet/apis/train.py | 5 +- .../adapters/mmdet/evaluation/evaluator.py | 21 +++-- .../mmdet/models/heads/custom_yolox_head.py | 27 ++++-- 5 files changed, 38 insertions(+), 107 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py index 13eef26de2d..f5e26c49083 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py @@ -4,15 +4,8 @@ # SPDX-License-Identifier: Apache-2.0 # -<<<<<<< HEAD import habana_frameworks.torch.core as htcore from mmcv.runner import HOOKS, OptimizerHook -======= -from mmcv.runner import HOOKS, OptimizerHook -from mmcls.core import DistOptimizerHook -import time -import habana_frameworks.torch.core as htcore ->>>>>>> added hpu_opt @HOOKS.register_module() diff --git a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py index 206316db800..39c9bf5f7b3 100644 --- a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py +++ b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py @@ -12,13 +12,7 @@ from mmcv import Config from mmcv.parallel import MMDataParallel, MMDistributedDataParallel -<<<<<<< HEAD from otx.algorithms.common.utils import is_hpu_available, is_xpu_available -======= -from otx.algorithms.common.utils import is_xpu_available, is_hpu_available -import habana_frameworks.torch as htorch -from torch._utils import _get_device_index ->>>>>>> added support for OD on habana @overload @@ -144,63 +138,12 @@ def val_step(self, *inputs, **kwargs): with torch.autocast(device_type="xpu", dtype=torch.bfloat16, enabled=self.enable_autocast): return super().val_step(*inputs, **kwargs) -<<<<<<< HEAD class HPUDataParallel(MMDataParallel): def __init__(self, *args, enable_autocast: bool = False, **kwargs): super().__init__(*args, **kwargs) self.enable_autocast = enable_autocast self.src_device_obj = torch.device("hpu", self.device_ids[0]) -======= -def _get_available_device_type(): - if torch.cuda.is_available(): - return "cuda" - if hasattr(torch, "xpu") and torch.xpu.is_available(): # type: ignore[attr-defined] - return "xpu" - if is_hpu_available(): - return "hpu" - # add more available device types here - return None - - -def _get_device_attr(get_member): - device_type = _get_available_device_type() - if device_type and device_type.lower() == "cuda": - return get_member(torch.cuda) - if device_type and device_type.lower() == "xpu": - return get_member(torch.xpu) # type: ignore[attr-defined] - if device_type and device_type.lower() == "hpu": - return get_member(htorch.hpu) - # add more available device types here - return None - - -def _get_all_device_indices(): - # all device index - return _get_device_attr(lambda m: list(range(m.device_count()))) - - -class HPUDataParallel(MMDataParallel): - def __init__(self, module, device_ids=None, output_device=None, dim=0, is_autocast=True): - super().__init__(module=module) - device_type = _get_available_device_type() - if device_type is None: - self.module = module - self.device_ids = [] - return - - if device_ids is None: - device_ids = _get_all_device_indices() - - if output_device is None: - output_device = 
device_ids[0] - - self.dim = dim - self.device_ids = [_get_device_index(x, True) for x in device_ids] - self.output_device = _get_device_index(output_device, True) - self.src_device_obj = torch.device(device_type, self.device_ids[0]) - self.is_autocast = is_autocast ->>>>>>> added support for OD on habana def scatter(self, inputs, kwargs, device_ids): inputs, kwargs = super().scatter(inputs, kwargs, [-1]) @@ -211,42 +154,25 @@ def scatter(self, inputs, kwargs, device_ids): if isinstance(val, dict): for k in val: if isinstance(val[k], torch.Tensor): -<<<<<<< HEAD val[k] = val[k].to(self.src_device_obj) elif isinstance(val[k], list): for i, item in enumerate(val[k]): if isinstance(item, torch.Tensor): val[k][i] = item.to(self.src_device_obj) -======= - val[k] = val[k].to(torch.device(f"hpu:{device_ids[0]}")) - elif isinstance(val[k], list): - for i, item in enumerate(val[k]): - if isinstance(item, torch.Tensor): - val[k][i] = item.to(torch.device(f"hpu:{device_ids[0]}")) ->>>>>>> added support for OD on habana for x in kwargs: if isinstance(x, dict): for k in x: if isinstance(x[k], torch.Tensor): -<<<<<<< HEAD x[k] = x[k].to(f"hpu:{device_ids[0]}") elif isinstance(x[k], list): for i, item in enumerate(x[k]): if isinstance(item, torch.Tensor): x[k][i] = item.to(self.src_device_obj) -======= - x[k] = x[k].to("hpu") - elif isinstance(x[k], list): - for i, item in enumerate(x[k]): - if isinstance(item, torch.Tensor): - x[k][i] = item.to(torch.device(f"hpu:{device_ids[0]}")) ->>>>>>> added support for OD on habana return inputs, kwargs def forward(self, *inputs, **kwargs): -<<<<<<< HEAD with torch.cuda.amp.autocast(dtype=torch.bfloat16, enabled=self.enable_autocast): return super().forward(*inputs, **kwargs) @@ -256,15 +182,4 @@ def train_step(self, *inputs, **kwargs): def val_step(self, *inputs, **kwargs): with torch.cuda.amp.autocast(dtype=torch.bfloat16, enabled=self.enable_autocast): -======= - with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.is_autocast): - return super().forward(*inputs, **kwargs) - - def train_step(self, *inputs, **kwargs): - with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.is_autocast): - return super().train_step(*inputs, **kwargs) - - def val_step(self, *inputs, **kwargs): - with torch.autocast(device_type="hpu", dtype=torch.bfloat16, enabled=self.is_autocast): ->>>>>>> added support for OD on habana return super().val_step(*inputs, **kwargs) diff --git a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py index d1767756d99..819e51f8f6b 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py +++ b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py @@ -126,8 +126,9 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times import habana_frameworks.torch.core as htcore os.environ["PT_HPU_LAZY_MODE"] = "1" assert len(cfg.gpu_ids) == 1 - model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, dim=0, is_autocast=bool(fp16_cfg)) - model.to(f"hpu:{cfg.gpu_ids[0]}") + model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, dim=0, + is_autocast=bool(fp16_cfg), put_gt_on_device=False) + model.to(f"hpu:{cfg.gpu_ids[0]}", non_blocking=True) htcore.mark_step() model.zero_grad() else: diff --git a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py index 4fbf6a9da7f..553b9bc7543 100644 --- 
a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py +++ b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py @@ -16,6 +16,7 @@ import multiprocessing as mp from typing import Dict, List, Tuple, Union +import time import mmcv import numpy as np @@ -25,12 +26,14 @@ from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps from mmdet.core.evaluation.class_names import get_classes from mmdet.core.evaluation.mean_ap import average_precision +from mmdet.core.evaluation import mean_ap from terminaltables import AsciiTable from otx.api.entities.label import Domain from otx.api.utils.time_utils import timeit + def print_map_summary( # pylint: disable=too-many-locals,too-many-branches mean_ap, results, dataset=None, scale_ranges=None, logger=None ): @@ -59,6 +62,7 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches if scale_ranges is not None: assert len(scale_ranges) == num_scales + segmentation = "miou" in results num_classes = len(results) recalls = np.zeros((num_scales, num_classes), dtype=np.float32) aps = np.zeros((num_scales, num_classes), dtype=np.float32) @@ -68,7 +72,8 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches if cls_result["recall"].size > 0: recalls[:, i] = np.array(cls_result["recall"], ndmin=2)[:, -1] aps[:, i] = cls_result["ap"] - mious[:, i] = cls_result["miou"] + if segmentation: + mious[:, i] = cls_result["miou"] num_gts[:, i] = cls_result["num_gts"] if dataset is None: @@ -81,7 +86,9 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches if not isinstance(mean_ap, list): mean_ap = [mean_ap] - header = ["class", "gts", "dets", "recall", "ap", "miou"] + header = ["class", "gts", "dets", "recall", "ap"] + if segmentation: + header.append("miou") for i in range(num_scales): if scale_ranges is not None: print_log(f"Scale range {scale_ranges[i]}", logger=logger) @@ -92,13 +99,16 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches num_gts[i, j], results[j]["num_dets"], f"{recalls[i, j]:.3f}", - f"{aps[i, j]:.3f}", - f"{mious[i, j]:.3f}", + f"{aps[i, j]:.3f}" ] + if segmentation: + row_data.append(f"{mious[i, j]:.3f}") table_data.append(row_data) - table_data.append(["mAP", "", "", "", f"{mean_ap[i]:.3f}", f"{np.mean(mious[i]):.3f}"]) + table_ = ["mAP", "", "", "", f"{mean_ap[i]:.3f}", f"{np.mean(mious[i]):.3f}"] if segmentation else ["mAP", "", "", "", f"{mean_ap[i]:.3f}"] + table_data.append(table_) table = AsciiTable(table_data) table.inner_footing_row_border = True + time.sleep(0.1) # prevent segmentation fault print_log("\n" + table.table, logger=logger) @@ -244,6 +254,7 @@ def __init__(self, annotation: List[Dict], domain: Domain, classes: List[str], n else: self.annotation = annotation self.nproc = nproc + mean_ap.print_map_summary = print_map_summary def get_gt_instance_masks(self, annotation: List[Dict]): """Format ground truth instance mask annotation. 
diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py index 8e04b37d16a..69fb668e2f3 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py @@ -104,6 +104,25 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m return loss_dict + def forward_single(self, x, cls_convs, reg_convs, conv_cls, conv_reg, + conv_obj): + """Forward feature of a single scale level.""" + + cls_feat = cls_convs(x) + reg_feat = reg_convs(x) + + cls_score = conv_cls(cls_feat) + bbox_pred = conv_reg(reg_feat) + objectness = conv_obj(reg_feat) + + if cls_score.device.type == "hpu": + # put on cpu for further post-processing + cls_score = cls_score.cpu() + bbox_pred = bbox_pred.cpu() + objectness = objectness.cpu() + + return cls_score, bbox_pred, objectness + @HEADS.register_module() class CustomYOLOXHeadTrackingLossDynamics(TrackingLossDynamicsMixIn, CustomYOLOXHead): @@ -152,14 +171,6 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m flatten_priors = torch.cat(mlvl_priors) flatten_bboxes = self._bbox_decode(flatten_priors, flatten_bbox_preds) - if "hpu" in flatten_cls_preds.device: - # put loss computastion on CPU -> faster, avoid errors - flatten_cls_preds = flatten_cls_preds.cpu() - flatten_bbox_preds = flatten_bbox_preds.cpu() - flatten_objectness = flatten_objectness.cpu() - flatten_priors = flatten_priors.cpu() - flatten_bboxes = flatten_bboxes.cpu() - # Init variables for loss dynamics tracking self.cur_batch_idx = 0 self.max_gt_bboxes_len = max([len(gt_bbox) for gt_bbox in gt_bboxes]) From 2455fe7d922a53dc59f78acbf2ae5947b3f0255b Mon Sep 17 00:00:00 2001 From: kprokofi Date: Tue, 7 Nov 2023 16:59:25 +0000 Subject: [PATCH 05/16] SSD, ATSS e2e training --- .../mmcv/hooks/recording_forward_hook.py | 2 ++ .../utils/_builder_build_data_parallel.py | 4 +-- .../detectors/custom_single_stage_detector.py | 11 ------ .../mmdet/models/heads/custom_atss_head.py | 35 +++++++++++++++++++ .../mmdet/models/heads/custom_ssd_head.py | 19 +++++----- .../detection/mobilenetv2_ssd/template.yaml | 2 +- .../recipes/stages/detection/incremental.py | 32 ++++++++--------- 7 files changed, 64 insertions(+), 41 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py index 062cc230367..d4df8bbbc22 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py @@ -74,6 +74,8 @@ def _recording_forward( ): # pylint: disable=unused-argument tensors = self.func(output) if isinstance(tensors, torch.Tensor): + if tensors.dtype == torch.bfloat16: + tensors = tensors.to(torch.float32) tensors_np = tensors.detach().cpu().numpy() elif isinstance(tensors, np.ndarray): tensors_np = tensors diff --git a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py index 39c9bf5f7b3..df1a651bbc8 100644 --- a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py +++ b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py @@ -64,8 +64,8 @@ def build_data_parallel( model = model.xpu() model = 
XPUDataParallel(model, device_ids=config.gpu_ids) elif is_hpu_available() and config.get("gpu_ids", []): - model = model.hpu() - model = HPUDataParallel(model, device_ids=config.gpu_ids) + model = model.to("hpu") + model = HPUDataParallel(model, device_ids=config.gpu_ids, put_gt_on_device=False) elif torch.cuda.is_available() and config.get("gpu_ids", []): if distributed: model = model.cuda() diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py index 702da56c428..07bce4440ed 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py @@ -40,7 +40,6 @@ class CustomSingleStageDetector(SAMDetectorMixin, DetLossDynamicsTrackingMixin, def __init__(self, *args, task_adapt=None, **kwargs): super().__init__(*args, **kwargs) - # Hook for class-sensitive weight loading if task_adapt: self._register_load_state_dict_pre_hook( @@ -76,9 +75,7 @@ def forward_train(self, img, img_metas, gt_bboxes, gt_labels, gt_bboxes_ignore=N batch_input_shape = tuple(img[0].size()[-2:]) for img_meta in img_metas: img_meta["batch_input_shape"] = batch_input_shape - ttt = time.time() x = self.extract_feat(img) - print("extract_feat", time.time() - ttt) losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, gt_labels, gt_bboxes_ignore, **kwargs) return losses @@ -100,14 +97,6 @@ def simple_test(self, img, img_metas, rescale=False): results_list = self.bbox_head.simple_test( feat, img_metas, rescale=rescale) - # bbox_results = [] - # for det_bboxes, det_labels in results_list: - # if det_bboxes.dtype == torch.bfloat16: - # det_bboxes = det_bboxes.to(torch.float32) - # det_labels = det_labels.to(torch.float32) - # bbox_results.append(bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)) - # else: - # bbox_results.append(bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)) bbox_results = [ bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) for det_bboxes, det_labels in results_list diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py index 477790e0d4d..3d708de4460 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py @@ -49,6 +49,41 @@ def __init__(self, *args, bg_loss_weight=-1.0, use_qfl=False, qfl_cfg=None, **kw self.bg_loss_weight = bg_loss_weight self.use_qfl = use_qfl + def forward_single(self, x, scale): + """Forward feature of a single scale level. + + Args: + x (Tensor): Features of a single scale level. + scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize + the bbox prediction. + + Returns: + tuple: + cls_score (Tensor): Cls scores for a single scale level + the channels number is num_anchors * num_classes. + bbox_pred (Tensor): Box energies / deltas for a single scale + level, the channels number is num_anchors * 4. + centerness (Tensor): Centerness for a single scale level, the + channel number is (N, num_anchors * 1, H, W). 
+ """ + cls_feat = x + reg_feat = x + for cls_conv in self.cls_convs: + cls_feat = cls_conv(cls_feat) + for reg_conv in self.reg_convs: + reg_feat = reg_conv(reg_feat) + cls_score = self.atss_cls(cls_feat) + # we just follow atss, not apply exp in bbox_pred + bbox_pred = scale(self.atss_reg(reg_feat)).float() + centerness = self.atss_centerness(reg_feat) + if cls_score.device.type == "hpu": + # put further post-processing on cpu + cls_score = cls_score.cpu() + bbox_pred = bbox_pred.cpu() + centerness = centerness.cpu() + + return cls_score, bbox_pred, centerness + @force_fp32(apply_to=("cls_scores", "bbox_preds", "centernesses")) def loss(self, cls_scores, bbox_preds, centernesses, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore=None): """Compute losses of the head. diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py index 42f37571457..1ab2e6a8bbe 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py @@ -100,12 +100,16 @@ def forward(self, feats): """ cls_scores = [] bbox_preds = [] - start = time.time() for feat, reg_conv, cls_conv in zip(feats, self.reg_convs, self.cls_convs): - cls_scores.append(cls_conv(feat)) - bbox_preds.append(reg_conv(feat)) - print("bbox_head_forward: ", time.time() - start) + cls_out = cls_conv(feat) + reg_out = reg_conv(feat) + if cls_out.device.type == "hpu": + cls_scores.append(cls_out.cpu()) + bbox_preds.append(reg_out.cpu()) + else: + cls_scores.append(cls_out) + bbox_preds.append(reg_out) return cls_scores, bbox_preds def loss_single( @@ -145,7 +149,6 @@ def loss_single( """ # Re-weigting BG loss - start1 = time.time() label_weights = label_weights.reshape(-1) if self.bg_loss_weight >= 0.0: neg_indices = labels == self.num_classes @@ -153,7 +156,6 @@ def loss_single( label_weights[neg_indices] = self.bg_loss_weight loss_cls_all = self.loss_cls(cls_score, labels, label_weights) - print("loss_cls_all: ", time.time() - start1) if len(loss_cls_all.shape) > 1: loss_cls_all = loss_cls_all.sum(-1) # FG cat_id: [0, num_classes -1], BG cat_id: num_classes @@ -175,10 +177,7 @@ def loss_single( # TODO: We need to verify that this is working properly. 
# pylint: disable=redundant-keyword-arg - start = time.time() loss_bbox = self._get_loss_bbox(bbox_pred, bbox_targets, bbox_weights, num_total_samples) - print("loss_bbox: ", time.time() - start) - print("loss_single: ", time.time() - start1) return loss_cls[None], loss_bbox def _get_pos_inds(self, labels): @@ -204,9 +203,7 @@ def _get_loss_cls(self, num_total_samples, loss_cls_all, pos_inds, topk_loss_cls def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore=None): """Loss function.""" - start = time.time() losses = super().loss(cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore) - print("loss_ALL: ", time.time() - start) losses_cls = losses["loss_cls"] losses_bbox = losses["loss_bbox"] diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml index 3e8768caad4..7b517542b35 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/template.yaml @@ -36,7 +36,7 @@ hyper_parameters: learning_rate_warmup_iters: default_value: 3 num_iters: - default_value: 50 + default_value: 200 nncf_optimization: enable_quantization: default_value: true diff --git a/src/otx/recipes/stages/detection/incremental.py b/src/otx/recipes/stages/detection/incremental.py index a78cea85e93..692a7dbee7c 100644 --- a/src/otx/recipes/stages/detection/incremental.py +++ b/src/otx/recipes/stages/detection/incremental.py @@ -21,24 +21,24 @@ interval=1, priority=75, ), - # dict( - # type="EMAHook", - # priority="ABOVE_NORMAL", - # momentum=0.1, - # ), + dict( + type="EMAHook", + priority="ABOVE_NORMAL", + momentum=0.1, + ), ] -# lr_config = dict( -# policy="ReduceLROnPlateau", -# metric="mAP", -# patience=5, -# iteration_patience=0, -# interval=1, -# min_lr=1e-06, -# warmup="linear", -# warmup_iters=200, -# warmup_ratio=0.3333333333333333, -# ) +lr_config = dict( + policy="ReduceLROnPlateau", + metric="mAP", + patience=5, + iteration_patience=0, + interval=1, + min_lr=1e-06, + warmup="linear", + warmup_iters=200, + warmup_ratio=0.3333333333333333, +) ignore = True adaptive_validation_interval = dict( From 322aaf265c4b13562033e321a7e3484d29651e6e Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 12:31:13 +0000 Subject: [PATCH 06/16] stabilize mask rcnn a bit --- .../adapters/mmdet/models/heads/__init__.py | 2 + .../mmdet/models/heads/custom_roi_head.py | 79 +++++++++++++++++++ .../mmdet/models/heads/custom_rpn_head.py | 26 ++++++ .../resnet50_maskrcnn/model.py | 2 +- .../resnet50_maskrcnn/template.yaml | 4 +- 5 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py index 28da39d0a1b..a0a410f3035 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py @@ -14,6 +14,7 @@ from .custom_vfnet_head import CustomVFNetHead from .custom_yolox_head import CustomYOLOXHead from .detr_head import DETRHeadExtension +from .custom_rpn_head import CustomRPNHead __all__ = [ "CrossDatasetDetectorHead", @@ -27,6 +28,7 @@ "CustomVFNetHead", "CustomYOLOXHead", "DETRHeadExtension", + "CustomRPNHead", # Loss dynamics tracking 
"CustomATSSHeadTrackingLossDynamics", ] diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py index 05902fc9e70..fbd8619c0e8 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py @@ -33,6 +33,39 @@ def init_bbox_head(self, bbox_roi_extractor, bbox_head): bbox_head.type = "CustomConvFCBBoxHead" self.bbox_head = build_head(bbox_head) + + def _bbox_forward(self, x, rois): + """Box head forward function used in both training and testing.""" + # TODO: a more flexible way to decide which feature maps to use + bbox_feats = self.bbox_roi_extractor( + x[:self.bbox_roi_extractor.num_inputs], rois) + if self.with_shared_head: + bbox_feats = self.shared_head(bbox_feats) + cls_score, bbox_pred = self.bbox_head(bbox_feats) + + bbox_results = dict( + cls_score=cls_score, bbox_pred=bbox_pred, bbox_feats=bbox_feats) + return bbox_results + + def _mask_forward(self, x, rois=None, pos_inds=None, bbox_feats=None): + """Mask head forward function used in both training and testing.""" + assert ((rois is not None) ^ + (pos_inds is not None and bbox_feats is not None)) + if rois is not None: + mask_feats = self.mask_roi_extractor( + x[:self.mask_roi_extractor.num_inputs], rois) + if self.with_shared_head: + mask_feats = self.shared_head(mask_feats) + else: + assert bbox_feats is not None + mask_feats = bbox_feats[pos_inds] + + mask_pred = self.mask_head(mask_feats) + if mask_pred.device.type == "hpu": + mask_pred = mask_pred.cpu() + mask_results = dict(mask_pred=mask_pred, mask_feats=mask_feats) + return mask_results + def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels, img_metas): """Run forward function and calculate loss for box head in training.""" rois = bbox2roi([res.bboxes for res in sampling_results]) @@ -125,6 +158,52 @@ def get_targets(self, sampling_results, gt_bboxes, gt_labels, img_metas, rcnn_tr valid_label_mask = torch.cat(valid_label_mask, 0) return labels, label_weights, bbox_targets, bbox_weights, valid_label_mask + + def forward(self, x): + # shared part + if self.num_shared_convs > 0: + for conv in self.shared_convs: + x = conv(x) + + if self.num_shared_fcs > 0: + if self.with_avg_pool: + x = self.avg_pool(x) + + x = x.flatten(1) + + for fc in self.shared_fcs: + x = self.relu(fc(x)) + # separate branches + x_cls = x + x_reg = x + + for conv in self.cls_convs: + x_cls = conv(x_cls) + if x_cls.dim() > 2: + if self.with_avg_pool: + x_cls = self.avg_pool(x_cls) + x_cls = x_cls.flatten(1) + for fc in self.cls_fcs: + x_cls = self.relu(fc(x_cls)) + + for conv in self.reg_convs: + x_reg = conv(x_reg) + if x_reg.dim() > 2: + if self.with_avg_pool: + x_reg = self.avg_pool(x_reg) + x_reg = x_reg.flatten(1) + for fc in self.reg_fcs: + x_reg = self.relu(fc(x_reg)) + + cls_score = self.fc_cls(x_cls) if self.with_cls else None + bbox_pred = self.fc_reg(x_reg) if self.with_reg else None + if cls_score.device.type == 'hpu': + cls_score = cls_score.cpu() + bbox_pred = bbox_pred.cpu() + + return cls_score, bbox_pred + + @force_fp32(apply_to=("cls_score", "bbox_pred")) def loss( self, diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py new file mode 100644 index 00000000000..84d23757cca --- /dev/null +++ 
b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py @@ -0,0 +1,26 @@ +import torch.nn.functional as F +from mmdet.models.dense_heads import RPNHead +from mmdet.models.builder import HEADS + + +@HEADS.register_module() +class CustomRPNHead(RPNHead): + """RPN head. + + Args: + in_channels (int): Number of channels in the input feature map. + init_cfg (dict or list[dict], optional): Initialization config dict. + num_convs (int): Number of convolution layers in the head. Default 1. + """ + + def forward_single(self, x): + """Forward feature map of a single scale level.""" + x = self.rpn_conv(x) + x = F.relu(x, inplace=False) + rpn_cls_score = self.rpn_cls(x) + rpn_bbox_pred = self.rpn_reg(x) + if rpn_cls_score.device.type == "hpu": + rpn_cls_score = rpn_cls_score.cpu() + rpn_bbox_pred = rpn_bbox_pred.cpu() + + return rpn_cls_score, rpn_bbox_pred diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py index 6832028e425..d8918edc33f 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py @@ -33,7 +33,7 @@ num_outs=5, ), rpn_head=dict( - type="RPNHead", + type="CustomRPNHead", in_channels=256, feat_channels=256, anchor_generator=dict( diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml index 17a74b1c25e..a77d5a22c47 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml @@ -26,10 +26,10 @@ hyper_parameters: parameter_overrides: learning_parameters: batch_size: - default_value: 4 + default_value: 16 auto_hpo_state: POSSIBLE inference_batch_size: - default_value: 1 + default_value: 16 learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE From 16b8d67351d5b997a99d7387818cac5d8298b2aa Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 12:42:46 +0000 Subject: [PATCH 07/16] don't put gt on hpu for OD --- .../adapters/mmcv/utils/_builder_build_data_parallel.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py index df1a651bbc8..ae865bd10b6 100644 --- a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py +++ b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py @@ -140,9 +140,10 @@ def val_step(self, *inputs, **kwargs): class HPUDataParallel(MMDataParallel): - def __init__(self, *args, enable_autocast: bool = False, **kwargs): + def __init__(self, *args, enable_autocast: bool = False, put_gt_on_device=True, **kwargs): super().__init__(*args, **kwargs) self.enable_autocast = enable_autocast + self.put_gt_on_device = put_gt_on_device self.src_device_obj = torch.device("hpu", self.device_ids[0]) def scatter(self, inputs, kwargs, device_ids): @@ -153,6 +154,8 @@ def scatter(self, inputs, kwargs, device_ids): for val in x: if isinstance(val, dict): for k in val: + if not self.put_gt_on_device and k.startswith("gt_"): + continue if isinstance(val[k], torch.Tensor): val[k] = 
val[k].to(self.src_device_obj) elif isinstance(val[k], list): From 508e0afbfe4a07951253c7bf44eee28d90b4f0c6 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 12:45:25 +0000 Subject: [PATCH 08/16] minor fix --- .../algorithms/common/adapters/mmcv/configurer.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/configurer.py b/src/otx/algorithms/common/adapters/mmcv/configurer.py index 3d0e936aef2..f2c6aaec6ee 100644 --- a/src/otx/algorithms/common/adapters/mmcv/configurer.py +++ b/src/otx/algorithms/common/adapters/mmcv/configurer.py @@ -176,11 +176,7 @@ def configure_device(self, cfg): elif "gpu_ids" not in cfg: cfg.gpu_ids = range(1) -<<<<<<< HEAD - # consider "cuda", "hpu" and "cpu" device only -======= # consider "cuda", "xpu", "hpu" and "cpu" device only ->>>>>>> added support for OD on habana if is_hpu_available(): cfg.device = "hpu" elif torch.cuda.is_available(): @@ -188,10 +184,6 @@ def configure_device(self, cfg): elif is_xpu_available(): try: import intel_extension_for_pytorch as ipex # noqa: F401 -<<<<<<< HEAD - -======= ->>>>>>> added support for OD on habana cfg.device = "xpu" except ModuleNotFoundError: cfg.device = "cpu" @@ -270,7 +262,6 @@ def configure_fp16(cfg: Config): distributed = getattr(cfg, "distributed", False) opts: Dict[str, Any] = {} if fp16_config is not None: -<<<<<<< HEAD if is_hpu_available(): if optim_type == "SAMOptimizerHook": # TODO (sungchul): consider SAM optimizer @@ -278,9 +269,6 @@ def configure_fp16(cfg: Config): opts["type"] = "HPUOptimizerHook" cfg.optimizer_config.update(opts) elif torch.cuda.is_available() or is_xpu_available(): -======= - if torch.cuda.is_available() or is_xpu_available(): ->>>>>>> added support for OD on habana opts.update({"distributed": distributed, **fp16_config}) if optim_type == "SAMOptimizerHook": opts["type"] = "Fp16SAMOptimizerHook" From 7f2c17831decec355e90fdaa956f62727d18cf29 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 15:10:45 +0000 Subject: [PATCH 09/16] Enable e2e training for Instance Segmentation. 
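
Most changes in this patch follow one pattern: keep the backbone and head compute on the HPU, but hand the small prediction tensors back to the CPU before mmdet's target assignment, NMS and loss code runs, so that post-processing stays on the host (the hunks below mark this with "put further post-processing on cpu"). The sketch that follows is an illustration only, not code from this series; the module and class names are made up and it runs on CPU as written.

    import torch
    import torch.nn as nn


    class HeadWithCpuOutputs(nn.Module):
        # Toy stand-in for CustomRPNHead / CustomConvFCBBoxHead: compute the
        # predictions on whatever device the features live on, then move the
        # small output tensors to the CPU when that device is an HPU so the
        # downstream target assignment / NMS keeps running on the host.
        def __init__(self, in_channels=8, num_classes=3):
            super().__init__()
            self.cls = nn.Conv2d(in_channels, num_classes, kernel_size=1)
            self.reg = nn.Conv2d(in_channels, 4, kernel_size=1)

        def forward(self, x):
            cls_score = self.cls(x)
            bbox_pred = self.reg(x)
            if cls_score.device.type == "hpu":
                cls_score = cls_score.cpu()
                bbox_pred = bbox_pred.cpu()
            return cls_score, bbox_pred


    if __name__ == "__main__":
        head = HeadWithCpuOutputs()
        feats = torch.randn(2, 8, 16, 16)  # on CPU here; on HPU during training
        scores, deltas = head(feats)
        print(scores.shape, deltas.shape)
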
--- .../common/adapters/mmcv/hooks/__init__.py | 5 +-- .../adapters/mmcv/hooks/hpu_optimizer_hook.py | 1 + .../detection/adapters/mmdet/apis/train.py | 24 ++++++++++--- .../mmdet/models/heads/custom_roi_head.py | 36 +++++++++++++++++++ .../instance_segmentation/configuration.yaml | 4 +-- .../efficientnetb2b_maskrcnn/model.py | 2 +- .../maskrcnn_swin_t/model.py | 3 +- .../resnet50_maskrcnn/model.py | 2 +- .../instance-segmentation/incremental.py | 6 ++-- .../stages/instance-segmentation/train.py | 7 ++-- 10 files changed, 69 insertions(+), 21 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py index a6dd4ea965c..75113aefa33 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py @@ -52,7 +52,6 @@ from .semisl_cls_hook import SemiSLClsHook from .task_adapt_hook import TaskAdaptHook from .two_crop_transform_hook import TwoCropTransformHook -from .hpu_optimizer_hook import HPUOptimizerHook, HPUDistOptimizerHook __all__ = [ "AdaptiveRepeatDataHook", @@ -90,9 +89,7 @@ "TwoCropTransformHook", "MeanTeacherHook", "MemCacheHook", - "LossDynamicsTrackingHook", - "HPUOptimizerHook", - "HPUDistOptimizerHook", + "LossDynamicsTrackingHook" ] try: diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py index f5e26c49083..292cbe8aa18 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py @@ -17,6 +17,7 @@ def after_train_iter(self, runner): runner.optimizer.zero_grad() if self.detect_anomalous_params: self.detect_anomalous_parameters(runner.outputs["loss"], runner) + runner.outputs["loss"].backward() htcore.mark_step() diff --git a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py index 819e51f8f6b..802ed4e467e 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py +++ b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py @@ -26,6 +26,8 @@ from habana_frameworks.torch.utils.library_loader import load_habana_module from otx.algorithms.common.adapters.mmcv.utils import XPUDataParallel, HPUDataParallel +from otx.algorithms.common.adapters.mmcv.utils.hpu_optimizers import HABANA_OPTIMIZERS + ext_module = ext_loader.load_ext("_ext", ["nms", "softnms", "nms_match", "nms_rotated", "nms_quadri"]) dp_factory["xpu"] = XPUDataParallel @@ -127,7 +129,7 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times os.environ["PT_HPU_LAZY_MODE"] = "1" assert len(cfg.gpu_ids) == 1 model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, dim=0, - is_autocast=bool(fp16_cfg), put_gt_on_device=False) + enable_autocast=bool(fp16_cfg), put_gt_on_device=False) model.to(f"hpu:{cfg.gpu_ids[0]}", non_blocking=True) htcore.mark_step() model.zero_grad() @@ -136,6 +138,8 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times # build optimizer auto_scale_lr(cfg, distributed, logger) + if cfg.device == "hpu": + cfg.optimizer = patch_optimizer(cfg.optimizer) optimizer = build_optimizer(model, cfg.optimizer) if cfg.device == "xpu": @@ -152,10 +156,10 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times if cfg.device == "hpu": NMSop.forward = monkey_patched_xpu_nms RoIAlign.forward = 
monkey_patched_xpu_roi_align - from otx.algorithms.common.adapters.mmcv.optimizer.hpu_optimizer import register_habana_optimizers - habana_optimizers = register_habana_optimizers() - if (new_type := "Fused" + cfg.optimizer.get("type", "SGD")) in habana_optimizers: - cfg.optimizer["type"] = new_type + # build runner + if cfg.device == "hpu": + if (new_type := "Fused" + cfg.optimizer.get("type", "SGD")) in HABANA_OPTIMIZERS: + cfg.optimizer["type"] = new_type runner = build_runner( cfg.runner, default_args=dict(model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta) @@ -217,6 +221,16 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times runner.load_checkpoint(cfg.load_from) runner.run(data_loaders, cfg.workflow) +def patch_optimizer(cfg_optim): + "Patch optimizer for OD and IS" + if cfg_optim["type"] == "SGD": + return cfg_optim + + # Only SGD for OD and IS supported by now on HPU + cfg_optim["type"] = "SGD" + if "betas" in cfg_optim: + del cfg_optim["betas"] + return cfg_optim def monkey_patched_xpu_nms(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num): """Runs MMCVs NMS with torchvision.nms, or forces NMS from MMCV to run on CPU.""" diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py index fbd8619c0e8..89652c80f5a 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py @@ -63,6 +63,8 @@ def _mask_forward(self, x, rois=None, pos_inds=None, bbox_feats=None): mask_pred = self.mask_head(mask_feats) if mask_pred.device.type == "hpu": mask_pred = mask_pred.cpu() + mask_feats = mask_feats.cpu() + mask_results = dict(mask_pred=mask_pred, mask_feats=mask_feats) return mask_results @@ -87,6 +89,40 @@ def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels, img_met bbox_results.update(loss_bbox=loss_bbox) return bbox_results + def _mask_forward_train(self, x, sampling_results, bbox_feats, gt_masks, + img_metas): + """Run forward function and calculate loss for mask head in + training.""" + if not self.share_roi_extractor: + pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results]) + mask_results = self._mask_forward(x, pos_rois) + else: + pos_inds = [] + device = bbox_feats.device + for res in sampling_results: + pos_inds.append( + torch.ones( + res.pos_bboxes.shape[0], + device=device, + dtype=torch.uint8)) + pos_inds.append( + torch.zeros( + res.neg_bboxes.shape[0], + device=device, + dtype=torch.uint8)) + pos_inds = torch.cat(pos_inds) + + mask_results = self._mask_forward( + x, pos_inds=pos_inds, bbox_feats=bbox_feats) + + mask_targets = self.mask_head.get_targets(sampling_results, gt_masks, + self.train_cfg) + pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results]) + loss_mask = self.mask_head.loss(mask_results['mask_pred'], + mask_targets, pos_labels) + + mask_results.update(loss_mask=loss_mask, mask_targets=mask_targets) + return mask_results @HEADS.register_module() class CustomConvFCBBoxHead(Shared2FCBBoxHead, CrossDatasetDetectorHead): diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml index f0672ae5ff8..c49981e2c66 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml +++ 
b/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml @@ -194,7 +194,7 @@ learning_parameters: warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING - default_value: true + default_value: false description: Depending on the size of iteration per epoch, adaptively update the validation interval and related values. editable: true header: Use adaptive validation interval @@ -208,7 +208,7 @@ learning_parameters: warning: This will automatically control the patience and interval when early stopping is enabled. auto_adapt_batch_size: affects_outcome_of: TRAINING - default_value: Safe + default_value: None description: Safe => Prevent GPU out of memory. Full => Find a batch size using most of GPU memory. editable: true enum_name: BatchSizeAdaptType diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/model.py index 72ca9481ef3..03cb21733dc 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/model.py +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/model.py @@ -28,7 +28,7 @@ type="CustomMaskRCNN", # Use CustomMaskRCNN for Incremental Learning neck=dict(type="FPN", in_channels=[24, 48, 120, 352], out_channels=80, num_outs=5), rpn_head=dict( - type="RPNHead", + type="CustomRPNHead", in_channels=80, feat_channels=80, anchor_generator=dict(type="AnchorGenerator", scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py index 66f7522bdee..d6f95c1d366 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py +++ b/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py @@ -38,7 +38,7 @@ ), neck=dict(type="FPN", in_channels=[96, 192, 384, 768], out_channels=256, num_outs=5), rpn_head=dict( - type="RPNHead", + type="CustomRPNHead", in_channels=256, feat_channels=256, anchor_generator=dict(type="AnchorGenerator", scales=[8], ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]), @@ -134,6 +134,7 @@ ) evaluation = dict(interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5]) + optimizer = dict( _delete_=True, type="AdamW", diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py index d8918edc33f..d0e51ef26c9 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py @@ -159,5 +159,5 @@ v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/\ mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth" -evaluation = dict(interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5]) +evaluation = dict(interval=100, metric="mAP", save_best="mAP", iou_thr=[0.5]) ignore = True diff --git a/src/otx/recipes/stages/instance-segmentation/incremental.py b/src/otx/recipes/stages/instance-segmentation/incremental.py index 93cda5428e7..d6c17f4767f 100644 --- a/src/otx/recipes/stages/instance-segmentation/incremental.py +++ b/src/otx/recipes/stages/instance-segmentation/incremental.py @@ -3,7 
+3,7 @@ task = "instance-segmentation" evaluation = dict( - interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] + interval=100, metric="mAP", save_best="mAP", iou_thr=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] ) task_adapt = dict( @@ -19,6 +19,6 @@ ignore = True adaptive_validation_interval = dict( max_interval=5, - enable_adaptive_interval_hook=True, - enable_eval_before_run=True, + enable_adaptive_interval_hook=False, + enable_eval_before_run=False, ) diff --git a/src/otx/recipes/stages/instance-segmentation/train.py b/src/otx/recipes/stages/instance-segmentation/train.py index 0ac963fa94d..a9bd72da3e0 100644 --- a/src/otx/recipes/stages/instance-segmentation/train.py +++ b/src/otx/recipes/stages/instance-segmentation/train.py @@ -1,14 +1,14 @@ _base_ = [ "../_base_/default.py", "../_base_/logs/tensorboard_logger.py", - "../_base_/optimizers/sgd.py", + "../_base_/optimizers/adam.py", "../_base_/runners/epoch_runner_cancel.py", "../_base_/schedules/plateau.py", ] optimizer = dict( + type="SGD", lr=0.001, - momentum=0.9, weight_decay=0.0001, ) @@ -26,7 +26,6 @@ evaluation = dict(interval=1, metric="mAP", save_best="mAP") early_stop_metric = "mAP" - custom_hooks = [ dict( type="LazyEarlyStoppingHook", @@ -40,7 +39,7 @@ dict( type="AdaptiveTrainSchedulingHook", enable_adaptive_interval_hook=False, - enable_eval_before_run=True, + enable_eval_before_run=False, ), dict(type="LoggerReplaceHook"), dict( From 9400ed52d80b134166ffe9b523da1f972367f5e5 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 15:41:01 +0000 Subject: [PATCH 10/16] clean the code stage 1 --- .../common/adapters/mmcv/configurer.py | 6 - .../common/adapters/mmcv/hooks/__init__.py | 2 +- .../adapters/mmcv/hooks/hpu_optimizer_hook.py | 1 - .../utils/_builder_build_data_parallel.py | 2 +- src/otx/algorithms/common/utils/__init__.py | 1 - src/otx/algorithms/common/utils/utils.py | 1 - .../detection/adapters/mmdet/apis/train.py | 7 +- .../adapters/mmdet/evaluation/evaluator.py | 5 +- .../detectors/custom_single_stage_detector.py | 29 +---- .../mmdet/models/heads/custom_atss_head.py | 11 +- .../mmdet/models/heads/custom_roi_head.py | 109 +----------------- .../efficientnetb2b_maskrcnn/template.yaml | 2 +- .../instance-segmentation/incremental.py | 2 +- .../stages/instance-segmentation/train.py | 2 +- 14 files changed, 15 insertions(+), 165 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/configurer.py b/src/otx/algorithms/common/adapters/mmcv/configurer.py index f2c6aaec6ee..bfdf3af4b71 100644 --- a/src/otx/algorithms/common/adapters/mmcv/configurer.py +++ b/src/otx/algorithms/common/adapters/mmcv/configurer.py @@ -280,12 +280,6 @@ def configure_fp16(cfg: Config): cfg.fp16 = fp16_config opts = dict() cfg.optimizer_config.update(opts) - elif is_hpu_available(): - if optim_type == "SAMOptimizerHook": - # TODO (sungchul): consider SAM optimizer - logger.warning("SAMOptimizerHook is not supported on HPU. 
Changed to OptimizerHook.") - opts["type"] = "HPUOptimizerHook" - cfg.optimizer_config.update(opts) else: logger.info("Revert FP16 to FP32 on CPU device") diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py index 75113aefa33..a7c41d80fee 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/__init__.py @@ -89,7 +89,7 @@ "TwoCropTransformHook", "MeanTeacherHook", "MemCacheHook", - "LossDynamicsTrackingHook" + "LossDynamicsTrackingHook", ] try: diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py index 292cbe8aa18..f5e26c49083 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/hpu_optimizer_hook.py @@ -17,7 +17,6 @@ def after_train_iter(self, runner): runner.optimizer.zero_grad() if self.detect_anomalous_params: self.detect_anomalous_parameters(runner.outputs["loss"], runner) - runner.outputs["loss"].backward() htcore.mark_step() diff --git a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py index ae865bd10b6..d20cd540ed1 100644 --- a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py +++ b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py @@ -65,7 +65,7 @@ def build_data_parallel( model = XPUDataParallel(model, device_ids=config.gpu_ids) elif is_hpu_available() and config.get("gpu_ids", []): model = model.to("hpu") - model = HPUDataParallel(model, device_ids=config.gpu_ids, put_gt_on_device=False) + model = HPUDataParallel(model, device_ids=config.gpu_ids) elif torch.cuda.is_available() and config.get("gpu_ids", []): if distributed: model = model.cuda() diff --git a/src/otx/algorithms/common/utils/__init__.py b/src/otx/algorithms/common/utils/__init__.py index 23bb01ed20f..6395bd6e60d 100644 --- a/src/otx/algorithms/common/utils/__init__.py +++ b/src/otx/algorithms/common/utils/__init__.py @@ -30,7 +30,6 @@ get_task_class, is_hpu_available, is_xpu_available, - is_hpu_available, load_template, read_py_config, set_random_seed, diff --git a/src/otx/algorithms/common/utils/utils.py b/src/otx/algorithms/common/utils/utils.py index c9b651244a2..92e1b2f0853 100644 --- a/src/otx/algorithms/common/utils/utils.py +++ b/src/otx/algorithms/common/utils/utils.py @@ -17,7 +17,6 @@ import torch import yaml from addict import Dict as adict -import habana_frameworks.torch as htorch HPU_AVAILABLE = None try: diff --git a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py index 802ed4e467e..ef13e8d84b3 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py +++ b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py @@ -24,7 +24,6 @@ from torchvision.ops import nms as tv_nms from torchvision.ops import roi_align as tv_roi_align -from habana_frameworks.torch.utils.library_loader import load_habana_module from otx.algorithms.common.adapters.mmcv.utils import XPUDataParallel, HPUDataParallel from otx.algorithms.common.adapters.mmcv.utils.hpu_optimizers import HABANA_OPTIMIZERS @@ -32,7 +31,6 @@ ext_module = ext_loader.load_ext("_ext", ["nms", "softnms", "nms_match", "nms_rotated", "nms_quadri"]) dp_factory["xpu"] = XPUDataParallel 
dp_factory["hpu"] = HPUDataParallel -load_habana_module() def auto_scale_lr(cfg, distributed, logger): @@ -125,12 +123,14 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, enable_autocast=bool(fp16_cfg)) model.to(f"xpu:{cfg.gpu_ids[0]}") elif cfg.device == "hpu": + from habana_frameworks.torch.utils.library_loader import load_habana_module import habana_frameworks.torch.core as htcore + load_habana_module() os.environ["PT_HPU_LAZY_MODE"] = "1" assert len(cfg.gpu_ids) == 1 model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, dim=0, enable_autocast=bool(fp16_cfg), put_gt_on_device=False) - model.to(f"hpu:{cfg.gpu_ids[0]}", non_blocking=True) + model.to(model.src_device_obj) htcore.mark_step() model.zero_grad() else: @@ -254,6 +254,7 @@ def monkey_patched_xpu_nms(ctx, bboxes, scores, iou_threshold, offset, score_thr inds = ext_module.nms(bboxes, scores, iou_threshold=float(iou_threshold), offset=offset) bboxes = bboxes.to(device) scores = scores.to(device) + if max_num > 0: inds = inds[:max_num] if is_filtering_by_score: diff --git a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py index 553b9bc7543..715c260d371 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py +++ b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py @@ -33,7 +33,6 @@ from otx.api.utils.time_utils import timeit - def print_map_summary( # pylint: disable=too-many-locals,too-many-branches mean_ap, results, dataset=None, scale_ranges=None, logger=None ): @@ -64,6 +63,7 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches segmentation = "miou" in results num_classes = len(results) + recalls = np.zeros((num_scales, num_classes), dtype=np.float32) aps = np.zeros((num_scales, num_classes), dtype=np.float32) num_gts = np.zeros((num_scales, num_classes), dtype=int) @@ -386,7 +386,7 @@ def evaluate(self, results, logger, iou_thr, scale_ranges): metric: mAP and mIoU metric """ if self.domain == Domain.DETECTION: - output = eval_map( + return eval_map( results, self.annotation, scale_ranges=scale_ranges, @@ -394,5 +394,4 @@ def evaluate(self, results, logger, iou_thr, scale_ranges): dataset=self.classes, logger=logger, ) - return output return self.evaluate_mask(results, logger, iou_thr) diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py index 07bce4440ed..e5587270545 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py @@ -6,11 +6,8 @@ import functools import torch -import time from mmdet.models.builder import DETECTORS from mmdet.models.detectors.single_stage import SingleStageDetector -from mmdet.core import bbox2result - from otx.algorithms.common.adapters.mmcv.hooks.recording_forward_hook import ( FeatureVectorHook, ) @@ -36,7 +33,7 @@ class CustomSingleStageDetector(SAMDetectorMixin, DetLossDynamicsTrackingMixin, L2SPDetectorMixin, SingleStageDetector): """SAM optimizer & L2SP regularizer enabled custom SSD.""" - # TRACKING_LOSS_TYPE = (TrackingLossType.cls, TrackingLossType.bbox) + TRACKING_LOSS_TYPE = (TrackingLossType.cls, TrackingLossType.bbox) def __init__(self, *args, 
task_adapt=None, **kwargs): super().__init__(*args, **kwargs) @@ -79,30 +76,6 @@ def forward_train(self, img, img_metas, gt_bboxes, gt_labels, gt_bboxes_ignore=N losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes, gt_labels, gt_bboxes_ignore, **kwargs) return losses - def simple_test(self, img, img_metas, rescale=False): - """Test function without test-time augmentation. - - Args: - img (torch.Tensor): Images with shape (N, C, H, W). - img_metas (list[dict]): List of image information. - rescale (bool, optional): Whether to rescale the results. - Defaults to False. - - Returns: - list[list[np.ndarray]]: BBox results of each image and classes. - The outer list corresponds to each image. The inner list - corresponds to each class. - """ - feat = self.extract_feat(img) - results_list = self.bbox_head.simple_test( - feat, img_metas, rescale=rescale) - - bbox_results = [ - bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) - for det_bboxes, det_labels in results_list - ] - return bbox_results - @staticmethod def load_state_dict_pre_hook(model, model_classes, chkpt_classes, chkpt_dict, prefix, *args, **kwargs): """Modify input state_dict according to class name matching before weight loading.""" diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py index 3d708de4460..41b7fd3aa8b 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_atss_head.py @@ -66,16 +66,7 @@ def forward_single(self, x, scale): centerness (Tensor): Centerness for a single scale level, the channel number is (N, num_anchors * 1, H, W). """ - cls_feat = x - reg_feat = x - for cls_conv in self.cls_convs: - cls_feat = cls_conv(cls_feat) - for reg_conv in self.reg_convs: - reg_feat = reg_conv(reg_feat) - cls_score = self.atss_cls(cls_feat) - # we just follow atss, not apply exp in bbox_pred - bbox_pred = scale(self.atss_reg(reg_feat)).float() - centerness = self.atss_centerness(reg_feat) + cls_score, bbox_pred, centerness = super().forward_single(x, scale) if cls_score.device.type == "hpu": # put further post-processing on cpu cls_score = cls_score.cpu() diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py index 89652c80f5a..247d8065797 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py @@ -33,20 +33,6 @@ def init_bbox_head(self, bbox_roi_extractor, bbox_head): bbox_head.type = "CustomConvFCBBoxHead" self.bbox_head = build_head(bbox_head) - - def _bbox_forward(self, x, rois): - """Box head forward function used in both training and testing.""" - # TODO: a more flexible way to decide which feature maps to use - bbox_feats = self.bbox_roi_extractor( - x[:self.bbox_roi_extractor.num_inputs], rois) - if self.with_shared_head: - bbox_feats = self.shared_head(bbox_feats) - cls_score, bbox_pred = self.bbox_head(bbox_feats) - - bbox_results = dict( - cls_score=cls_score, bbox_pred=bbox_pred, bbox_feats=bbox_feats) - return bbox_results - def _mask_forward(self, x, rois=None, pos_inds=None, bbox_feats=None): """Mask head forward function used in both training and testing.""" assert ((rois is not None) ^ @@ -68,61 +54,6 @@ def _mask_forward(self, x, rois=None, 
pos_inds=None, bbox_feats=None): mask_results = dict(mask_pred=mask_pred, mask_feats=mask_feats) return mask_results - def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels, img_metas): - """Run forward function and calculate loss for box head in training.""" - rois = bbox2roi([res.bboxes for res in sampling_results]) - bbox_results = self._bbox_forward(x, rois) - - labels, label_weights, bbox_targets, bbox_weights, valid_label_mask = self.bbox_head.get_targets( - sampling_results, gt_bboxes, gt_labels, img_metas, self.train_cfg - ) - loss_bbox = self.bbox_head.loss( - bbox_results["cls_score"], - bbox_results["bbox_pred"], - rois, - labels, - label_weights, - bbox_targets, - bbox_weights, - valid_label_mask=valid_label_mask, - ) - bbox_results.update(loss_bbox=loss_bbox) - return bbox_results - - def _mask_forward_train(self, x, sampling_results, bbox_feats, gt_masks, - img_metas): - """Run forward function and calculate loss for mask head in - training.""" - if not self.share_roi_extractor: - pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results]) - mask_results = self._mask_forward(x, pos_rois) - else: - pos_inds = [] - device = bbox_feats.device - for res in sampling_results: - pos_inds.append( - torch.ones( - res.pos_bboxes.shape[0], - device=device, - dtype=torch.uint8)) - pos_inds.append( - torch.zeros( - res.neg_bboxes.shape[0], - device=device, - dtype=torch.uint8)) - pos_inds = torch.cat(pos_inds) - - mask_results = self._mask_forward( - x, pos_inds=pos_inds, bbox_feats=bbox_feats) - - mask_targets = self.mask_head.get_targets(sampling_results, gt_masks, - self.train_cfg) - pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results]) - loss_mask = self.mask_head.loss(mask_results['mask_pred'], - mask_targets, pos_labels) - - mask_results.update(loss_mask=loss_mask, mask_targets=mask_targets) - return mask_results @HEADS.register_module() class CustomConvFCBBoxHead(Shared2FCBBoxHead, CrossDatasetDetectorHead): @@ -194,52 +125,16 @@ def get_targets(self, sampling_results, gt_bboxes, gt_labels, img_metas, rcnn_tr valid_label_mask = torch.cat(valid_label_mask, 0) return labels, label_weights, bbox_targets, bbox_weights, valid_label_mask - def forward(self, x): + '''ConvFCBBoxHead forward''' # shared part - if self.num_shared_convs > 0: - for conv in self.shared_convs: - x = conv(x) - - if self.num_shared_fcs > 0: - if self.with_avg_pool: - x = self.avg_pool(x) - - x = x.flatten(1) - - for fc in self.shared_fcs: - x = self.relu(fc(x)) - # separate branches - x_cls = x - x_reg = x - - for conv in self.cls_convs: - x_cls = conv(x_cls) - if x_cls.dim() > 2: - if self.with_avg_pool: - x_cls = self.avg_pool(x_cls) - x_cls = x_cls.flatten(1) - for fc in self.cls_fcs: - x_cls = self.relu(fc(x_cls)) - - for conv in self.reg_convs: - x_reg = conv(x_reg) - if x_reg.dim() > 2: - if self.with_avg_pool: - x_reg = self.avg_pool(x_reg) - x_reg = x_reg.flatten(1) - for fc in self.reg_fcs: - x_reg = self.relu(fc(x_reg)) - - cls_score = self.fc_cls(x_cls) if self.with_cls else None - bbox_pred = self.fc_reg(x_reg) if self.with_reg else None + cls_score, bbox_pred = super().forward(self, x) if cls_score.device.type == 'hpu': cls_score = cls_score.cpu() bbox_pred = bbox_pred.cpu() return cls_score, bbox_pred - @force_fp32(apply_to=("cls_score", "bbox_pred")) def loss( self, diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml 
b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml index 272a648c551..82a27946ebb 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml @@ -36,7 +36,7 @@ hyper_parameters: learning_rate_warmup_iters: default_value: 100 num_iters: - default_value: 100 + default_value: 5 pot_parameters: stat_requests_number: default_value: 1 diff --git a/src/otx/recipes/stages/instance-segmentation/incremental.py b/src/otx/recipes/stages/instance-segmentation/incremental.py index d6c17f4767f..8e472d5c986 100644 --- a/src/otx/recipes/stages/instance-segmentation/incremental.py +++ b/src/otx/recipes/stages/instance-segmentation/incremental.py @@ -20,5 +20,5 @@ adaptive_validation_interval = dict( max_interval=5, enable_adaptive_interval_hook=False, - enable_eval_before_run=False, + enable_eval_before_run=True, ) diff --git a/src/otx/recipes/stages/instance-segmentation/train.py b/src/otx/recipes/stages/instance-segmentation/train.py index a9bd72da3e0..b866aed4b94 100644 --- a/src/otx/recipes/stages/instance-segmentation/train.py +++ b/src/otx/recipes/stages/instance-segmentation/train.py @@ -39,7 +39,7 @@ dict( type="AdaptiveTrainSchedulingHook", enable_adaptive_interval_hook=False, - enable_eval_before_run=False, + enable_eval_before_run=True, ), dict(type="LoggerReplaceHook"), dict( From 4d4cb91784f35c3d1b872aac632a76b34ef6d59b Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 16:27:00 +0000 Subject: [PATCH 11/16] clean code 2 --- .../mmdet/models/heads/custom_roi_head.py | 44 +++++++++++-------- .../mmdet/models/heads/custom_rpn_head.py | 6 +-- .../mmdet/models/heads/custom_yolox_head.py | 15 ++----- .../detection/cspdarknet_yolox_x/model.py | 2 +- .../cspdarknet_yolox_x/template.yaml | 6 +-- .../instance_segmentation/configuration.yaml | 4 +- .../efficientnetb2b_maskrcnn/template.yaml | 2 +- .../maskrcnn_swin_t/model.py | 1 - .../resnet50_maskrcnn/template.yaml | 4 +- .../recipes/stages/detection/incremental.py | 2 +- .../instance-segmentation/incremental.py | 4 +- .../stages/instance-segmentation/train.py | 4 +- 12 files changed, 45 insertions(+), 49 deletions(-) diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py index 247d8065797..45da297ca82 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py @@ -33,25 +33,33 @@ def init_bbox_head(self, bbox_roi_extractor, bbox_head): bbox_head.type = "CustomConvFCBBoxHead" self.bbox_head = build_head(bbox_head) + def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels, img_metas): + """Run forward function and calculate loss for box head in training.""" + rois = bbox2roi([res.bboxes for res in sampling_results]) + bbox_results = self._bbox_forward(x, rois) + + labels, label_weights, bbox_targets, bbox_weights, valid_label_mask = self.bbox_head.get_targets( + sampling_results, gt_bboxes, gt_labels, img_metas, self.train_cfg + ) + loss_bbox = self.bbox_head.loss( + bbox_results["cls_score"], + bbox_results["bbox_pred"], + rois, + labels, + label_weights, + bbox_targets, + bbox_weights, + valid_label_mask=valid_label_mask, + ) + bbox_results.update(loss_bbox=loss_bbox) + return bbox_results + def 
_mask_forward(self, x, rois=None, pos_inds=None, bbox_feats=None): """Mask head forward function used in both training and testing.""" - assert ((rois is not None) ^ - (pos_inds is not None and bbox_feats is not None)) - if rois is not None: - mask_feats = self.mask_roi_extractor( - x[:self.mask_roi_extractor.num_inputs], rois) - if self.with_shared_head: - mask_feats = self.shared_head(mask_feats) - else: - assert bbox_feats is not None - mask_feats = bbox_feats[pos_inds] - - mask_pred = self.mask_head(mask_feats) - if mask_pred.device.type == "hpu": - mask_pred = mask_pred.cpu() - mask_feats = mask_feats.cpu() - - mask_results = dict(mask_pred=mask_pred, mask_feats=mask_feats) + mask_results = super()._mask_forward(x, rois, pos_inds, bbox_feats) + if mask_results["mask_pred"].device.type == "hpu": + mask_results["mask_pred"] = mask_results["mask_pred"].cpu() + mask_results["mask_feats"] = mask_results["mask_feats"].cpu() return mask_results @@ -128,7 +136,7 @@ def get_targets(self, sampling_results, gt_bboxes, gt_labels, img_metas, rcnn_tr def forward(self, x): '''ConvFCBBoxHead forward''' # shared part - cls_score, bbox_pred = super().forward(self, x) + cls_score, bbox_pred = super().forward(x) if cls_score.device.type == 'hpu': cls_score = cls_score.cpu() bbox_pred = bbox_pred.cpu() diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py index 84d23757cca..6d7614bb844 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py @@ -15,12 +15,8 @@ class CustomRPNHead(RPNHead): def forward_single(self, x): """Forward feature map of a single scale level.""" - x = self.rpn_conv(x) - x = F.relu(x, inplace=False) - rpn_cls_score = self.rpn_cls(x) - rpn_bbox_pred = self.rpn_reg(x) + rpn_cls_score, rpn_bbox_pred = super().forward_single(x) if rpn_cls_score.device.type == "hpu": rpn_cls_score = rpn_cls_score.cpu() rpn_bbox_pred = rpn_bbox_pred.cpu() - return rpn_cls_score, rpn_bbox_pred diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py index 69fb668e2f3..e47d891ea48 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py @@ -107,20 +107,13 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m def forward_single(self, x, cls_convs, reg_convs, conv_cls, conv_reg, conv_obj): """Forward feature of a single scale level.""" - - cls_feat = cls_convs(x) - reg_feat = reg_convs(x) - - cls_score = conv_cls(cls_feat) - bbox_pred = conv_reg(reg_feat) - objectness = conv_obj(reg_feat) - + cls_score, bbox_pred, objectness = super().forward_single(x, cls_convs, reg_convs, conv_cls, conv_reg, + conv_obj) if cls_score.device.type == "hpu": # put on cpu for further post-processing cls_score = cls_score.cpu() bbox_pred = bbox_pred.cpu() objectness = objectness.cpu() - return cls_score, bbox_pred, objectness @@ -264,9 +257,7 @@ def _get_target_single(self, cls_preds, objectness, priors, decoded_bboxes, gt_b num_priors = priors.size(0) num_gts = gt_labels.size(0) gt_bboxes = gt_bboxes.to(decoded_bboxes.dtype) - if "hpu" in gt_bboxes.device: - gt_bboxes = gt_bboxes.cpu() - gt_labels = gt_labels.cpu() + # No target if num_gts == 0: 
cls_target = cls_preds.new_zeros((0, self.num_classes)) diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py index e734996e4b1..857021810d1 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/model.py @@ -20,5 +20,5 @@ load_from = "https://download.openmmlab.com/mmdetection/v2.0/yolox\ /yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth" -fp16 = None +fp16 = dict(loss_scale=512.0) ignore = False diff --git a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml index ca1d67ef754..50e07835a96 100644 --- a/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml +++ b/src/otx/algorithms/detection/configs/detection/cspdarknet_yolox_x/template.yaml @@ -26,17 +26,17 @@ hyper_parameters: parameter_overrides: learning_parameters: batch_size: - default_value: 16 + default_value: 4 auto_hpo_state: POSSIBLE inference_batch_size: - default_value: 16 + default_value: 4 learning_rate: default_value: 0.001 auto_hpo_state: POSSIBLE learning_rate_warmup_iters: default_value: 3 num_iters: - default_value: 20 + default_value: 200 nncf_optimization: enable_quantization: default_value: true diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml index c49981e2c66..f0672ae5ff8 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml +++ b/src/otx/algorithms/detection/configs/instance_segmentation/configuration.yaml @@ -194,7 +194,7 @@ learning_parameters: warning: This is applied exclusively when early stopping is enabled. use_adaptive_interval: affects_outcome_of: TRAINING - default_value: false + default_value: true description: Depending on the size of iteration per epoch, adaptively update the validation interval and related values. editable: true header: Use adaptive validation interval @@ -208,7 +208,7 @@ learning_parameters: warning: This will automatically control the patience and interval when early stopping is enabled. auto_adapt_batch_size: affects_outcome_of: TRAINING - default_value: None + default_value: Safe description: Safe => Prevent GPU out of memory. Full => Find a batch size using most of GPU memory. 
editable: true enum_name: BatchSizeAdaptType diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml index 82a27946ebb..272a648c551 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml +++ b/src/otx/algorithms/detection/configs/instance_segmentation/efficientnetb2b_maskrcnn/template.yaml @@ -36,7 +36,7 @@ hyper_parameters: learning_rate_warmup_iters: default_value: 100 num_iters: - default_value: 5 + default_value: 100 pot_parameters: stat_requests_number: default_value: 1 diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py index d6f95c1d366..203470d2fac 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py +++ b/src/otx/algorithms/detection/configs/instance_segmentation/maskrcnn_swin_t/model.py @@ -134,7 +134,6 @@ ) evaluation = dict(interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5]) - optimizer = dict( _delete_=True, type="AdamW", diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml index a77d5a22c47..17a74b1c25e 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/template.yaml @@ -26,10 +26,10 @@ hyper_parameters: parameter_overrides: learning_parameters: batch_size: - default_value: 16 + default_value: 4 auto_hpo_state: POSSIBLE inference_batch_size: - default_value: 16 + default_value: 1 learning_rate: default_value: 0.007 auto_hpo_state: POSSIBLE diff --git a/src/otx/recipes/stages/detection/incremental.py b/src/otx/recipes/stages/detection/incremental.py index 692a7dbee7c..9ddd2e28e55 100644 --- a/src/otx/recipes/stages/detection/incremental.py +++ b/src/otx/recipes/stages/detection/incremental.py @@ -43,6 +43,6 @@ ignore = True adaptive_validation_interval = dict( max_interval=5, - enable_adaptive_interval_hook=False, + enable_adaptive_interval_hook=True, enable_eval_before_run=True, ) diff --git a/src/otx/recipes/stages/instance-segmentation/incremental.py b/src/otx/recipes/stages/instance-segmentation/incremental.py index 8e472d5c986..93cda5428e7 100644 --- a/src/otx/recipes/stages/instance-segmentation/incremental.py +++ b/src/otx/recipes/stages/instance-segmentation/incremental.py @@ -3,7 +3,7 @@ task = "instance-segmentation" evaluation = dict( - interval=100, metric="mAP", save_best="mAP", iou_thr=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] + interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95] ) task_adapt = dict( @@ -19,6 +19,6 @@ ignore = True adaptive_validation_interval = dict( max_interval=5, - enable_adaptive_interval_hook=False, + enable_adaptive_interval_hook=True, enable_eval_before_run=True, ) diff --git a/src/otx/recipes/stages/instance-segmentation/train.py b/src/otx/recipes/stages/instance-segmentation/train.py index b866aed4b94..12e2110c7f8 100644 --- a/src/otx/recipes/stages/instance-segmentation/train.py +++ b/src/otx/recipes/stages/instance-segmentation/train.py @@ -1,7 +1,7 @@ _base_ = [ "../_base_/default.py", 
"../_base_/logs/tensorboard_logger.py", - "../_base_/optimizers/adam.py", + "../_base_/optimizers/sgd.py", "../_base_/runners/epoch_runner_cancel.py", "../_base_/schedules/plateau.py", ] @@ -9,6 +9,7 @@ optimizer = dict( type="SGD", lr=0.001, + momentum=0.9, weight_decay=0.0001, ) @@ -26,6 +27,7 @@ evaluation = dict(interval=1, metric="mAP", save_best="mAP") early_stop_metric = "mAP" + custom_hooks = [ dict( type="LazyEarlyStoppingHook", From d33f48df2924067cabeccfe389a19a5350b418d1 Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 16:40:16 +0000 Subject: [PATCH 12/16] fix pre-commit --- .../algorithms/common/adapters/mmcv/configurer.py | 1 + .../detection/adapters/mmdet/apis/train.py | 15 +++++++++------ .../adapters/mmdet/evaluation/evaluator.py | 14 +++++++++----- .../detectors/custom_single_stage_detector.py | 1 + .../adapters/mmdet/models/heads/__init__.py | 2 +- .../mmdet/models/heads/custom_roi_head.py | 4 ++-- .../mmdet/models/heads/custom_rpn_head.py | 7 +++++-- .../mmdet/models/heads/custom_ssd_head.py | 4 +--- .../mmdet/models/heads/custom_yolox_head.py | 7 ++----- .../configs/detection/configuration.yaml | 2 +- .../configs/detection/mobilenetv2_ssd/model.py | 1 - .../resnet50_maskrcnn/model.py | 2 +- .../recipes/stages/instance-segmentation/train.py | 1 - 13 files changed, 33 insertions(+), 28 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/configurer.py b/src/otx/algorithms/common/adapters/mmcv/configurer.py index bfdf3af4b71..83d43192ed7 100644 --- a/src/otx/algorithms/common/adapters/mmcv/configurer.py +++ b/src/otx/algorithms/common/adapters/mmcv/configurer.py @@ -184,6 +184,7 @@ def configure_device(self, cfg): elif is_xpu_available(): try: import intel_extension_for_pytorch as ipex # noqa: F401 + cfg.device = "xpu" except ModuleNotFoundError: cfg.device = "cpu" diff --git a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py index ef13e8d84b3..98ac05d28f5 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py +++ b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py @@ -24,10 +24,9 @@ from torchvision.ops import nms as tv_nms from torchvision.ops import roi_align as tv_roi_align -from otx.algorithms.common.adapters.mmcv.utils import XPUDataParallel, HPUDataParallel +from otx.algorithms.common.adapters.mmcv.utils import HPUDataParallel, XPUDataParallel from otx.algorithms.common.adapters.mmcv.utils.hpu_optimizers import HABANA_OPTIMIZERS - ext_module = ext_loader.load_ext("_ext", ["nms", "softnms", "nms_match", "nms_rotated", "nms_quadri"]) dp_factory["xpu"] = XPUDataParallel dp_factory["hpu"] = HPUDataParallel @@ -123,13 +122,15 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, enable_autocast=bool(fp16_cfg)) model.to(f"xpu:{cfg.gpu_ids[0]}") elif cfg.device == "hpu": - from habana_frameworks.torch.utils.library_loader import load_habana_module import habana_frameworks.torch.core as htcore + from habana_frameworks.torch.utils.library_loader import load_habana_module + load_habana_module() os.environ["PT_HPU_LAZY_MODE"] = "1" assert len(cfg.gpu_ids) == 1 - model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, dim=0, - enable_autocast=bool(fp16_cfg), put_gt_on_device=False) + model = build_dp( + model, cfg.device, device_ids=cfg.gpu_ids, dim=0, enable_autocast=bool(fp16_cfg), put_gt_on_device=False + ) model.to(model.src_device_obj) htcore.mark_step() 
model.zero_grad() @@ -221,8 +222,9 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times runner.load_checkpoint(cfg.load_from) runner.run(data_loaders, cfg.workflow) + def patch_optimizer(cfg_optim): - "Patch optimizer for OD and IS" + """Patch optimizer for OD and IS.""" if cfg_optim["type"] == "SGD": return cfg_optim @@ -232,6 +234,7 @@ def patch_optimizer(cfg_optim): del cfg_optim["betas"] return cfg_optim + def monkey_patched_xpu_nms(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num): """Runs MMCVs NMS with torchvision.nms, or forces NMS from MMCV to run on CPU.""" is_filtering_by_score = score_threshold > 0 diff --git a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py index 715c260d371..f4cad3df5bb 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py +++ b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py @@ -15,18 +15,18 @@ # and limitations under the License. import multiprocessing as mp -from typing import Dict, List, Tuple, Union import time +from typing import Dict, List, Tuple, Union import mmcv import numpy as np import pycocotools.mask as mask_util from mmcv.utils import print_log from mmdet.core import BitmapMasks, PolygonMasks, eval_map +from mmdet.core.evaluation import mean_ap from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps from mmdet.core.evaluation.class_names import get_classes from mmdet.core.evaluation.mean_ap import average_precision -from mmdet.core.evaluation import mean_ap from terminaltables import AsciiTable from otx.api.entities.label import Domain @@ -99,16 +99,20 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches num_gts[i, j], results[j]["num_dets"], f"{recalls[i, j]:.3f}", - f"{aps[i, j]:.3f}" + f"{aps[i, j]:.3f}", ] if segmentation: row_data.append(f"{mious[i, j]:.3f}") table_data.append(row_data) - table_ = ["mAP", "", "", "", f"{mean_ap[i]:.3f}", f"{np.mean(mious[i]):.3f}"] if segmentation else ["mAP", "", "", "", f"{mean_ap[i]:.3f}"] + table_ = ( + ["mAP", "", "", "", f"{mean_ap[i]:.3f}", f"{np.mean(mious[i]):.3f}"] + if segmentation + else ["mAP", "", "", "", f"{mean_ap[i]:.3f}"] + ) table_data.append(table_) table = AsciiTable(table_data) table.inner_footing_row_border = True - time.sleep(0.1) # prevent segmentation fault + time.sleep(0.1) # prevent segmentation fault print_log("\n" + table.table, logger=logger) diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py index e5587270545..a8e926cae5d 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/detectors/custom_single_stage_detector.py @@ -8,6 +8,7 @@ import torch from mmdet.models.builder import DETECTORS from mmdet.models.detectors.single_stage import SingleStageDetector + from otx.algorithms.common.adapters.mmcv.hooks.recording_forward_hook import ( FeatureVectorHook, ) diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py index a0a410f3035..e705d18bdc8 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/__init__.py @@ -10,11 +10,11 
@@ from .custom_fcn_mask_head import CustomFCNMaskHead from .custom_retina_head import CustomRetinaHead from .custom_roi_head import CustomRoIHead +from .custom_rpn_head import CustomRPNHead from .custom_ssd_head import CustomSSDHead from .custom_vfnet_head import CustomVFNetHead from .custom_yolox_head import CustomYOLOXHead from .detr_head import DETRHeadExtension -from .custom_rpn_head import CustomRPNHead __all__ = [ "CrossDatasetDetectorHead", diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py index 45da297ca82..d8e546a5f91 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_roi_head.py @@ -134,10 +134,10 @@ def get_targets(self, sampling_results, gt_bboxes, gt_labels, img_metas, rcnn_tr return labels, label_weights, bbox_targets, bbox_weights, valid_label_mask def forward(self, x): - '''ConvFCBBoxHead forward''' + """ConvFCBBoxHead forward.""" # shared part cls_score, bbox_pred = super().forward(x) - if cls_score.device.type == 'hpu': + if cls_score.device.type == "hpu": cls_score = cls_score.cpu() bbox_pred = bbox_pred.cpu() diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py index 6d7614bb844..4f73b1b9511 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py @@ -1,6 +1,9 @@ -import torch.nn.functional as F -from mmdet.models.dense_heads import RPNHead +"""Custom ROI head for OTX template.""" +# Copyright (C) 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# from mmdet.models.builder import HEADS +from mmdet.models.dense_heads import RPNHead @HEADS.register_module() diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py index 1ab2e6a8bbe..7aebbcb3173 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_ssd_head.py @@ -13,7 +13,6 @@ from mmdet.models.dense_heads.ssd_head import SSDHead from mmdet.models.losses import smooth_l1_loss from torch import nn -import time from otx.algorithms.detection.adapters.mmdet.models.heads.cross_dataset_detector_head import TrackingLossDynamicsMixIn from otx.algorithms.detection.adapters.mmdet.models.loss_dyns import ( @@ -100,8 +99,7 @@ def forward(self, feats): """ cls_scores = [] bbox_preds = [] - for feat, reg_conv, cls_conv in zip(feats, self.reg_convs, - self.cls_convs): + for feat, reg_conv, cls_conv in zip(feats, self.reg_convs, self.cls_convs): cls_out = cls_conv(feat) reg_out = reg_conv(feat) if cls_out.device.type == "hpu": diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py index e47d891ea48..8b12ae88dd2 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py @@ -43,7 +43,6 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m gt_bboxes_ignore (None | list[Tensor]): specify which bounding boxes can be 
ignored when computing the loss. """ - num_imgs = len(img_metas) featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] mlvl_priors = self.prior_generator.grid_priors( @@ -104,11 +103,9 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m return loss_dict - def forward_single(self, x, cls_convs, reg_convs, conv_cls, conv_reg, - conv_obj): + def forward_single(self, x, cls_convs, reg_convs, conv_cls, conv_reg, conv_obj): """Forward feature of a single scale level.""" - cls_score, bbox_pred, objectness = super().forward_single(x, cls_convs, reg_convs, conv_cls, conv_reg, - conv_obj) + cls_score, bbox_pred, objectness = super().forward_single(x, cls_convs, reg_convs, conv_cls, conv_reg, conv_obj) if cls_score.device.type == "hpu": # put on cpu for further post-processing cls_score = cls_score.cpu() diff --git a/src/otx/algorithms/detection/configs/detection/configuration.yaml b/src/otx/algorithms/detection/configs/detection/configuration.yaml index 7fb866e16df..d36b0d941bc 100644 --- a/src/otx/algorithms/detection/configs/detection/configuration.yaml +++ b/src/otx/algorithms/detection/configs/detection/configuration.yaml @@ -129,7 +129,7 @@ learning_parameters: warning: null enable_early_stopping: affects_outcome_of: TRAINING - default_value: false + default_value: true description: Early exit from training when validation accuracy isn't changed or decreased for several epochs. editable: true header: Enable early stopping of the training diff --git a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py index bc4eff90aa2..45847b0b80c 100644 --- a/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py +++ b/src/otx/algorithms/detection/configs/detection/mobilenetv2_ssd/model.py @@ -96,5 +96,4 @@ /models/object_detection/v2/mobilenet_v2-2s_ssd-992x736.pth" fp16 = dict(loss_scale=512.0) -# fp16 = None ignore = False diff --git a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py index d0e51ef26c9..d8918edc33f 100644 --- a/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py +++ b/src/otx/algorithms/detection/configs/instance_segmentation/resnet50_maskrcnn/model.py @@ -159,5 +159,5 @@ v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/\ mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth" -evaluation = dict(interval=100, metric="mAP", save_best="mAP", iou_thr=[0.5]) +evaluation = dict(interval=1, metric="mAP", save_best="mAP", iou_thr=[0.5]) ignore = True diff --git a/src/otx/recipes/stages/instance-segmentation/train.py b/src/otx/recipes/stages/instance-segmentation/train.py index 12e2110c7f8..0ac963fa94d 100644 --- a/src/otx/recipes/stages/instance-segmentation/train.py +++ b/src/otx/recipes/stages/instance-segmentation/train.py @@ -7,7 +7,6 @@ ] optimizer = dict( - type="SGD", lr=0.001, momentum=0.9, weight_decay=0.0001, From 6eaf641d2b69e504e2379977f9488692b3f3646e Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 16:43:01 +0000 Subject: [PATCH 13/16] minor --- .../detection/adapters/mmdet/models/heads/custom_rpn_head.py | 2 +- .../detection/adapters/mmdet/models/heads/custom_yolox_head.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py 
b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py index 4f73b1b9511..b5bb4184fe3 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_rpn_head.py @@ -1,4 +1,4 @@ -"""Custom ROI head for OTX template.""" +"""Custom RPN head for OTX template.""" # Copyright (C) 2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # diff --git a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py index 8b12ae88dd2..161d692e4f4 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py +++ b/src/otx/algorithms/detection/adapters/mmdet/models/heads/custom_yolox_head.py @@ -229,6 +229,7 @@ def loss(self, cls_scores, bbox_preds, objectnesses, gt_bboxes, gt_labels, img_m if self.use_l1: loss_l1 = self.loss_l1(flatten_bbox_preds.view(-1, 4)[pos_masks], l1_targets) / num_total_samples loss_dict.update(loss_l1=loss_l1) + return loss_dict @torch.no_grad() From ecf1b43044faf952e930723fa7d6d3404821d18e Mon Sep 17 00:00:00 2001 From: kprokofi Date: Wed, 8 Nov 2023 17:10:10 +0000 Subject: [PATCH 14/16] change cast of bf16 --- .../common/adapters/mmcv/hooks/recording_forward_hook.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py index d4df8bbbc22..a3b2698babb 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/recording_forward_hook.py @@ -23,6 +23,7 @@ from torch.nn import LayerNorm from otx.algorithms.classification import MMCLS_AVAILABLE +from otx.algorithms.common.utils.utils import cast_bf16_to_fp32 if MMCLS_AVAILABLE: from mmcls.models.necks.gap import GlobalAveragePooling @@ -74,9 +75,7 @@ def _recording_forward( ): # pylint: disable=unused-argument tensors = self.func(output) if isinstance(tensors, torch.Tensor): - if tensors.dtype == torch.bfloat16: - tensors = tensors.to(torch.float32) - tensors_np = tensors.detach().cpu().numpy() + tensors_np = cast_bf16_to_fp32(tensors).detach().cpu().numpy() elif isinstance(tensors, np.ndarray): tensors_np = tensors else: From 496c92f08881bb8a5ae54b1d61289a0ad8d734ac Mon Sep 17 00:00:00 2001 From: kprokofi Date: Thu, 9 Nov 2023 14:23:03 +0000 Subject: [PATCH 15/16] reply comments --- .../utils/_builder_build_data_parallel.py | 2 + src/otx/algorithms/common/utils/__init__.py | 2 + .../detection/adapters/mmdet/apis/train.py | 47 +++++-------------- .../adapters/mmdet/evaluation/evaluator.py | 5 +- .../detection/adapters/mmdet/task.py | 10 ++-- 5 files changed, 23 insertions(+), 43 deletions(-) diff --git a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py index d20cd540ed1..226e5e8cc25 100644 --- a/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py +++ b/src/otx/algorithms/common/adapters/mmcv/utils/_builder_build_data_parallel.py @@ -154,6 +154,8 @@ def scatter(self, inputs, kwargs, device_ids): for val in x: if isinstance(val, dict): for k in val: + # don't put annotations on the HPU to proceed + # post-processing on the CPU if not self.put_gt_on_device and k.startswith("gt_"): continue if isinstance(val[k], 
torch.Tensor): diff --git a/src/otx/algorithms/common/utils/__init__.py b/src/otx/algorithms/common/utils/__init__.py index 6395bd6e60d..5e8b55a0171 100644 --- a/src/otx/algorithms/common/utils/__init__.py +++ b/src/otx/algorithms/common/utils/__init__.py @@ -14,6 +14,7 @@ # See the License for the specific language governing permissions # and limitations under the License. +import os from .callback import ( InferenceProgressCallback, OptimizationProgressCallback, @@ -59,4 +60,5 @@ if is_hpu_available(): + os.environ["PT_HPU_LAZY_MODE"] = "1" import habana_frameworks.torch.gpu_migration # noqa: F401 diff --git a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py index 98ac05d28f5..4565631880d 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/apis/train.py +++ b/src/otx/algorithms/detection/adapters/mmdet/apis/train.py @@ -122,31 +122,26 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids, enable_autocast=bool(fp16_cfg)) model.to(f"xpu:{cfg.gpu_ids[0]}") elif cfg.device == "hpu": - import habana_frameworks.torch.core as htcore - from habana_frameworks.torch.utils.library_loader import load_habana_module - - load_habana_module() - os.environ["PT_HPU_LAZY_MODE"] = "1" - assert len(cfg.gpu_ids) == 1 model = build_dp( model, cfg.device, device_ids=cfg.gpu_ids, dim=0, enable_autocast=bool(fp16_cfg), put_gt_on_device=False ) - model.to(model.src_device_obj) - htcore.mark_step() - model.zero_grad() + # patch optimizer + if (new_type := "Fused" + cfg.optimizer.get("type", "SGD")) in HABANA_OPTIMIZERS: + cfg.optimizer["type"] = new_type else: model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids) # build optimizer auto_scale_lr(cfg, distributed, logger) - if cfg.device == "hpu": - cfg.optimizer = patch_optimizer(cfg.optimizer) + + if cfg.device in ["hpu", "xpu"]: + # dynamic patch for nms and roi_align + NMSop.forward = monkey_patched_nms + RoIAlign.forward = monkey_patched_roi_align + optimizer = build_optimizer(model, cfg.optimizer) if cfg.device == "xpu": - # dynamic patch for nms and roi_align - NMSop.forward = monkey_patched_xpu_nms - RoIAlign.forward = monkey_patched_xpu_roi_align if fp16_cfg is not None: dtype = torch.bfloat16 else: @@ -154,14 +149,6 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times model.train() model, optimizer = torch.xpu.optimize(model, optimizer=optimizer, dtype=dtype) - if cfg.device == "hpu": - NMSop.forward = monkey_patched_xpu_nms - RoIAlign.forward = monkey_patched_xpu_roi_align - # build runner - if cfg.device == "hpu": - if (new_type := "Fused" + cfg.optimizer.get("type", "SGD")) in HABANA_OPTIMIZERS: - cfg.optimizer["type"] = new_type - runner = build_runner( cfg.runner, default_args=dict(model=model, optimizer=optimizer, work_dir=cfg.work_dir, logger=logger, meta=meta) ) @@ -223,19 +210,7 @@ def train_detector(model, dataset, cfg, distributed=False, validate=False, times runner.run(data_loaders, cfg.workflow) -def patch_optimizer(cfg_optim): - """Patch optimizer for OD and IS.""" - if cfg_optim["type"] == "SGD": - return cfg_optim - - # Only SGD for OD and IS supported by now on HPU - cfg_optim["type"] = "SGD" - if "betas" in cfg_optim: - del cfg_optim["betas"] - return cfg_optim - - -def monkey_patched_xpu_nms(ctx, bboxes, scores, iou_threshold, offset, score_threshold, max_num): +def monkey_patched_nms(ctx, bboxes, scores, iou_threshold, offset, 
score_threshold, max_num): """Runs MMCVs NMS with torchvision.nms, or forces NMS from MMCV to run on CPU.""" is_filtering_by_score = score_threshold > 0 if is_filtering_by_score: @@ -265,7 +240,7 @@ def monkey_patched_xpu_nms(ctx, bboxes, scores, iou_threshold, offset, score_thr return inds -def monkey_patched_xpu_roi_align(self, input, rois): +def monkey_patched_roi_align(self, input, rois): """Replaces MMCVs roi align with the one from torchvision. Args: diff --git a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py index f4cad3df5bb..96b42d05a2b 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py +++ b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py @@ -31,7 +31,7 @@ from otx.api.entities.label import Domain from otx.api.utils.time_utils import timeit - +from otx.algorithms.common.utils.utils import is_hpu_available def print_map_summary( # pylint: disable=too-many-locals,too-many-branches mean_ap, results, dataset=None, scale_ranges=None, logger=None @@ -112,7 +112,8 @@ def print_map_summary( # pylint: disable=too-many-locals,too-many-branches table_data.append(table_) table = AsciiTable(table_data) table.inner_footing_row_border = True - time.sleep(0.1) # prevent segmentation fault + if is_hpu_available(): + time.sleep(0.1) # prevent segmentation fault print_log("\n" + table.table, logger=logger) diff --git a/src/otx/algorithms/detection/adapters/mmdet/task.py b/src/otx/algorithms/detection/adapters/mmdet/task.py index 3b8040408be..53c00ceaf32 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/task.py +++ b/src/otx/algorithms/detection/adapters/mmdet/task.py @@ -44,8 +44,8 @@ from otx.algorithms.common.utils.data import get_dataset from otx.algorithms.common.utils.logger import get_logger from otx.algorithms.detection.adapters.mmdet.apis.train import ( - monkey_patched_xpu_nms, - monkey_patched_xpu_roi_align, + monkey_patched_nms, + monkey_patched_roi_align, train_detector, ) from otx.algorithms.detection.adapters.mmdet.configurer import ( @@ -348,9 +348,9 @@ def _infer_model( else: target_classes = mm_dataset.CLASSES - if cfg.device == "xpu": - NMSop.forward = monkey_patched_xpu_nms - RoIAlign.forward = monkey_patched_xpu_roi_align + if cfg.device in ["xpu", "hpu"]: + NMSop.forward = monkey_patched_nms + RoIAlign.forward = monkey_patched_roi_align # Model model = self.build_model(cfg, fp16=cfg.get("fp16", False)) From e85d681efb2eace8a8b6a14bd8e0a7d70d96236a Mon Sep 17 00:00:00 2001 From: eunwoosh Date: Fri, 10 Nov 2023 09:39:40 +0900 Subject: [PATCH 16/16] align with pre-commit --- src/otx/algorithms/common/utils/__init__.py | 1 + .../detection/adapters/mmdet/evaluation/evaluator.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/otx/algorithms/common/utils/__init__.py b/src/otx/algorithms/common/utils/__init__.py index 5e8b55a0171..80372c59b4b 100644 --- a/src/otx/algorithms/common/utils/__init__.py +++ b/src/otx/algorithms/common/utils/__init__.py @@ -15,6 +15,7 @@ # and limitations under the License. 
import os + from .callback import ( InferenceProgressCallback, OptimizationProgressCallback, diff --git a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py index 96b42d05a2b..36bda12206f 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py +++ b/src/otx/algorithms/detection/adapters/mmdet/evaluation/evaluator.py @@ -29,9 +29,10 @@ from mmdet.core.evaluation.mean_ap import average_precision from terminaltables import AsciiTable +from otx.algorithms.common.utils.utils import is_hpu_available from otx.api.entities.label import Domain from otx.api.utils.time_utils import timeit -from otx.algorithms.common.utils.utils import is_hpu_available + def print_map_summary( # pylint: disable=too-many-locals,too-many-branches mean_ap, results, dataset=None, scale_ranges=None, logger=None
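
The hunks above import `is_hpu_available` and `cast_bf16_to_fp32` from `otx.algorithms.common.utils.utils`, but the series does not show their definitions in this excerpt. A minimal sketch of what such helpers could look like, using only standard `importlib` and `torch` APIs (an illustrative assumption, not the actual OTX implementation):

    # Hypothetical sketch -- not taken from the patch itself.
    import importlib.util

    import torch


    def is_hpu_available() -> bool:
        """Return True if the Habana PyTorch bridge appears to be installed."""
        return importlib.util.find_spec("habana_frameworks") is not None


    def cast_bf16_to_fp32(tensor: torch.Tensor) -> torch.Tensor:
        """Cast bfloat16 tensors to float32 so they can be moved to numpy."""
        if tensor.dtype == torch.bfloat16:
            tensor = tensor.to(torch.float32)
        return tensor

A helper of this shape is what the recording-forward-hook change relies on: bfloat16 activations produced under HPU autocast cannot be passed to `.numpy()` directly (numpy has no bfloat16 dtype), so they are cast to float32 before the `.detach().cpu().numpy()` chain.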