From 85f033a50fcc61c7aeb62704cdecbac4acfcc4e8 Mon Sep 17 00:00:00 2001 From: "Kang, Harim" Date: Mon, 20 Mar 2023 13:40:59 +0900 Subject: [PATCH 1/5] Add mmcls transformer backbones --- otx/algorithms/classification/configs/configuration.yaml | 2 +- otx/algorithms/common/configs/training_base.py | 2 +- otx/cli/builder/supported_backbone/mmcls.json | 8 ++++---- otx/mpa/cls/stage.py | 7 +++++++ 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/otx/algorithms/classification/configs/configuration.yaml b/otx/algorithms/classification/configs/configuration.yaml index 897c3f7e13f..dd2a93c51a0 100644 --- a/otx/algorithms/classification/configs/configuration.yaml +++ b/otx/algorithms/classification/configs/configuration.yaml @@ -10,7 +10,7 @@ learning_parameters: stable. A larger batch size has higher memory requirements. editable: true header: Batch size - max_value: 512 + max_value: 2048 min_value: 1 type: INTEGER ui_rules: diff --git a/otx/algorithms/common/configs/training_base.py b/otx/algorithms/common/configs/training_base.py index 1e99f5048ee..4c397554c54 100644 --- a/otx/algorithms/common/configs/training_base.py +++ b/otx/algorithms/common/configs/training_base.py @@ -65,7 +65,7 @@ class BaseLearningParameters(ParameterGroup): batch_size = configurable_integer( default_value=5, min_value=1, - max_value=512, + max_value=2048, header="Batch size", description="The number of training samples seen in each iteration of training. Increasing thisvalue " "improves training time and may make the training more stable. 
A larger batch size has higher " diff --git a/otx/cli/builder/supported_backbone/mmcls.json b/otx/cli/builder/supported_backbone/mmcls.json index 6b5f1343a2e..a63bfc636b3 100644 --- a/otx/cli/builder/supported_backbone/mmcls.json +++ b/otx/cli/builder/supported_backbone/mmcls.json @@ -11,7 +11,7 @@ "options": { "arch": ["tiny", "small", "base"] }, - "available": [] + "available": ["CLASSIFICATION"] }, "mmcls.ConvMixer": { "required": ["arch"], @@ -287,7 +287,7 @@ "mmcls.T2T_ViT": { "required": [], "options": {}, - "available": [] + "available": ["CLASSIFICATION"] }, "mmcls.TIMMBackbone": { "required": ["model_name"], @@ -299,7 +299,7 @@ "options": { "arch": ["base", "small"] }, - "available": [] + "available": ["CLASSIFICATION"] }, "mmcls.PCPVT": { "required": ["arch"], @@ -341,7 +341,7 @@ "deit-base" ] }, - "available": [] + "available": ["CLASSIFICATION"] } } } diff --git a/otx/mpa/cls/stage.py b/otx/mpa/cls/stage.py index d24abbe12fd..08d5d051eb8 100644 --- a/otx/mpa/cls/stage.py +++ b/otx/mpa/cls/stage.py @@ -15,6 +15,8 @@ logger = get_logger() +TRANSFORMER_BACKBONES = ["VisionTransformer", "T2T_ViT", "TNT", "Conformer"] + class ClsStage(Stage): MODEL_BUILDER = build_classifier @@ -89,6 +91,11 @@ def configure_in_channel(cfg): output = layer(torch.rand([1] + list(input_shape))) if isinstance(output, (tuple, list)): output = output[-1] + + if layer.__class__.__name__ in TRANSFORMER_BACKBONES: + # mmcls.VisionTransformer outputs Tuple[List[...]] and the last index of List is the final logit. 
+ _, output = output + in_channels = output.shape[1] if cfg.model.get("neck") is not None: if cfg.model.neck.get("in_channels") is not None: From 3b0c1488af95f2b9d05264bee4a3f4dbedb1c1d8 Mon Sep 17 00:00:00 2001 From: "Kang, Harim" Date: Mon, 20 Mar 2023 14:21:36 +0900 Subject: [PATCH 2/5] Fix VisionTransformer output check --- otx/mpa/cls/stage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/otx/mpa/cls/stage.py b/otx/mpa/cls/stage.py index 08d5d051eb8..eeefaa14db0 100644 --- a/otx/mpa/cls/stage.py +++ b/otx/mpa/cls/stage.py @@ -92,7 +92,7 @@ def configure_in_channel(cfg): if isinstance(output, (tuple, list)): output = output[-1] - if layer.__class__.__name__ in TRANSFORMER_BACKBONES: + if layer.__class__.__name__ in TRANSFORMER_BACKBONES and isinstance(output, (tuple, list)): # mmcls.VisionTransformer outputs Tuple[List[...]] and the last index of List is the final logit. 
TRANSFORMER_BACKBONES from otx.cli.utils.importing import ( get_backbone_list, get_backbone_registry, @@ -212,6 +213,12 @@ def merge_backbone( out_channels = -1 if hasattr(model_config.model, "head"): model_config.model.head.in_channels = -1 + # TODO: This is a hard coded part of the Transformer backbone and needs to be refactored. + if backend == "mmcls" and backbone_class in TRANSFORMER_BACKBONES: + if hasattr(model_config.model, "neck"): + model_config.model.neck = None + if hasattr(model_config.model, "head"): + model_config.model.head["type"] = "VisionTransformerClsHead" else: # Need to update in/out channel configuration here out_channels = get_backbone_out_channels(backbone) diff --git a/otx/mpa/cls/inferrer.py b/otx/mpa/cls/inferrer.py index 17336bf7cd4..3c511d91212 100644 --- a/otx/mpa/cls/inferrer.py +++ b/otx/mpa/cls/inferrer.py @@ -11,6 +11,7 @@ from mmcls.datasets import build_dataset as mmcls_build_dataset from mmcv import Config, ConfigDict +from otx.algorithms import TRANSFORMER_BACKBONES from otx.algorithms.common.adapters.mmcv.utils import ( build_data_parallel, build_dataloader, @@ -53,6 +54,9 @@ def run(self, model_cfg, model_ckpt, data_cfg, **kwargs): model_builder = kwargs.get("model_builder", None) dump_features = kwargs.get("dump_features", False) dump_saliency_map = kwargs.get("dump_saliency_map", False) + # TODO: It looks like we need to modify that code in an appropriate way. 
+ if model_cfg.model.head.get("type", None) == "VisionTransformerClsHead": + dump_saliency_map = False eval = kwargs.get("eval", False) outputs = self.infer( cfg, diff --git a/otx/mpa/cls/stage.py b/otx/mpa/cls/stage.py index eeefaa14db0..d12eb790e05 100644 --- a/otx/mpa/cls/stage.py +++ b/otx/mpa/cls/stage.py @@ -9,14 +9,13 @@ from mmcv import ConfigDict, build_from_cfg from otx.algorithms.classification.adapters.mmcls.utils.builder import build_classifier +from otx.algorithms import TRANSFORMER_BACKBONES from otx.mpa.stage import Stage from otx.mpa.utils.config_utils import recursively_update_cfg, update_or_add_custom_hook from otx.mpa.utils.logger import get_logger logger = get_logger() -TRANSFORMER_BACKBONES = ["VisionTransformer", "T2T_ViT", "TNT", "Conformer"] - class ClsStage(Stage): MODEL_BUILDER = build_classifier @@ -95,6 +94,8 @@ def configure_in_channel(cfg): if layer.__class__.__name__ in TRANSFORMER_BACKBONES and isinstance(output, (tuple, list)): # mmcls.VisionTransformer outputs Tuple[List[...]] and the last index of List is the final logit. 
_, output = output + if cfg.model.head.type != "VisionTransformerClsHead": + raise ValueError(f"{layer.__class__.__name__ } needs VisionTransformerClsHead as head") in_channels = output.shape[1] if cfg.model.get("neck") is not None: diff --git a/otx/mpa/modules/hooks/recording_forward_hooks.py b/otx/mpa/modules/hooks/recording_forward_hooks.py index 4b3fc7011e2..0935ce8c9e6 100644 --- a/otx/mpa/modules/hooks/recording_forward_hooks.py +++ b/otx/mpa/modules/hooks/recording_forward_hooks.py @@ -20,6 +20,7 @@ import torch from otx import MMCLS_AVAILABLE +from otx.algorithms import TRANSFORMER_BACKBONES if MMCLS_AVAILABLE: from mmcls.models.necks.gap import GlobalAveragePooling @@ -116,10 +117,12 @@ def func(feature_map: Union[torch.Tensor, Sequence[torch.Tensor]], fpn_idx: int class FeatureVectorHook(BaseRecordingForwardHook): - @staticmethod - def func(feature_map: Union[torch.Tensor, Sequence[torch.Tensor]]) -> torch.Tensor: + def func(self, feature_map: Union[torch.Tensor, Sequence[torch.Tensor]]) -> torch.Tensor: """Generate the feature vector by average pooling feature maps.""" - if isinstance(feature_map, (list, tuple)): + if self._module.backbone.__class__.__name__ in TRANSFORMER_BACKBONES and isinstance(feature_map[-1], (tuple, list)): + # mmcls.VisionTransformer outputs Tuple[List[...]] and the last index of List is the final logit. 
+ feature_vector, _ = feature_map[-1] + elif isinstance(feature_map, (list, tuple)): # aggregate feature maps from Feature Pyramid Network feature_vector = [torch.nn.functional.adaptive_avg_pool2d(f, (1, 1)) for f in feature_map] feature_vector = torch.cat(feature_vector, 1) From d300f7ca2f94b3a42452c1f310a85f92b35febba Mon Sep 17 00:00:00 2001 From: "Kang, Harim" Date: Mon, 20 Mar 2023 22:22:27 +0900 Subject: [PATCH 4/5] Disable recording forward hooks in inferrer --- otx/algorithms/__init__.py | 2 +- otx/cli/builder/builder.py | 6 +++--- otx/cli/builder/supported_backbone/mmcls.json | 2 +- otx/mpa/cls/inferrer.py | 1 + otx/mpa/cls/stage.py | 2 +- otx/mpa/modules/hooks/recording_forward_hooks.py | 8 +++----- 6 files changed, 10 insertions(+), 11 deletions(-) diff --git a/otx/algorithms/__init__.py b/otx/algorithms/__init__.py index 5e6c32a5e1d..daf814e52b2 100644 --- a/otx/algorithms/__init__.py +++ b/otx/algorithms/__init__.py @@ -3,4 +3,4 @@ # Copyright (C) 2022 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -TRANSFORMER_BACKBONES = ["VisionTransformer", "T2T_ViT", "TNT", "Conformer"] +TRANSFORMER_BACKBONES = ["VisionTransformer", "T2T_ViT", "Conformer"] diff --git a/otx/cli/builder/builder.py b/otx/cli/builder/builder.py index 1c3aee71329..aabff5429ab 100644 --- a/otx/cli/builder/builder.py +++ b/otx/cli/builder/builder.py @@ -28,8 +28,8 @@ from mmcv.utils import Registry, build_from_cfg from torch import nn -from otx.api.entities.model_template import TaskType from otx.algorithms import TRANSFORMER_BACKBONES +from otx.api.entities.model_template import TaskType from otx.cli.utils.importing import ( get_backbone_list, get_backbone_registry, @@ -102,8 +102,8 @@ def update_backbone_args(backbone_config: dict, registry: Registry, backend: str def update_channels(model_config: MPAConfig, out_channels: Any): """Update in_channel of head or neck.""" - if hasattr(model_config.model, "neck"): - if model_config.model.neck.type == "GlobalAveragePooling": + if 
hasattr(model_config.model, "neck") and model_config.model.neck: + if model_config.model.neck.get("type", None) == "GlobalAveragePooling": model_config.model.neck.pop("in_channels", None) else: print(f"\tUpdate model.neck.in_channels: {out_channels}") diff --git a/otx/cli/builder/supported_backbone/mmcls.json b/otx/cli/builder/supported_backbone/mmcls.json index a63bfc636b3..71f10692aa5 100644 --- a/otx/cli/builder/supported_backbone/mmcls.json +++ b/otx/cli/builder/supported_backbone/mmcls.json @@ -299,7 +299,7 @@ "options": { "arch": ["base", "small"] }, - "available": ["CLASSIFICATION"] + "available": [] }, "mmcls.PCPVT": { "required": ["arch"], diff --git a/otx/mpa/cls/inferrer.py b/otx/mpa/cls/inferrer.py index 3c511d91212..9c7e5770219 100644 --- a/otx/mpa/cls/inferrer.py +++ b/otx/mpa/cls/inferrer.py @@ -56,6 +56,7 @@ def run(self, model_cfg, model_ckpt, data_cfg, **kwargs): dump_saliency_map = kwargs.get("dump_saliency_map", False) # TODO: It looks like we need to modify that code in an appropriate way. 
if model_cfg.model.head.get("type", None) == "VisionTransformerClsHead": + dump_features = False dump_saliency_map = False eval = kwargs.get("eval", False) outputs = self.infer( diff --git a/otx/mpa/cls/stage.py b/otx/mpa/cls/stage.py index d12eb790e05..dd78acbfffa 100644 --- a/otx/mpa/cls/stage.py +++ b/otx/mpa/cls/stage.py @@ -8,8 +8,8 @@ import torch from mmcv import ConfigDict, build_from_cfg -from otx.algorithms.classification.adapters.mmcls.utils.builder import build_classifier from otx.algorithms import TRANSFORMER_BACKBONES +from otx.algorithms.classification.adapters.mmcls.utils.builder import build_classifier from otx.mpa.stage import Stage from otx.mpa.utils.config_utils import recursively_update_cfg, update_or_add_custom_hook from otx.mpa.utils.logger import get_logger diff --git a/otx/mpa/modules/hooks/recording_forward_hooks.py b/otx/mpa/modules/hooks/recording_forward_hooks.py index 0935ce8c9e6..0a6a6cc2a63 100644 --- a/otx/mpa/modules/hooks/recording_forward_hooks.py +++ b/otx/mpa/modules/hooks/recording_forward_hooks.py @@ -117,12 +117,10 @@ def func(feature_map: Union[torch.Tensor, Sequence[torch.Tensor]], fpn_idx: int class FeatureVectorHook(BaseRecordingForwardHook): - def func(self, feature_map: Union[torch.Tensor, Sequence[torch.Tensor]]) -> torch.Tensor: + @staticmethod + def func(feature_map: Union[torch.Tensor, Sequence[torch.Tensor]]) -> torch.Tensor: """Generate the feature vector by average pooling feature maps.""" - if self._module.backbone.__class__.__name__ in TRANSFORMER_BACKBONES and isinstance(feature_map[-1], (tuple, list)): - # mmcls.VisionTransformer outputs Tuple[List[...]] and the last index of List is the final logit. 
- feature_vector, _ = feature_map[-1] - elif isinstance(feature_map, (list, tuple)): + if isinstance(feature_map, (list, tuple)): # aggregate feature maps from Feature Pyramid Network feature_vector = [torch.nn.functional.adaptive_avg_pool2d(f, (1, 1)) for f in feature_map] feature_vector = torch.cat(feature_vector, 1) From 631055474766018ea1506f4c4d2eb9ffe1219e4b Mon Sep 17 00:00:00 2001 From: "Kang, Harim" Date: Mon, 20 Mar 2023 22:36:57 +0900 Subject: [PATCH 5/5] Remove unused import --- otx/mpa/modules/hooks/recording_forward_hooks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/otx/mpa/modules/hooks/recording_forward_hooks.py b/otx/mpa/modules/hooks/recording_forward_hooks.py index 0a6a6cc2a63..4b3fc7011e2 100644 --- a/otx/mpa/modules/hooks/recording_forward_hooks.py +++ b/otx/mpa/modules/hooks/recording_forward_hooks.py @@ -20,7 +20,6 @@ import torch from otx import MMCLS_AVAILABLE -from otx.algorithms import TRANSFORMER_BACKBONES if MMCLS_AVAILABLE: from mmcls.models.necks.gap import GlobalAveragePooling