diff --git a/src/otx/algorithms/classification/adapters/mmcls/configurer.py b/src/otx/algorithms/classification/adapters/mmcls/configurer.py index a77938d6b10..397026d5760 100644 --- a/src/otx/algorithms/classification/adapters/mmcls/configurer.py +++ b/src/otx/algorithms/classification/adapters/mmcls/configurer.py @@ -1,9 +1,9 @@ """Base configurer for mmdet config.""" + # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# -from typing import Optional +from typing import Optional, Tuple import torch from mmcv import build_from_cfg @@ -22,7 +22,6 @@ recursively_update_cfg, update_or_add_custom_hook, ) -from otx.algorithms.common.configs.configuration_enums import InputSizePreset from otx.algorithms.common.utils.logger import get_logger logger = get_logger() @@ -162,16 +161,19 @@ def configure_topk(cfg): @staticmethod def configure_input_size( - cfg, input_size_config: InputSizePreset = InputSizePreset.DEFAULT, model_ckpt_path: Optional[str] = None + cfg, input_size: Optional[Tuple[int, int]] = None, model_ckpt_path: Optional[str] = None, training=True ): """Change input size if necessary.""" - manager = InputSizeManager(cfg) - input_size = manager.get_configured_input_size(input_size_config, model_ckpt_path) if input_size is None: # InputSizePreset.DEFAULT return + manager = InputSizeManager(cfg) + if input_size == (0, 0): # InputSizePreset.AUTO - input_size = BaseConfigurer.adapt_input_size_to_dataset(cfg, manager) + if training: + input_size = BaseConfigurer.adapt_input_size_to_dataset(cfg, manager, use_annotations=False) + else: + input_size = manager.get_trained_input_size(model_ckpt_path) if input_size is None: return diff --git a/src/otx/algorithms/classification/adapters/mmcls/nncf/task.py b/src/otx/algorithms/classification/adapters/mmcls/nncf/task.py index 31dcda81344..eefb7bf1de4 100644 --- a/src/otx/algorithms/classification/adapters/mmcls/nncf/task.py +++ b/src/otx/algorithms/classification/adapters/mmcls/nncf/task.py @@ -1,18 +1,7 @@ 
"""NNCF Task for OTX Classification.""" -# Copyright (C) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. +# Copyright (C) 2022-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 from functools import partial from typing import List, Optional @@ -121,3 +110,6 @@ def _generate_training_metrics_group(self, learning_curves): output.append(LineMetricsGroup(metrics=[metric_curve], visualization_info=visualization_info)) return output, best_acc + + def _save_model_post_hook(self, modelinfo): + modelinfo["input_size"] = self._input_size diff --git a/src/otx/algorithms/classification/adapters/mmcls/task.py b/src/otx/algorithms/classification/adapters/mmcls/task.py index eb59b34ddb9..425ddc6153d 100644 --- a/src/otx/algorithms/classification/adapters/mmcls/task.py +++ b/src/otx/algorithms/classification/adapters/mmcls/task.py @@ -1,18 +1,7 @@ """Task of OTX Classification using mmclassification training backend.""" # Copyright (C) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions -# and limitations under the License. +# SPDX-License-Identifier: Apache-2.0 import glob import os @@ -194,11 +183,12 @@ def configure( ir_options, data_classes, model_classes, - self._hyperparams.learning_parameters.input_size, + self._input_size, options_for_patch_datasets=options_for_patch_datasets, options_for_patch_evaluation=options_for_patch_evaluation, ) self._config = cfg + self._input_size = cfg.model.pop("input_size", None) return cfg def build_model( diff --git a/src/otx/algorithms/classification/task.py b/src/otx/algorithms/classification/task.py index 03da102188f..9050e736f0c 100644 --- a/src/otx/algorithms/classification/task.py +++ b/src/otx/algorithms/classification/task.py @@ -1,18 +1,7 @@ """Task of OTX Classification.""" # Copyright (C) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
+# SPDX-License-Identifier: Apache-2.0 import io import json @@ -34,6 +23,7 @@ get_multihead_class_info as get_hierarchical_info, ) from otx.algorithms.common.configs import TrainType +from otx.algorithms.common.configs.configuration_enums import InputSizePreset from otx.algorithms.common.tasks.base_task import TRAIN_TYPE_DIR_PATH, OTXTask from otx.algorithms.common.utils import embed_ir_model_data from otx.algorithms.common.utils.callback import TrainingProgressCallback @@ -80,6 +70,7 @@ from otx.api.utils.dataset_utils import add_saliency_maps_to_dataset_item from otx.api.utils.labels_utils import get_empty_label from otx.cli.utils.multi_gpu import is_multigpu_child_process +from otx.core.data.caching.mem_cache_handler import MemCacheHandlerSingleton logger = get_logger() RECIPE_TRAIN_TYPE = { @@ -129,6 +120,12 @@ def __init__(self, task_environment: TaskEnvironment, output_path: Optional[str] if self._task_environment.model is not None: self._load_model() + if hasattr(self._hyperparams.learning_parameters, "input_size"): + input_size_cfg = InputSizePreset(self._hyperparams.learning_parameters.input_size.value) + else: + input_size_cfg = InputSizePreset.DEFAULT + self._input_size = input_size_cfg.tuple + def _is_multi_label(self, label_groups: List[LabelGroup], all_labels: List[LabelEntity]): """Check whether the current training mode is multi-label or not.""" # NOTE: In the current Geti, multi-label should have `___` symbol for all group names. @@ -215,6 +212,8 @@ def train( results = self._train_model(dataset) + MemCacheHandlerSingleton.delete() + # Check for stop signal when training has stopped. 
If should_stop is true, training was cancelled and no new if self._should_stop: logger.info("Training cancelled.") @@ -476,6 +475,7 @@ def save_model(self, output_model: ModelEntity): "model": model_ckpt, "config": hyperparams_str, "labels": labels, + "input_size": self._input_size, "VERSION": 1, } diff --git a/src/otx/algorithms/common/adapters/mmcv/configurer.py b/src/otx/algorithms/common/adapters/mmcv/configurer.py index 149fd79f9f5..68e2ea6c35c 100644 --- a/src/otx/algorithms/common/adapters/mmcv/configurer.py +++ b/src/otx/algorithms/common/adapters/mmcv/configurer.py @@ -26,7 +26,6 @@ recursively_update_cfg, update_or_add_custom_hook, ) -from otx.algorithms.common.configs.configuration_enums import InputSizePreset from otx.algorithms.common.tasks.base_task import OnHookInitialized from otx.algorithms.common.utils import UncopiableDefaultDict, append_dist_rank_suffix from otx.algorithms.common.utils.data import compute_robust_dataset_statistics @@ -74,7 +73,7 @@ def configure( ir_options: Optional[Config] = None, data_classes: Optional[List[str]] = None, model_classes: Optional[List[str]] = None, - input_size: InputSizePreset = InputSizePreset.DEFAULT, + input_size: Optional[Tuple[int, int]] = None, **kwargs: Dict[Any, Any], ) -> Config: """Create MMCV-consumable config from given inputs.""" @@ -228,7 +227,7 @@ def configure_data_pipeline(self, cfg, input_size, model_ckpt_path, **kwargs): """Configuration data pipeline settings.""" patch_color_conversion(cfg) - self.configure_input_size(cfg, input_size, model_ckpt_path) + self.configure_input_size(cfg, input_size, model_ckpt_path, self.training) def configure_recipe(self, cfg, **kwargs): """Configuration training recipe settings.""" @@ -533,7 +532,15 @@ def adapt_input_size_to_dataset( stat = compute_robust_dataset_statistics(dataset, use_annotations) if not stat: return None - logger.info(f"Dataset stat: {json.dumps(stat, indent=4)}") + + def format_float(obj): + if isinstance(obj, float): + return 
f"{obj:.2f}" + if isinstance(obj, dict): + return {k: format_float(v) for k, v in obj.items()} + return obj + + logger.info(f"Dataset stat: {json.dumps(format_float(stat), indent=4)}") # Fit to typical large image size (conservative) # -> "avg" size might be preferrable for efficiency diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/adaptive_training_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/adaptive_training_hook.py index 64a7be8f338..f9e6fb345ff 100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/adaptive_training_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/adaptive_training_hook.py @@ -23,9 +23,15 @@ class AdaptiveTrainSchedulingHook(Hook): """Adaptive Training Scheduling Hook. - Depending on the size of iteration per epoch, adaptively update the validation interval. + Depending on the size of iteration per epoch, adaptively update the validation interval and related values. Args: + base_lr_patience (int): The value of LR drop patience are expected in total epoch. + Patience used when interval is 1, Defaults to 5. + min_lr_patience (int): Minumum value of LR drop patience. + Defaults to 2. + base_es_patience (int): The value of Early-Stopping patience are expected in total epoch. + Patience used when interval is 1, Defaults to 10. max_interval (int): Maximum value of validation interval. Defaults to 5. decay (float): Parameter to control the interval. This value is set by manual manner. 
@@ -39,6 +45,10 @@ class AdaptiveTrainSchedulingHook(Hook): def __init__( self, max_interval=5, + base_lr_patience=5, + min_lr_patience=2, + base_es_patience=10, + min_es_patience=3, decay=-0.025, enable_adaptive_interval_hook=False, enable_eval_before_run=False, @@ -47,6 +57,10 @@ def __init__( super().__init__(**kwargs) self.max_interval = max_interval + self.base_lr_patience = base_lr_patience + self.min_lr_patience = min_lr_patience + self.base_es_patience = base_es_patience + self.min_es_patience = min_es_patience self.decay = decay self.enable_adaptive_interval_hook = enable_adaptive_interval_hook self.enable_eval_before_run = enable_eval_before_run @@ -84,13 +98,23 @@ def before_train_iter(self, runner): logger.info(f"Update EvalHook interval: {hook.interval} -> {adaptive_interval}") hook.interval = adaptive_interval elif isinstance(hook, LrUpdaterHook): + patience = max( + math.ceil((self.base_lr_patience / adaptive_interval)), + self.min_lr_patience, + ) if hasattr(hook, "interval") and hasattr(hook, "patience"): hook.interval = adaptive_interval - logger.info(f"Update LrUpdaterHook interval: {hook.interval} -> {adaptive_interval}") + logger.info(f"Update LrUpdaterHook patience: {hook.patience} -> {patience}") + hook.patience = patience elif isinstance(hook, EarlyStoppingHook): - logger.info(f"Update EarlyStoppingHook interval: {hook.interval} -> {adaptive_interval}") + patience = max( + math.ceil((self.base_es_patience / adaptive_interval)), + self.min_es_patience, + ) + logger.info(f"Update EarlyStoppingHook patience: {hook.patience} -> {patience}") hook.start = adaptive_interval hook.interval = adaptive_interval + hook.patience = patience elif isinstance(hook, CheckpointHook): # make sure checkpoint is saved at last limit = runner.max_epochs if hook.by_epoch else runner.max_iters diff --git a/src/otx/algorithms/common/adapters/mmcv/hooks/task_adapt_hook.py b/src/otx/algorithms/common/adapters/mmcv/hooks/task_adapt_hook.py index 29d4f0ad87b..193f156fafd 
100644 --- a/src/otx/algorithms/common/adapters/mmcv/hooks/task_adapt_hook.py +++ b/src/otx/algorithms/common/adapters/mmcv/hooks/task_adapt_hook.py @@ -9,6 +9,7 @@ from otx.algorithms.common.adapters.torch.dataloaders.samplers import ( BalancedSampler, ClsIncrSampler, + OTXSampler, ) from otx.algorithms.common.utils.logger import get_logger @@ -58,6 +59,11 @@ def before_epoch(self, runner): collate_fn = runner.data_loader.collate_fn worker_init_fn = runner.data_loader.worker_init_fn rank, world_size = get_dist_info() + + if isinstance(runner.data_loader.sampler, OTXSampler): + repeat = runner.data_loader.sampler.repeat + else: + repeat = 1 if self.sampler_type == "balanced": sampler = BalancedSampler( dataset, @@ -65,6 +71,7 @@ def before_epoch(self, runner): efficient_mode=self.efficient_mode, num_replicas=world_size, rank=rank, + n_repeats=repeat, ) else: sampler = ClsIncrSampler( @@ -73,6 +80,7 @@ def before_epoch(self, runner): efficient_mode=self.efficient_mode, num_replicas=world_size, rank=rank, + n_repeats=repeat, ) runner.data_loader = DataLoader( dataset, diff --git a/src/otx/algorithms/common/adapters/mmcv/utils/config_utils.py b/src/otx/algorithms/common/adapters/mmcv/utils/config_utils.py index 2b211890232..007c64dfa30 100644 --- a/src/otx/algorithms/common/adapters/mmcv/utils/config_utils.py +++ b/src/otx/algorithms/common/adapters/mmcv/utils/config_utils.py @@ -682,12 +682,12 @@ def set_input_size(self, input_size: Union[int, List[int], Tuple[int, int]]): self._set_pipeline_size_value(pipelines, resize_ratio) # Set model size - # - needed only for YOLOX model_cfg = self._config.get("model", {}) + model_cfg["input_size"] = input_size if model_cfg.get("type", "") == "CustomYOLOX": + # - needed only for YOLOX if input_size[0] % 32 != 0 or input_size[1] % 32 != 0: raise ValueError("YOLOX should have input size being multiple of 32.") - model_cfg["input_size"] = input_size @property def base_input_size(self) -> Union[Tuple[int, int], Dict[str, Tuple[int, 
int]]]: @@ -862,38 +862,28 @@ def _set_size_value(pipeline: Dict, attr: str, scale: Tuple[Union[int, float], U pipeline[attr] = (round(pipeline[attr][0] * scale[0]), round(pipeline[attr][1] * scale[1])) @staticmethod - def get_configured_input_size( - input_size_config: InputSizePreset = InputSizePreset.DEFAULT, model_ckpt: Optional[str] = None - ) -> Optional[Tuple[int, int]]: - """Get configurable input size configuration. If it doesn't exist, return None. + def get_trained_input_size(model_ckpt: Optional[str] = None) -> Optional[Tuple[int, int]]: + """Get trained input size from checkpoint. If it doesn't exist, return None. Args: - input_size_config (InputSizePreset, optional): Input size setting. Defaults to InputSizePreset.DEFAULT. model_ckpt (Optional[str], optional): Model weight to load. Defaults to None. Returns: Optional[Tuple[int, int]]: Pair of width and height. If there is no input size configuration, return None. """ - input_size = None - if input_size_config == InputSizePreset.DEFAULT: - if model_ckpt is None: - return None - - model_info = torch.load(model_ckpt, map_location="cpu") - for key in ["config", "learning_parameters", "input_size", "value"]: - if key not in model_info: - return None - model_info = model_info[key] - input_size = model_info - - if input_size == InputSizePreset.DEFAULT.value: - return None - logger.info("Given model weight was trained with {} input size.".format(input_size)) + if model_ckpt is None: + return None - else: - input_size = input_size_config.value + model_info = torch.load(model_ckpt, map_location="cpu") + if model_info is None: + return None - return InputSizePreset.parse(input_size) + input_size = model_info.get("input_size", None) + if not input_size: + return None + + logger.info("Given model weight was trained with {} input size.".format(input_size)) + return input_size @staticmethod def select_closest_size(input_size: Tuple[int, int], preset_sizes: List[Tuple[int, int]]): diff --git 
a/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/balanced_sampler.py b/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/balanced_sampler.py index aa12338910c..1ddaaab7884 100644 --- a/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/balanced_sampler.py +++ b/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/balanced_sampler.py @@ -49,7 +49,7 @@ def __init__( num_replicas: int = 1, rank: int = 0, drop_last: bool = False, - n_repeats: Union[float, int, str] = "auto", + n_repeats: Union[float, int, str] = 1, ): self.samples_per_gpu = samples_per_gpu self.num_replicas = num_replicas @@ -75,7 +75,9 @@ def __init__( self.num_trials = int(self.data_length / self.num_cls) self.num_samples = self._calculate_num_samples() - logger.info(f"This sampler will select balanced samples {self.num_trials} times") + logger.info( + "Balanced sampler will select balanced samples " f"{math.ceil(self.num_samples/samples_per_gpu)} times" + ) def _calculate_num_samples(self): num_samples = self.num_trials * self.num_cls * self.repeat diff --git a/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/cls_incr_sampler.py b/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/cls_incr_sampler.py index ee3fca43699..58622a354f0 100644 --- a/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/cls_incr_sampler.py +++ b/src/otx/algorithms/common/adapters/torch/dataloaders/samplers/cls_incr_sampler.py @@ -47,7 +47,7 @@ def __init__( num_replicas: int = 1, rank: int = 0, drop_last: bool = False, - n_repeats: Union[float, int, str] = "auto", + n_repeats: Union[float, int, str] = 1, ): self.samples_per_gpu = samples_per_gpu self.num_replicas = num_replicas diff --git a/src/otx/algorithms/common/adapters/torch/utils/bs_search_algo.py b/src/otx/algorithms/common/adapters/torch/utils/bs_search_algo.py index 0e8b7343ac6..5b1457c6ede 100644 --- a/src/otx/algorithms/common/adapters/torch/utils/bs_search_algo.py +++ 
b/src/otx/algorithms/common/adapters/torch/utils/bs_search_algo.py @@ -6,6 +6,7 @@ from typing import Callable, Dict, Tuple import torch +import torch.distributed as dist from otx.algorithms.common.utils.logger import get_logger @@ -40,7 +41,7 @@ def __init__(self, train_func: Callable[[int], None], default_bs: int, max_bs: i def _try_batch_size(self, bs: int) -> Tuple[bool, int]: cuda_oom = False - torch.cuda.reset_max_memory_allocated(device=None) + torch.cuda.reset_max_memory_cached(device=None) torch.cuda.empty_cache() try: @@ -51,18 +52,42 @@ def _try_batch_size(self, bs: int) -> Tuple[bool, int]: else: raise e - max_memory_allocated = torch.cuda.max_memory_allocated(device=None) + max_memory_reserved = torch.cuda.max_memory_reserved(device=None) + + if dist.is_initialized(): # Aggregate all results and broadcast to all processes + rank = dist.get_rank() + try_result = torch.tensor([int(cuda_oom), max_memory_reserved], dtype=torch.int64).cuda() + + if rank == 0: + try_result_arr = [torch.empty(2, dtype=torch.int64).cuda() for _ in range(dist.get_world_size())] + dist.gather(try_result, gather_list=try_result_arr, dst=0) + else: + dist.gather(try_result, dst=0) + + if rank == 0: + try_result_arr = torch.stack(try_result_arr) + cuda_oom = torch.any(try_result_arr[:, 0]) # type: ignore + max_memory_reserved = torch.max(try_result_arr[:, 1]) # type: ignore + total_try_result = torch.tensor([cuda_oom, max_memory_reserved], dtype=torch.int64).cuda() + else: + total_try_result = torch.empty(2, dtype=torch.int64).cuda() + + dist.broadcast(total_try_result, src=0) + + cuda_oom = total_try_result[0].bool().item() + max_memory_reserved = total_try_result[1].item() + if not cuda_oom: # Because heapq only supports min heap, use negatized batch size - self._bs_try_history[bs] = max_memory_allocated + self._bs_try_history[bs] = max_memory_reserved logger.debug( f"Adapting Batch size => bs : {bs}, CUDA_OOM : {cuda_oom}, " - f"GPU memory usage : {max_memory_allocated / 
self._total_mem}%" + f"GPU memory usage : {max_memory_reserved / self._total_mem}%" ) torch.cuda.empty_cache() - return cuda_oom, max_memory_allocated + return cuda_oom, max_memory_reserved @staticmethod def _get_even_center_val(val1: int, val2: int) -> int: @@ -82,10 +107,10 @@ def auto_decrease_batch_size(self) -> int: lowest_unavailable_bs = self._default_bs + 2 while True: - cuda_oom, max_memory_allocated = self._try_batch_size(current_bs) + cuda_oom, max_memory_reserved = self._try_batch_size(current_bs) # If GPU memory usage is too close to limit, CUDA OOM can be raised during training - if cuda_oom or max_memory_allocated > self._mem_upper_bound: + if cuda_oom or max_memory_reserved > self._mem_upper_bound: if current_bs < lowest_unavailable_bs: lowest_unavailable_bs = current_bs current_bs = self._get_even_center_val(current_bs, available_bs) diff --git a/src/otx/algorithms/common/utils/utils.py b/src/otx/algorithms/common/utils/utils.py index cd8bcd653b5..3eede66c23c 100644 --- a/src/otx/algorithms/common/utils/utils.py +++ b/src/otx/algorithms/common/utils/utils.py @@ -1,18 +1,7 @@ """Collections of Utils for common OTX algorithms.""" -# Copyright (C) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
+# Copyright (C) 2022-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 import importlib import inspect @@ -98,7 +87,7 @@ def get_arg_spec( # noqa: C901 # pylint: disable=too-many-branches return tuple(args) -def set_random_seed(seed, logger, deterministic=False): +def set_random_seed(seed, logger=None, deterministic=False): """Set random seed. Args: @@ -116,7 +105,8 @@ def set_random_seed(seed, logger, deterministic=False): torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) os.environ["PYTHONHASHSEED"] = str(seed) - logger.info(f"Training seed was set to {seed} w/ deterministic={deterministic}.") + if logger: + logger.info(f"Training seed was set to {seed} w/ deterministic={deterministic}.") if deterministic: torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False diff --git a/src/otx/algorithms/detection/adapters/mmdet/configurer.py b/src/otx/algorithms/detection/adapters/mmdet/configurer.py index 876e05ca822..a176d64e3a3 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/configurer.py +++ b/src/otx/algorithms/detection/adapters/mmdet/configurer.py @@ -1,9 +1,9 @@ """Base configurer for mmdet config.""" + # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# -from typing import Optional +from typing import Optional, Tuple from mmcv.utils import ConfigDict @@ -13,7 +13,6 @@ from otx.algorithms.common.adapters.mmcv.utils.config_utils import ( InputSizeManager, ) -from otx.algorithms.common.configs.configuration_enums import InputSizePreset from otx.algorithms.common.utils.logger import get_logger from otx.algorithms.detection.adapters.mmdet.utils import ( cluster_anchors, @@ -154,9 +153,12 @@ def configure_bbox_head(self, cfg): @staticmethod def configure_input_size( - cfg, input_size_config: InputSizePreset = InputSizePreset.DEFAULT, model_ckpt_path: Optional[str] = None + cfg, input_size: Optional[Tuple[int, int]] = None, model_ckpt_path: Optional[str] = None, training=True ): """Change input size 
if necessary.""" + if input_size is None: # InputSizePreset.DEFAULT + return + # YOLOX tiny has a different input size in train and val data pipeline base_input_size = None model_cfg = cfg.get("model") @@ -168,15 +170,13 @@ def configure_input_size( "test": (416, 416), "unlabeled": (992, 736), } - manager = InputSizeManager(cfg, base_input_size) - input_size = manager.get_configured_input_size(input_size_config, model_ckpt_path) - if input_size is None: # InputSizePreset.DEFAULT - return - if input_size == (0, 0): # InputSizePreset.AUTO - input_size = BaseConfigurer.adapt_input_size_to_dataset(cfg, manager, use_annotations=True) + if training: + input_size = BaseConfigurer.adapt_input_size_to_dataset(cfg, manager, use_annotations=True) + else: + input_size = manager.get_trained_input_size(model_ckpt_path) if input_size is None: return diff --git a/src/otx/algorithms/detection/adapters/mmdet/nncf/task.py b/src/otx/algorithms/detection/adapters/mmdet/nncf/task.py index 25e4116d759..01b912d3791 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/nncf/task.py +++ b/src/otx/algorithms/detection/adapters/mmdet/nncf/task.py @@ -1,19 +1,7 @@ """NNCF Task of OTX Detection.""" -# Copyright (C) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
- +# Copyright (C) 2022-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 from functools import partial from typing import Optional @@ -124,3 +112,4 @@ def _save_model_post_hook(self, modelinfo): self._update_anchors(modelinfo["anchors"], self.config.model.bbox_head.anchor_generator) modelinfo["confidence_threshold"] = self.confidence_threshold + modelinfo["input_size"] = self._input_size diff --git a/src/otx/algorithms/detection/adapters/mmdet/task.py b/src/otx/algorithms/detection/adapters/mmdet/task.py index 39b76b4ad9d..bf8079b15e1 100644 --- a/src/otx/algorithms/detection/adapters/mmdet/task.py +++ b/src/otx/algorithms/detection/adapters/mmdet/task.py @@ -1,18 +1,7 @@ """Task of OTX Detection using mmdetection training backend.""" # Copyright (C) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
+# SPDX-License-Identifier: Apache-2.0 import glob import io @@ -186,7 +175,7 @@ def configure(self, training=True, ir_options=None, train_dataset=None, export=F ir_options, data_classes, model_classes, - self._hyperparams.learning_parameters.input_size, + self._input_size, train_dataset=train_dataset, ) if should_cluster_anchors(self._recipe_cfg): @@ -195,6 +184,7 @@ def configure(self, training=True, ir_options=None, train_dataset=None, export=F elif self._anchors is not None: self._update_anchors(cfg.model.bbox_head.anchor_generator, self._anchors) self._config = cfg + self._input_size = cfg.model.pop("input_size", None) return cfg @@ -697,6 +687,7 @@ def save_model(self, output_model: ModelEntity): "config": hyperparams_str, "labels": labels, "confidence_threshold": self.confidence_threshold, + "input_size": self._input_size, "VERSION": 1, } if self.config is not None and should_cluster_anchors(self.config): diff --git a/src/otx/algorithms/detection/task.py b/src/otx/algorithms/detection/task.py index 9a3d9cca885..a3bd184f690 100644 --- a/src/otx/algorithms/detection/task.py +++ b/src/otx/algorithms/detection/task.py @@ -1,18 +1,7 @@ """Task of OTX Detection.""" # Copyright (C) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
+# SPDX-License-Identifier: Apache-2.0 import io import os @@ -24,6 +13,7 @@ import torch from mmcv.utils import ConfigDict +from otx.algorithms.common.configs.configuration_enums import InputSizePreset from otx.algorithms.common.tasks.base_task import TRAIN_TYPE_DIR_PATH, OTXTask from otx.algorithms.common.utils.callback import ( InferenceProgressCallback, @@ -65,6 +55,7 @@ from otx.api.usecases.tasks.interfaces.export_interface import ExportType from otx.api.utils.dataset_utils import add_saliency_maps_to_dataset_item from otx.cli.utils.multi_gpu import is_multigpu_child_process +from otx.core.data.caching.mem_cache_handler import MemCacheHandlerSingleton logger = get_logger() @@ -103,6 +94,15 @@ def __init__(self, task_environment: TaskEnvironment, output_path: Optional[str] else: self.data_pipeline_path = os.path.join(self._model_dir, "data_pipeline.py") + if hasattr(self._hyperparams.learning_parameters, "input_size"): + input_size_cfg = InputSizePreset(self._hyperparams.learning_parameters.input_size.value) + else: + input_size_cfg = InputSizePreset.DEFAULT + if self._hyperparams.tiling_parameters.enable_tiling: + # Disable auto input size if tiling is enabled + input_size_cfg = InputSizePreset.DEFAULT + self._input_size = input_size_cfg.tuple + def _load_postprocessing(self, model_data): """Load postprocessing configs form PyTorch model. 
@@ -231,6 +231,8 @@ def train( val_dataset.purpose = DatasetPurpose.INFERENCE val_preds, val_map = self._infer_model(val_dataset, InferenceParameters(is_evaluation=True)) + MemCacheHandlerSingleton.delete() + preds_val_dataset = val_dataset.with_empty_annotations() if self._hyperparams.postprocessing.result_based_confidence_threshold: confidence_threshold = 0.0 # Use all predictions to compute best threshold @@ -588,6 +590,7 @@ def save_model(self, output_model: ModelEntity): "config": hyperparams_str, "labels": labels, "confidence_threshold": self.confidence_threshold, + "input_size": self._input_size, "VERSION": 1, } torch.save(modelinfo, buffer) diff --git a/src/otx/algorithms/segmentation/adapters/mmseg/configurer.py b/src/otx/algorithms/segmentation/adapters/mmseg/configurer.py index f0f89cd22b6..b1c45dfab6c 100644 --- a/src/otx/algorithms/segmentation/adapters/mmseg/configurer.py +++ b/src/otx/algorithms/segmentation/adapters/mmseg/configurer.py @@ -1,11 +1,11 @@ """Base configurer for mmseg config.""" + # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# import os from collections import OrderedDict -from typing import Any, Optional +from typing import Any, Optional, Tuple import torch from mmcv.runner import CheckpointLoader @@ -18,7 +18,6 @@ InputSizeManager, remove_custom_hook, ) -from otx.algorithms.common.configs.configuration_enums import InputSizePreset from otx.algorithms.common.utils import append_dist_rank_suffix from otx.algorithms.common.utils.logger import get_logger from otx.algorithms.segmentation.adapters.mmseg.models.heads import otx_head_factory @@ -147,9 +146,12 @@ def patch_chkpt(ckpt_path: str, new_path: Optional[str] = None) -> str: @staticmethod def configure_input_size( - cfg, input_size_config: InputSizePreset = InputSizePreset.DEFAULT, model_ckpt_path: Optional[str] = None + cfg, input_size=Optional[Tuple[int, int]], model_ckpt_path: Optional[str] = None, training=True ): """Change input size if 
necessary.""" + if input_size is None: # InputSizePreset.DEFAULT + return + # Segmentation models have different input size in train and val data pipeline base_input_size = { "train": 512, @@ -157,15 +159,13 @@ def configure_input_size( "test": 544, "unlabeled": 512, } - manager = InputSizeManager(cfg, base_input_size) - input_size = manager.get_configured_input_size(input_size_config, model_ckpt_path) - if input_size is None: # InputSizePreset.DEFAULT - return - if input_size == (0, 0): # InputSizePreset.AUTO - input_size = BaseConfigurer.adapt_input_size_to_dataset(cfg, manager) + if training: + input_size = BaseConfigurer.adapt_input_size_to_dataset(cfg, manager, use_annotations=False) + else: + input_size = manager.get_trained_input_size(model_ckpt_path) if input_size is None: return diff --git a/src/otx/algorithms/segmentation/adapters/mmseg/nncf/task.py b/src/otx/algorithms/segmentation/adapters/mmseg/nncf/task.py index 36fc0287fda..c4979e607d7 100644 --- a/src/otx/algorithms/segmentation/adapters/mmseg/nncf/task.py +++ b/src/otx/algorithms/segmentation/adapters/mmseg/nncf/task.py @@ -1,18 +1,7 @@ """NNCF Task of OTX Segmentation.""" -# Copyright (C) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
+# Copyright (C) 2022-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 from functools import partial from typing import List, Optional @@ -122,3 +111,6 @@ def _generate_training_metrics_group(self, learning_curves): visualization_info = LineChartInfo(name=key, x_axis_label="Epoch", y_axis_label=key) output.append(MetricsGroup(metrics=[metric_curve], visualization_info=visualization_info)) return output, best_score + + def _save_model_post_hook(self, modelinfo): + modelinfo["input_size"] = self._input_size diff --git a/src/otx/algorithms/segmentation/adapters/mmseg/task.py b/src/otx/algorithms/segmentation/adapters/mmseg/task.py index 272ce57cd4d..0c671a06820 100644 --- a/src/otx/algorithms/segmentation/adapters/mmseg/task.py +++ b/src/otx/algorithms/segmentation/adapters/mmseg/task.py @@ -1,18 +1,7 @@ """Task of OTX Segmentation using mmsegmentation training backend.""" # Copyright (C) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
+# SPDX-License-Identifier: Apache-2.0 import glob import io @@ -162,9 +151,10 @@ def configure( ir_options, data_classes, model_classes, - self._hyperparams.learning_parameters.input_size, + self._input_size, ) self._config = cfg + self._input_size = cfg.model.pop("input_size", None) return cfg @@ -553,6 +543,7 @@ def save_model(self, output_model: ModelEntity): "model": model_ckpt, "config": hyperparams_str, "labels": labels, + "input_size": self._input_size, "VERSION": 1, } diff --git a/src/otx/algorithms/segmentation/task.py b/src/otx/algorithms/segmentation/task.py index a62270bb13c..cca2befe81d 100644 --- a/src/otx/algorithms/segmentation/task.py +++ b/src/otx/algorithms/segmentation/task.py @@ -1,18 +1,7 @@ """Task of OTX Segmentation.""" # Copyright (C) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. 
+# SPDX-License-Identifier: Apache-2.0 import io import os @@ -23,6 +12,7 @@ import torch from mmcv.utils import ConfigDict +from otx.algorithms.common.configs.configuration_enums import InputSizePreset from otx.algorithms.common.configs.training_base import TrainType from otx.algorithms.common.tasks.base_task import TRAIN_TYPE_DIR_PATH, OTXTask from otx.algorithms.common.utils.callback import ( @@ -70,6 +60,7 @@ create_hard_prediction_from_soft_prediction, ) from otx.cli.utils.multi_gpu import is_multigpu_child_process +from otx.core.data.caching.mem_cache_handler import MemCacheHandlerSingleton logger = get_logger() RECIPE_TRAIN_TYPE = { @@ -109,6 +100,12 @@ def __init__(self, task_environment: TaskEnvironment, output_path: Optional[str] self.data_pipeline_path = os.path.join(self._model_dir, "data_pipeline.py") + if hasattr(self._hyperparams.learning_parameters, "input_size"): + input_size_cfg = InputSizePreset(self._hyperparams.learning_parameters.input_size.value) + else: + input_size_cfg = InputSizePreset.DEFAULT + self._input_size = input_size_cfg.tuple + def infer( self, dataset: DatasetEntity, @@ -171,6 +168,8 @@ def train( results = self._train_model(dataset) + MemCacheHandlerSingleton.delete() + # Check for stop signal when training has stopped. 
If should_stop is true, training was cancelled and no new if self._should_stop: logger.info("Training cancelled.") @@ -320,6 +319,7 @@ def save_model(self, output_model: ModelEntity): "model": model_ckpt, "config": hyperparams_str, "labels": labels, + "input_size": self._input_size, "VERSION": 1, } diff --git a/tests/e2e/cli/anomaly/reference/ote_anomaly_classification_padim/compressed_model.yml b/tests/e2e/cli/anomaly/reference/ote_anomaly_classification_padim/compressed_model.yml index d6d8e5a5f0d..bf922ada0f0 100644 --- a/tests/e2e/cli/anomaly/reference/ote_anomaly_classification_padim/compressed_model.yml +++ b/tests/e2e/cli/anomaly/reference/ote_anomaly_classification_padim/compressed_model.yml @@ -1,7 +1,5 @@ TestToolsAnomalyClassification: nncf: - number_of_fakequantizers: 26 - pot: - number_of_fakequantizers: 28 - ptq: number_of_fakequantizers: 27 + ptq: + number_of_fakequantizers: 28 diff --git a/tests/e2e/cli/anomaly/reference/ote_anomaly_detection_padim/compressed_model.yml b/tests/e2e/cli/anomaly/reference/ote_anomaly_detection_padim/compressed_model.yml index a54deb96c19..aa1b8c764c0 100644 --- a/tests/e2e/cli/anomaly/reference/ote_anomaly_detection_padim/compressed_model.yml +++ b/tests/e2e/cli/anomaly/reference/ote_anomaly_detection_padim/compressed_model.yml @@ -1,7 +1,5 @@ TestToolsAnomalyDetection: nncf: - number_of_fakequantizers: 26 - pot: - number_of_fakequantizers: 28 - ptq: number_of_fakequantizers: 27 + ptq: + number_of_fakequantizers: 28 diff --git a/tests/e2e/cli/anomaly/reference/ote_anomaly_segmentation_padim/compressed_model.yml b/tests/e2e/cli/anomaly/reference/ote_anomaly_segmentation_padim/compressed_model.yml index 6b1c3affdf9..f476fb6f822 100644 --- a/tests/e2e/cli/anomaly/reference/ote_anomaly_segmentation_padim/compressed_model.yml +++ b/tests/e2e/cli/anomaly/reference/ote_anomaly_segmentation_padim/compressed_model.yml @@ -1,7 +1,5 @@ TestToolsAnomalySegmentation: nncf: - number_of_fakequantizers: 26 - pot: - 
number_of_fakequantizers: 28 - ptq: number_of_fakequantizers: 27 + ptq: + number_of_fakequantizers: 28 diff --git a/tests/e2e/cli/classification/test_api_xai_sanity_classification.py b/tests/e2e/cli/classification/test_api_xai_sanity_classification.py index d44a9e1684e..39060672ee8 100644 --- a/tests/e2e/cli/classification/test_api_xai_sanity_classification.py +++ b/tests/e2e/cli/classification/test_api_xai_sanity_classification.py @@ -47,10 +47,12 @@ def saliency_maps_check( assert metadata.data.numpy.ndim == 3, "Number of dims is incorrect." assert metadata.data.numpy.shape == (data_point.height, data_point.width, 3) else: - assert metadata.data.numpy.ndim == 2, "Raw saliency map ahs to be two-dimensional." + assert metadata.data.numpy.ndim == 2, "Raw saliency map has to be two-dimensional." if raw_sal_map_shape: - assert metadata.data.numpy.shape == raw_sal_map_shape, "Raw sak map shape is incorrect." - assert metadata.data.numpy.dtype == np.uint8, "Sal map has to be uint8 dtype." + assert ( + metadata.data.numpy.shape == raw_sal_map_shape + ), "Raw saliency map shape is incorrect." + assert metadata.data.numpy.dtype == np.uint8, "Saliency map has to be uint8 dtype." 
if only_predicted: assert saliency_map_counter == len(data_point.annotation_scene.get_labels()), assert_text_explain_predicted else: diff --git a/tests/e2e/cli/detection/test_api_xai_sanity_detection.py b/tests/e2e/cli/detection/test_api_xai_sanity_detection.py index 287c14325f6..02024d54bfd 100644 --- a/tests/e2e/cli/detection/test_api_xai_sanity_detection.py +++ b/tests/e2e/cli/detection/test_api_xai_sanity_detection.py @@ -8,6 +8,8 @@ import torch +from otx.algorithms.common.configs.configuration_enums import InputSizePreset +from otx.algorithms.common.utils import set_random_seed from otx.algorithms.detection.adapters.mmdet.task import MMDetectionTask from otx.algorithms.detection.adapters.openvino.task import OpenVINODetectionTask from otx.api.entities.inference_parameters import InferenceParameters @@ -22,7 +24,7 @@ from tests.integration.api.detection.api_detection import DetectionTaskAPIBase, DEFAULT_DET_TEMPLATE_DIR from tests.test_suite.e2e_test_system import e2e_pytest_api -torch.manual_seed(0) +set_random_seed(0) assert_text_explain_all = "The number of saliency maps should be equal to the number of all classes." assert_text_explain_predicted = "The number of saliency maps should be equal to the number of predicted classes." 
@@ -30,7 +32,7 @@ class TestOVDetXAIAPI(DetectionTaskAPIBase): ref_raw_saliency_shapes = { - "MobileNetV2-ATSS": (6, 8), + "MobileNetV2-ATSS": (4, 4), # Need to be adapted to configurable or adaptive input size } @e2e_pytest_api @@ -39,6 +41,7 @@ def test_inference_xai(self): hyper_parameters, model_template = self.setup_configurable_parameters( DEFAULT_DET_TEMPLATE_DIR, num_iters=15 ) + hyper_parameters.learning_parameters.input_size = InputSizePreset._512x512 # To fix saliency map size task_env, dataset = self.init_environment(hyper_parameters, model_template, 10) train_task = MMDetectionTask(task_environment=task_env) diff --git a/tests/e2e/cli/detection/test_detection.py b/tests/e2e/cli/detection/test_detection.py index a9692ac44d8..a5eb2ead206 100644 --- a/tests/e2e/cli/detection/test_detection.py +++ b/tests/e2e/cli/detection/test_detection.py @@ -147,8 +147,8 @@ def test_otx_eval(self, template, tmp_dir_path): @pytest.mark.parametrize("template", templates, ids=templates_ids) @pytest.mark.parametrize("half_precision", [True, False]) def test_otx_eval_openvino(self, template, tmp_dir_path, half_precision): - if template.name == "YOLOX-L": - pytest.skip(reason="Issue#2518: YOLOX-L, Tiling-ATSS showed 0.0 after export") + if template.name == "YOLOX-L" or template.name == "SSD": + pytest.skip(reason="Issue#2548: Exported model performance is too low") tmp_dir_path = tmp_dir_path / "detection" otx_eval_openvino_testing(template, tmp_dir_path, otx_dir, args, threshold=0.2, half_precision=half_precision) @@ -221,6 +221,8 @@ def test_otx_deploy_openvino(self, template, tmp_dir_path): def test_otx_eval_deployment(self, template, tmp_dir_path): if template.name == "YOLOX-L": pytest.skip(reason="Issue#2518: YOLOX-L, Tiling-ATSS showed 0.0 after export") + if template.name == "SSD": + pytest.skip(reason="Issue#2548: Exported model performance is too low") tmp_dir_path = tmp_dir_path / "detection" otx_eval_deployment_testing(template, tmp_dir_path, otx_dir, args, 
threshold=0.0) diff --git a/tests/e2e/cli/detection/test_tiling_detection.py b/tests/e2e/cli/detection/test_tiling_detection.py index 76d3cbf0d90..b123f5dc502 100644 --- a/tests/e2e/cli/detection/test_tiling_detection.py +++ b/tests/e2e/cli/detection/test_tiling_detection.py @@ -128,8 +128,6 @@ def test_otx_eval(self, template, tmp_dir_path): @pytest.mark.parametrize("template", templates, ids=templates_ids) @pytest.mark.parametrize("half_precision", [True, False]) def test_otx_eval_openvino(self, template, tmp_dir_path, half_precision): - if template.name == "MobileNetV2-ATSS": - pytest.skip(reason="Issue#2518: YOLOX-L, Tiling-ATSS showed 0.0 after export") tmp_dir_path = tmp_dir_path / "tiling_det" otx_eval_openvino_testing(template, tmp_dir_path, otx_dir, args, threshold=0.2, half_precision=half_precision) diff --git a/tests/test_suite/run_test_command.py b/tests/test_suite/run_test_command.py index 5d5db2d82db..0b2d6ebbfe2 100644 --- a/tests/test_suite/run_test_command.py +++ b/tests/test_suite/run_test_command.py @@ -8,6 +8,7 @@ import os import shutil import sys +import torch from pathlib import Path from typing import Dict import onnx @@ -244,6 +245,12 @@ def otx_export_testing(template, root, dump_features=False, half_precision=False else: assert os.path.exists(path_to_xml) assert os.path.exists(os.path.join(save_path, "openvino.bin")) + ckpt = torch.load(f"{template_work_dir}/trained_{template.model_template_id}/models/weights.pth") + input_size = ckpt.get("input_size", None) + if input_size: + with open(path_to_xml, encoding="utf-8") as xml_stream: + xml_model = xml_stream.read() + assert f"{input_size[1]},{input_size[0]}" in xml_model else: if "Visual_Prompting" in template.model_template_id: assert os.path.exists(os.path.join(save_path, "visual_prompting_image_encoder.onnx")) @@ -688,6 +695,14 @@ def nncf_export_testing(template, root): f"{template_work_dir}/exported_nncf_{template.model_template_id}/openvino.bin" ) assert compressed_bin_size < 
original_bin_size, f"{compressed_bin_size=}, {original_bin_size=}" + ckpt = torch.load(f"{template_work_dir}/nncf_{template.model_template_id}/weights.pth") + input_size = ckpt.get("input_size", None) + if input_size: + with open( + f"{template_work_dir}/exported_nncf_{template.model_template_id}/openvino.xml", encoding="utf-8" + ) as xml_stream: + xml_model = xml_stream.read() + assert f"{input_size[1]},{input_size[0]}" in xml_model def nncf_validate_fq_testing(template, root, otx_dir, task_type, test_name): diff --git a/tests/unit/algorithms/classification/adapters/mmcls/test_configurer.py b/tests/unit/algorithms/classification/adapters/mmcls/test_configurer.py index fd6f26d0805..ab513913749 100644 --- a/tests/unit/algorithms/classification/adapters/mmcls/test_configurer.py +++ b/tests/unit/algorithms/classification/adapters/mmcls/test_configurer.py @@ -66,7 +66,7 @@ def test_configure(self, mocker): mock_cfg_merge.assert_called_once_with(model_cfg, data_cfg, self.data_pipeline_path, None) mock_cfg_ckpt.assert_called_once_with(model_cfg, "") mock_cfg_env.assert_called_once_with(model_cfg) - mock_cfg_data_pipeline.assert_called_once_with(model_cfg, InputSizePreset.DEFAULT, "") + mock_cfg_data_pipeline.assert_called_once_with(model_cfg, None, "") mock_cfg_recipe.assert_called_once_with(model_cfg) mock_cfg_model.assert_called_once_with(model_cfg, None, None, None) mock_cfg_hook.assert_called_once_with(model_cfg) @@ -156,33 +156,31 @@ def test_configure_samples_per_gpu(self): @e2e_pytest_unit @pytest.mark.parametrize("input_size", [None, (0, 0), (128, 128)]) - def test_configure_input_size(self, mocker, input_size): + @pytest.mark.parametrize("training", [True, False]) + def test_configure_input_size(self, mocker, input_size, training): # prepare mock_cfg = mocker.MagicMock() mock_input_manager_cls = mocker.patch.object(configurer, "InputSizeManager") mock_input_manager = mock_input_manager_cls.return_value - mock_input_manager.get_configured_input_size.return_value 
= input_size + mock_input_manager.get_trained_input_size.return_value = (32, 32) mock_input_manager_cls.return_value = mock_input_manager mock_base_configurer_cls = mocker.patch.object(configurer, "BaseConfigurer") mock_base_configurer_cls.adapt_input_size_to_dataset.return_value = (64, 64) # execute - self.configurer.configure_input_size(mock_cfg, InputSizePreset.DEFAULT, self.data_cfg) + self.configurer.configure_input_size(mock_cfg, input_size, "ckpt/path", training=training) # check if input_size is None: mock_input_manager.set_input_size.assert_not_called() elif input_size == (0, 0): - mock_input_manager.set_input_size.assert_called_once_with((64, 64)) + if training: + mock_input_manager.set_input_size.assert_called_once_with((64, 64)) + else: + mock_input_manager.set_input_size.assert_called_once_with((32, 32)) else: mock_input_manager.set_input_size.assert_called_once_with(input_size) - if input_size == (0, 0): - mock_input_manager.set_input_size = mocker.MagicMock() - mock_base_configurer_cls.adapt_input_size_to_dataset.return_value = None - self.configurer.configure_input_size(mock_cfg, InputSizePreset.DEFAULT, self.data_cfg) - mock_input_manager.set_input_size.assert_not_called() - @e2e_pytest_unit def test_configure_fp16(self): model_cfg = copy.deepcopy(self.model_cfg) diff --git a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_adaptive_training_hooks.py b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_adaptive_training_hooks.py index 71716effd62..51a30756b97 100644 --- a/tests/unit/algorithms/common/adapters/mmcv/hooks/test_adaptive_training_hooks.py +++ b/tests/unit/algorithms/common/adapters/mmcv/hooks/test_adaptive_training_hooks.py @@ -86,7 +86,7 @@ def test_before_train_iter(self) -> None: assert hook._original_interval is None assert eval_hook.interval == 4 assert lr_hook.interval == 4 - assert lr_hook.patience == 1 + assert lr_hook.patience == 2 assert early_hook.interval == 4 - assert early_hook.patience == 1 + assert 
early_hook.patience == 3 assert ckpt_hook.interval == 4 diff --git a/tests/unit/algorithms/common/adapters/mmcv/utils/test_config_utils.py b/tests/unit/algorithms/common/adapters/mmcv/utils/test_config_utils.py index 00405854eb9..1f250eee3ff 100644 --- a/tests/unit/algorithms/common/adapters/mmcv/utils/test_config_utils.py +++ b/tests/unit/algorithms/common/adapters/mmcv/utils/test_config_utils.py @@ -295,11 +295,11 @@ def get_mock_model_ckpt(case): if case == "none": return None if case == "no_input_size": - return {"config": {}} + return {} if case == "input_size_default": - return {"config": {"learning_parameters": {"input_size": {"value": "Default"}}}} + return {"input_size": None} if case == "input_size_exist": - return {"config": {"learning_parameters": {"input_size": {"value": "512x512"}}}} + return {"input_size": (512, 512)} @e2e_pytest_unit @@ -408,40 +408,20 @@ def test_get_input_size_from_cfg(self, test_case): assert input_size_manager.get_input_size_from_cfg("train") == input_size @e2e_pytest_unit - @pytest.mark.parametrize( - "input_size_config", [InputSizePreset.DEFAULT, InputSizePreset.AUTO, InputSizePreset._1024x1024] - ) @pytest.mark.parametrize("model_ckpt_case", ["none", "no_input_size", "input_size_default", "input_size_exist"]) - def test_get_configured_input_size(self, mocker, input_size_config, model_ckpt_case): + def test_get_trained_input_size(self, mocker, model_ckpt_case): # prepare mock_torch = mocker.patch.object(config_utils, "torch") mock_torch.load.return_value = get_mock_model_ckpt(model_ckpt_case) - input_size_parser = re.compile("(\d+)x(\d+)") - - if input_size_config == InputSizePreset.DEFAULT: - if ( - model_ckpt_case == "none" - or model_ckpt_case == "no_input_size" - or model_ckpt_case == "input_size_default" - ): - expected_value = None - elif model_ckpt_case == "input_size_exist": - input_size = get_mock_model_ckpt(model_ckpt_case)["config"]["learning_parameters"]["input_size"][ - "value" - ] - pattern = 
input_size_parser.search(input_size) - expected_value = (int(pattern.group(1)), int(pattern.group(2))) - elif input_size_config == InputSizePreset.AUTO: - expected_value = (0, 0) + + if model_ckpt_case == "none" or model_ckpt_case == "no_input_size" or model_ckpt_case == "input_size_default": + expected_value = None else: - pattern = input_size_parser.search(input_size_config.value) - expected_value = (int(pattern.group(1)), int(pattern.group(2))) + expected_value = (512, 512) # check expected value is returned assert ( - InputSizeManager.get_configured_input_size( - input_size_config, None if model_ckpt_case == "none" else mocker.MagicMock() - ) + InputSizeManager.get_trained_input_size(None if model_ckpt_case == "none" else mocker.MagicMock()) == expected_value ) diff --git a/tests/unit/algorithms/common/adapters/torch/utils/test_bs_search_algo.py b/tests/unit/algorithms/common/adapters/torch/utils/test_bs_search_algo.py index d0649dc29bf..a347968dc5e 100644 --- a/tests/unit/algorithms/common/adapters/torch/utils/test_bs_search_algo.py +++ b/tests/unit/algorithms/common/adapters/torch/utils/test_bs_search_algo.py @@ -1,4 +1,7 @@ +from typing import Optional, List + import pytest +import torch from tests.test_suite.e2e_test_system import e2e_pytest_unit from otx.algorithms.common.adapters.torch.utils import BsSearchAlgo @@ -11,6 +14,8 @@ class TestBsSearchAlgo: def setup_test(self, mocker): self.mock_torch = mocker.patch.object(bs_search_algo, "torch") self.mock_torch.cuda.mem_get_info.return_value = (1, 10000) + self.mock_dist = mocker.patch.object(bs_search_algo, "dist") + self.mock_dist.is_initialized.return_value = False def test_init(self, mocker): BsSearchAlgo(mocker.MagicMock(), 4, 10) @@ -35,11 +40,122 @@ def mock_train_func(batch_size): else: mem_usage = 8500 * batch_size / max_runnable_bs - self.mock_torch.cuda.max_memory_allocated.return_value = mem_usage + self.mock_torch.cuda.max_memory_reserved.return_value = mem_usage return mem_usage return 
mock_train_func + def test_try_batch_size(self): + mock_train_func = self.get_mock_train_func(cuda_oom_bound=10000, max_runnable_bs=80) + bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000) + batch_size = 40 + + cuda_oom, max_memory_reserved = bs_search_algo._try_batch_size(batch_size) + + assert cuda_oom is False + assert max_memory_reserved == mock_train_func(batch_size) + self.mock_torch.cuda.reset_max_memory_cached.assert_called() + self.mock_torch.cuda.empty_cache.assert_called() + + def test_try_batch_size_cuda_oom(self): + mock_train_func = self.get_mock_train_func(cuda_oom_bound=100, max_runnable_bs=80) + bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000) + batch_size = 200 + + cuda_oom, _ = bs_search_algo._try_batch_size(batch_size) + + assert cuda_oom is True + self.mock_torch.cuda.reset_max_memory_cached.assert_called() + self.mock_torch.cuda.empty_cache.assert_called() + + def _prepare_dist_test(self, broadcast_val: torch.Tensor, gather_val: Optional[List[torch.Tensor]] = None): + self.mock_dist.is_initialized.return_value = True + + # mocking torch.distributed.broadcast + def mock_broadcast(tensor: torch.Tensor, src: int): + tensor.copy_(broadcast_val) + + self.mock_dist.broadcast.side_effect = mock_broadcast + + # mocking torch.distributed.gather if gather_val is given + def mock_gather(tensor: torch.Tensor, gather_list: Optional[List[torch.Tensor]] = None, dst: int = 0): + for i in range(len(gather_list)): + gather_list[i].copy_(gather_val[i]) + + if gather_val is not None: + self.mock_dist.gather.side_effect = mock_gather + + # revert some of torch function + def mock_tensor_cuda(self, *args, **kwargs): + return self + + torch.Tensor.cuda = mock_tensor_cuda + self.mock_torch.tensor = torch.tensor + self.mock_torch.int64 = torch.int64 + self.mock_torch.max = torch.max + self.mock_torch.any = torch.any + self.mock_torch.stack = torch.stack + self.mock_torch.empty = torch.empty + + def test_try_batch_size_distributed_not_rank_0(self): + 
self.mock_dist.get_rank.return_value = 1 + broadcasted_cuda_oom = False + broadcasted_max_memory_reserved = 4000 + self._prepare_dist_test( + broadcast_val=torch.tensor([broadcasted_cuda_oom, broadcasted_max_memory_reserved], dtype=torch.int64) + ) + mock_train_func = self.get_mock_train_func(cuda_oom_bound=10000, max_runnable_bs=80) + batch_size = 40 + bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000) + w1_max_memory_reserved = mock_train_func(batch_size) + + cuda_oom, max_memory_reserved = bs_search_algo._try_batch_size(batch_size) + + # check dist.gather is called and get [cuda_oom, maxmemory_reserved] as arguments. + self.mock_dist.gather.assert_called_once() + assert self.mock_dist.gather.call_args.args[0][0].item() == False + assert self.mock_dist.gather.call_args.args[0][1].item() == w1_max_memory_reserved + assert self.mock_dist.gather.call_args.kwargs["dst"] == 0 + # check dist.broadcast is called + self.mock_dist.broadcast.assert_called_once() + assert self.mock_dist.broadcast.call_args.kwargs["src"] == 0 + # check broadcased values are returned + assert cuda_oom is broadcasted_cuda_oom + assert max_memory_reserved == broadcasted_max_memory_reserved + + def test_try_batch_size_distributed_rank_0(self): + self.mock_dist.get_rank.return_value = 0 + self.mock_dist.get_world_size.return_value = 2 + self._prepare_dist_test( + broadcast_val=torch.tensor([True, 4000], dtype=torch.int64), + gather_val=[ + torch.tensor([False, 3000], dtype=torch.int64), + torch.tensor([True, 4000], dtype=torch.int64), + ], + ) + mock_train_func = self.get_mock_train_func(cuda_oom_bound=10000, max_runnable_bs=80) + batch_size = 40 + bs_search_algo = BsSearchAlgo(mock_train_func, 128, 1000) + w0_max_memory_reserved = mock_train_func(batch_size) + + cuda_oom, max_memory_reserved = bs_search_algo._try_batch_size(batch_size) + + # check dist.gather is called and get [cuda_oom, max_memory_reserved] as arguments. 
+ self.mock_dist.gather.assert_called_once() + assert self.mock_dist.gather.call_args.args[0][0].item() == False + assert self.mock_dist.gather.call_args.args[0][1].item() == w0_max_memory_reserved + assert self.mock_dist.gather.call_args.kwargs["dst"] == 0 + # check if any process get cuda oom then set cuda_oom to True and + # set max_memory_reserved to maximum value of processes' + self.mock_dist.broadcast.assert_called_once() + self.mock_dist.broadcast.assert_called_once() + assert self.mock_dist.broadcast.call_args.kwargs["src"] == 0 + assert self.mock_dist.broadcast.call_args.args[0][0].item() == True + assert self.mock_dist.broadcast.call_args.args[0][1].item() == 4000 + # check proper values are returned + assert cuda_oom is True + assert max_memory_reserved == 4000 + def test_auto_decrease_batch_size(self): mock_train_func = self.get_mock_train_func(cuda_oom_bound=10000, max_runnable_bs=80) @@ -91,7 +207,7 @@ def mock_train_func(batch_size): mem_usage = 9000 else: mem_usage = 1000 - self.mock_torch.cuda.max_memory_allocated.return_value = mem_usage + self.mock_torch.cuda.max_memory_reserved.return_value = mem_usage return mem_usage bs_search_algo = BsSearchAlgo(mock_train_func, 64, 1000) @@ -108,7 +224,7 @@ def mock_train_func(batch_size): mem_usage = 9000 else: mem_usage = 1000 + batch_size / 1000 - self.mock_torch.cuda.max_memory_allocated.return_value = mem_usage + self.mock_torch.cuda.max_memory_reserved.return_value = mem_usage return mem_usage bs_search_algo = BsSearchAlgo(mock_train_func, 64, 1000) diff --git a/tests/unit/algorithms/detection/adapters/mmdet/test_configurer.py b/tests/unit/algorithms/detection/adapters/mmdet/test_configurer.py index c341d9d1b4e..9df0626976d 100644 --- a/tests/unit/algorithms/detection/adapters/mmdet/test_configurer.py +++ b/tests/unit/algorithms/detection/adapters/mmdet/test_configurer.py @@ -76,9 +76,7 @@ def test_configure(self, mocker): ) mock_cfg_ckpt.assert_called_once_with(model_cfg, "") 
mock_cfg_env.assert_called_once_with(model_cfg) - mock_cfg_data_pipeline.assert_called_once_with( - model_cfg, InputSizePreset.DEFAULT, "", train_dataset=self.det_dataset - ) + mock_cfg_data_pipeline.assert_called_once_with(model_cfg, None, "", train_dataset=self.det_dataset) mock_cfg_recipe.assert_called_once_with(model_cfg, train_dataset=self.det_dataset) mock_cfg_hook.assert_called_once_with(model_cfg) mock_cfg_model.assert_called_once_with(model_cfg, None, None, None, train_dataset=self.det_dataset) @@ -169,33 +167,31 @@ def test_configure_samples_per_gpu(self): @e2e_pytest_unit @pytest.mark.parametrize("input_size", [None, (0, 0), (256, 256)]) - def test_configure_input_size_not_yolox(self, mocker, input_size): + @pytest.mark.parametrize("training", [True, False]) + def test_configure_input_size_not_yolox(self, mocker, input_size, training): # prepare mock_cfg = mocker.MagicMock() mock_input_manager_cls = mocker.patch.object(configurer, "InputSizeManager") mock_input_manager = mock_input_manager_cls.return_value - mock_input_manager.get_configured_input_size.return_value = input_size + mock_input_manager.get_trained_input_size.return_value = (32, 32) mock_input_manager_cls.return_value = mock_input_manager mock_base_configurer_cls = mocker.patch.object(configurer, "BaseConfigurer") mock_base_configurer_cls.adapt_input_size_to_dataset.return_value = (64, 64) # execute - self.configurer.configure_input_size(mock_cfg, InputSizePreset.DEFAULT, self.data_cfg) + self.configurer.configure_input_size(mock_cfg, input_size, "ckpt/path", training=training) # check if input_size is None: mock_input_manager.set_input_size.assert_not_called() elif input_size == (0, 0): - mock_input_manager.set_input_size.assert_called_once_with((64, 64)) + if training: + mock_input_manager.set_input_size.assert_called_once_with((64, 64)) + else: + mock_input_manager.set_input_size.assert_called_once_with((32, 32)) else: mock_input_manager.set_input_size.assert_called_once_with(input_size) - 
if input_size == (0, 0): - mock_input_manager.set_input_size = mocker.MagicMock() - mock_base_configurer_cls.adapt_input_size_to_dataset.return_value = None - self.configurer.configure_input_size(mock_cfg, InputSizePreset.DEFAULT, self.data_cfg) - mock_input_manager.set_input_size.assert_not_called() - @e2e_pytest_unit @pytest.mark.parametrize("is_yolox_tiny", [True, False]) def test_configure_input_size_yolox(self, mocker, is_yolox_tiny): diff --git a/tests/unit/algorithms/segmentation/adapters/mmseg/test_mmseg_configurer.py b/tests/unit/algorithms/segmentation/adapters/mmseg/test_mmseg_configurer.py index 49c3b91d6e7..13a44846e15 100644 --- a/tests/unit/algorithms/segmentation/adapters/mmseg/test_mmseg_configurer.py +++ b/tests/unit/algorithms/segmentation/adapters/mmseg/test_mmseg_configurer.py @@ -67,7 +67,7 @@ def test_configure(self, mocker): mock_cfg_merge.assert_called_once_with(model_cfg, data_cfg, self.data_pipeline_path, None) mock_cfg_ckpt.assert_called_once_with(model_cfg, "") mock_cfg_env.assert_called_once_with(model_cfg) - mock_cfg_data_pipeline.assert_called_once_with(model_cfg, InputSizePreset.DEFAULT, "") + mock_cfg_data_pipeline.assert_called_once_with(model_cfg, None, "") mock_cfg_recipe.assert_called_once_with(model_cfg) mock_cfg_model.assert_called_once_with(model_cfg, None, None, None) mock_cfg_hook.assert_called_once_with(model_cfg) @@ -156,33 +156,31 @@ def test_configure_samples_per_gpu(self): @e2e_pytest_unit @pytest.mark.parametrize("input_size", [None, (0, 0), (256, 256)]) - def test_configure_input_size(self, mocker, input_size): + @pytest.mark.parametrize("training", [True, False]) + def test_configure_input_size(self, mocker, input_size, training): # prepare mock_cfg = mocker.MagicMock() mock_input_manager_cls = mocker.patch.object(configurer, "InputSizeManager") mock_input_manager = mock_input_manager_cls.return_value - mock_input_manager.get_configured_input_size.return_value = input_size + 
mock_input_manager.get_trained_input_size.return_value = (32, 32) mock_input_manager_cls.return_value = mock_input_manager mock_base_configurer_cls = mocker.patch.object(configurer, "BaseConfigurer") mock_base_configurer_cls.adapt_input_size_to_dataset.return_value = (64, 64) # execute - self.configurer.configure_input_size(mock_cfg, InputSizePreset.DEFAULT, self.data_cfg) + self.configurer.configure_input_size(mock_cfg, input_size, "ckpt/path", training=training) # check if input_size is None: mock_input_manager.set_input_size.assert_not_called() elif input_size == (0, 0): - mock_input_manager.set_input_size.assert_called_once_with((64, 64)) + if training: + mock_input_manager.set_input_size.assert_called_once_with((64, 64)) + else: + mock_input_manager.set_input_size.assert_called_once_with((32, 32)) else: mock_input_manager.set_input_size.assert_called_once_with(input_size) - if input_size == (0, 0): - mock_input_manager.set_input_size = mocker.MagicMock() - mock_base_configurer_cls.adapt_input_size_to_dataset.return_value = None - self.configurer.configure_input_size(mock_cfg, InputSizePreset.DEFAULT, self.data_cfg) - mock_input_manager.set_input_size.assert_not_called() - @e2e_pytest_unit def test_configure_fp16(self): model_cfg = copy.deepcopy(self.model_cfg) diff --git a/tests/unit/algorithms/segmentation/adapters/test_otx_segmentation_task.py b/tests/unit/algorithms/segmentation/adapters/test_otx_segmentation_task.py index 6f94c5c04c7..00e0cecb391 100644 --- a/tests/unit/algorithms/segmentation/adapters/test_otx_segmentation_task.py +++ b/tests/unit/algorithms/segmentation/adapters/test_otx_segmentation_task.py @@ -80,6 +80,7 @@ def test_label_order(self, mocker): for i in range(20): fake_label.append(LabelEntity(name=f"class_{i}", domain=Domain.SEGMENTATION, id=ID(str(i)))) mock_environemnt.get_labels.return_value = fake_label + del mock_environemnt.get_hyper_parameters.return_value.learning_parameters.input_size # To avoid mocking error task = 
MMSegmentationTask(mock_environemnt) for i, label_entity in task._label_dictionary.items():