From 17eb950065614543fa9606b0c2d1ecaad8fc03ec Mon Sep 17 00:00:00 2001 From: eunwoosh Date: Wed, 24 May 2023 10:26:12 +0900 Subject: [PATCH 1/6] handle iter runner case --- .../adapters/mmcv/utils/automatic_bs.py | 25 ++++---- .../adapters/mmcv/utils/test_automatic_bs.py | 63 +++++++------------ 2 files changed, 37 insertions(+), 51 deletions(-) diff --git a/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py b/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py index 153a4277899..5fe0c1c9d3c 100644 --- a/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py +++ b/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py @@ -56,17 +56,9 @@ def adapt_batch_size(train_func: Callable, cfg, datasets: List, validate: bool = def train_func_single_iter(batch_size): copied_cfg = deepcopy(cfg) _set_batch_size(copied_cfg, batch_size) + _set_max_epoch(copied_cfg, 1) # setup for training a single iter to reduce time - # setup for training a single iter to reduce time - if copied_cfg.runner.get("type") == "AccuracyAwareRunner": # nncf case - if "nncf_config" in copied_cfg.runner: - _set_value_at_dict_in_dict( - copied_cfg.runner["nncf_config"], "accuracy_aware_training.params.maximal_total_epochs", 1 - ) - else: - copied_cfg.runner["max_epochs"] = 1 - - # Remove some hooks due to reasons below + # Remove hooks due to reasons below # OTXProgressHook => prevent progress bar from being 0 and 100 repeatably # earlystoppinghook => if eval hook is excluded, this hook makes an error due to absence of score history # CustomEvalHook => exclude validation in classification task @@ -115,7 +107,7 @@ def train_func_single_iter(batch_size): logger.info(f"Batch size is adapted : {default_bs} -> {new_batch_size}") logger.info(f"learning rate is adapted : {origin_lr} -> {cfg.optimizer.lr}") else: - logger.info("Adapting batch size is done. Current batch size is availble.") + logger.info("Adapting batch size is done. Batch size isn't changed.") def _get_batch_size(cfg) -> int: @@ -130,6 +122,17 @@ def _set_batch_size(cfg, batch_size: int): else: cfg.data.train_dataloader["samples_per_gpu"] = batch_size +def _set_max_epoch(cfg, max_epoch: int): + if cfg.runner.get("type") == "AccuracyAwareRunner": # nncf case + if "nncf_config" in cfg.runner: + _set_value_at_dict_in_dict( + cfg.runner["nncf_config"], "accuracy_aware_training.params.maximal_total_epochs", max_epoch + ) + elif "iterbased" in cfg.runner["type"].lower(): + cfg.runner["max_iters"] = max_epoch + else: + cfg.runner["max_epochs"] = max_epoch + class SubDataset: """Wrapper class to make dataset pretend to have specified number of images. diff --git a/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py b/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py index 552cc4a76c5..e1f49542402 100644 --- a/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py +++ b/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py @@ -44,14 +44,12 @@ def common_cfg(mocker): return mock_cfg -@pytest.fixture -def mock_cfg_not_action(common_cfg): +def set_mock_cfg_not_action(common_cfg): common_cfg.data.train_dataloader = {"samples_per_gpu": DEFAULT_BS} return common_cfg -@pytest.fixture -def mock_cfg_action(common_cfg): +def set_mock_cfg_action(common_cfg): common_cfg.data.videos_per_gpu = DEFAULT_BS common_cfg.domain = "ACTION_CLASSIFICATION" return common_cfg @@ -65,19 +63,34 @@ def mock_dataset(mocker): @pytest.mark.parametrize("not_increase", [True, False]) -def test_adapt_batch_size_not_action_task(mocker, mock_adapt_algo_cls, mock_cfg_not_action, mock_dataset, not_increase): +@pytest.mark.parametrize("is_action_task", [True, False]) +@pytest.mark.parametrize("is_iter_based_runner", [True, False]) +def test_adapt_batch_size(mocker, mock_adapt_algo_cls, common_cfg, mock_dataset, not_increase, is_action_task, is_iter_based_runner): # prepare mock_train_func = mocker.MagicMock() new_bs = DEFAULT_BS // 2 if not_increase else DEFAULT_BS + 2 + max_eph_name = "max_epochs" + if is_iter_based_runner: + common_cfg.runner = {'type': 'IterBasedRunnerWithCancel', 'max_iters': 100} + max_eph_name = "max_iters" + + if is_action_task: + mock_config = set_mock_cfg_action(common_cfg) + else: + mock_config = set_mock_cfg_not_action(common_cfg) + # execute - adapt_batch_size(mock_train_func, mock_cfg_not_action, mock_dataset, False, not_increase) + adapt_batch_size(mock_train_func, mock_config, mock_dataset, False, not_increase) # check adapted batch size is applied - assert mock_cfg_not_action.data.train_dataloader["samples_per_gpu"] == new_bs + if is_action_task: + assert mock_config.data.videos_per_gpu == new_bs + else: + assert mock_config.data.train_dataloader["samples_per_gpu"] == new_bs # check leanring rate is updated depending on adapted batch size bs_change_ratio = new_bs / DEFAULT_BS - assert mock_cfg_not_action.optimizer.lr == pytest.approx(DEFAULT_LR * sqrt(bs_change_ratio)) + assert mock_config.optimizer.lr == pytest.approx(DEFAULT_LR * sqrt(bs_change_ratio)) # check adapt function gets proper arguments assert mock_adapt_algo_cls.call_args.kwargs["default_bs"] == DEFAULT_BS assert mock_adapt_algo_cls.call_args.kwargs["max_bs"] == TRAINSET_SIZE @@ -85,8 +98,8 @@ def test_adapt_batch_size_not_action_task(mocker, mock_adapt_algo_cls, mock_cfg_ assert len(mock_train_func.call_args_list[0].kwargs["dataset"][0]) == DEFAULT_BS assert len(mock_train_func.call_args_list[1].kwargs["dataset"][0]) == new_bs # check max epoch is set as 1 to reduce time - assert mock_train_func.call_args_list[0].kwargs["cfg"].runner["max_epochs"] == 1 - assert mock_train_func.call_args_list[1].kwargs["cfg"].runner["max_epochs"] == 1 + assert mock_train_func.call_args_list[0].kwargs["cfg"].runner[max_eph_name] == 1 + assert mock_train_func.call_args_list[1].kwargs["cfg"].runner[max_eph_name] == 1 # check eval before run is disabled to reduce time assert not mock_train_func.call_args_list[0].kwargs["cfg"].custom_hooks[0]["enable_eval_before_run"] assert not mock_train_func.call_args_list[1].kwargs["cfg"].custom_hooks[0]["enable_eval_before_run"] @@ -94,36 +107,6 @@ def test_adapt_batch_size_not_action_task(mocker, mock_adapt_algo_cls, mock_cfg_ assert len(mock_train_func.call_args_list[0].kwargs["cfg"].custom_hooks) == 1 -@pytest.mark.parametrize("not_increase", [True, False]) -def test_adapt_batch_size_action_task(mocker, mock_adapt_algo_cls, mock_cfg_action, mock_dataset, not_increase): - # prepare - mock_train_func = mocker.MagicMock() - new_bs = DEFAULT_BS // 2 if not_increase else DEFAULT_BS + 2 - - # execute - adapt_batch_size(mock_train_func, mock_cfg_action, mock_dataset, True, not_increase) - - # check adapted batch size is applied - assert mock_cfg_action.data.videos_per_gpu == new_bs - # check leanring rate is updated depending on adapted batch size - bs_change_ratio = new_bs / DEFAULT_BS - assert mock_cfg_action.optimizer.lr == pytest.approx(DEFAULT_LR * sqrt(bs_change_ratio)) - # check adapt function gets proper arguments - assert mock_adapt_algo_cls.call_args.kwargs["default_bs"] == DEFAULT_BS - assert mock_adapt_algo_cls.call_args.kwargs["max_bs"] == TRAINSET_SIZE - # check length of dataset is decreased to reduce time - assert len(mock_train_func.call_args_list[0].kwargs["dataset"][0]) == DEFAULT_BS - assert len(mock_train_func.call_args_list[1].kwargs["dataset"][0]) == new_bs - # check max epoch is set as 1 to reduce time - assert mock_train_func.call_args_list[0].kwargs["cfg"].runner["max_epochs"] == 1 - assert mock_train_func.call_args_list[1].kwargs["cfg"].runner["max_epochs"] == 1 - # check eval before run is enabled if validate is set as True - assert mock_train_func.call_args_list[0].kwargs["cfg"].custom_hooks[0]["enable_eval_before_run"] - assert mock_train_func.call_args_list[1].kwargs["cfg"].custom_hooks[0]["enable_eval_before_run"] - # check OTXProgressHook is removed - assert len(mock_train_func.call_args_list[0].kwargs["cfg"].custom_hooks) == 1 - - class TestSubDataset: @pytest.fixture(autouse=True) def set_up(self, mocker): From 0fbf176b2217fc499fcc178961bff3b87d5137de Mon Sep 17 00:00:00 2001 From: eunwoosh Date: Wed, 24 May 2023 10:27:06 +0900 Subject: [PATCH 2/6] align with pre commit --- otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py | 1 + .../common/adapters/mmcv/utils/test_automatic_bs.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py b/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py index 5fe0c1c9d3c..81a27f81a6a 100644 --- a/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py +++ b/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py @@ -122,6 +122,7 @@ def _set_batch_size(cfg, batch_size: int): else: cfg.data.train_dataloader["samples_per_gpu"] = batch_size + def _set_max_epoch(cfg, max_epoch: int): if cfg.runner.get("type") == "AccuracyAwareRunner": # nncf case if "nncf_config" in cfg.runner: diff --git a/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py b/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py index e1f49542402..a7353d97cee 100644 --- a/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py +++ b/tests/unit/algorithms/common/adapters/mmcv/utils/test_automatic_bs.py @@ -65,14 +65,16 @@ def mock_dataset(mocker): @pytest.mark.parametrize("not_increase", [True, False]) @pytest.mark.parametrize("is_action_task", [True, False]) @pytest.mark.parametrize("is_iter_based_runner", [True, False]) -def test_adapt_batch_size(mocker, mock_adapt_algo_cls, common_cfg, mock_dataset, not_increase, is_action_task, is_iter_based_runner): +def test_adapt_batch_size( + mocker, mock_adapt_algo_cls, common_cfg, mock_dataset, not_increase, is_action_task, is_iter_based_runner +): # prepare mock_train_func = mocker.MagicMock() new_bs = DEFAULT_BS // 2 if not_increase else DEFAULT_BS + 2 max_eph_name = "max_epochs" if is_iter_based_runner: - common_cfg.runner = {'type': 'IterBasedRunnerWithCancel', 'max_iters': 100} + common_cfg.runner = {"type": "IterBasedRunnerWithCancel", "max_iters": 100} max_eph_name = "max_iters" if is_action_task: From fee855eddb7ab3e72f8dfd553066f4e05d50478e Mon Sep 17 00:00:00 2001 From: eunwoosh Date: Wed, 24 May 2023 10:30:38 +0900 Subject: [PATCH 3/6] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f87748dcb61..d4cd542c74d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,7 @@ All notable changes to this project will be documented in this file. ### Bug fixes - Fix backward compatibility with OpenVINO SSD-like detection models from OTE 0.5 () +- Fix the bug that auto adapt batch size is unavailable with IterBasedRunner () ### Known issues From f5ef0848f95d40d416f5ffe888beabd6c35f7493 Mon Sep 17 00:00:00 2001 From: eunwoosh Date: Wed, 24 May 2023 15:59:18 +0900 Subject: [PATCH 4/6] deal with no runner type case --- otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py b/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py index 81a27f81a6a..cce7159e694 100644 --- a/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py +++ b/otx/algorithms/common/adapters/mmcv/utils/automatic_bs.py @@ -129,10 +129,12 @@ def _set_max_epoch(cfg, max_epoch: int): _set_value_at_dict_in_dict( cfg.runner["nncf_config"], "accuracy_aware_training.params.maximal_total_epochs", max_epoch ) - elif "iterbased" in cfg.runner["type"].lower(): - cfg.runner["max_iters"] = max_epoch else: - cfg.runner["max_epochs"] = max_epoch + runner_type = cfg.runner.get("type") + if runner_type is not None and "iterbased" in runner_type.lower(): + cfg.runner["max_iters"] = max_epoch + else: + cfg.runner["max_epochs"] = max_epoch class SubDataset: From 8f2400fcc8c98aa4f3626c5ce90a25fc4da7cff5 Mon Sep 17 00:00:00 2001 From: eunwoosh Date: Wed, 24 May 2023 16:04:18 +0900 Subject: [PATCH 5/6] change CHANGELOG --- CHANGELOG.md | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4cd542c74d..4295a66d4a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,19 @@ All notable changes to this project will be documented in this file. +## \[v1.4.0\] + +### New features + +### Enhancements + +### Bug fixes + +- Fix the bug that auto adapt batch size is unavailable with IterBasedRunner () + +### Known issues + + ## \[v1.3.0\] ### New features @@ -72,7 +85,6 @@ All notable changes to this project will be documented in this file. ### Bug fixes - Fix backward compatibility with OpenVINO SSD-like detection models from OTE 0.5 () -- Fix the bug that auto adapt batch size is unavailable with IterBasedRunner () ### Known issues From a3125db3756b7219beff4725bb99b9ddefa44d85 Mon Sep 17 00:00:00 2001 From: eunwoosh Date: Wed, 24 May 2023 16:25:21 +0900 Subject: [PATCH 6/6] align with prettier --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4295a66d4a5..f7d94b0a95e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,6 @@ All notable changes to this project will be documented in this file. ### Known issues - ## \[v1.3.0\] ### New features