Add documentation about configurable input size (#3870)

* add docs about configurable input size

* update api usecase and fix bug

eunwoosh authored Aug 21, 2024
1 parent cba5120 commit b807c9d

Showing 5 changed files with 134 additions and 2 deletions.

docs/source/guide/explanation/additional_features/configurable_input_size.rst
@@ -0,0 +1,116 @@
Configurable Input Size
=======================

The Configurable Input Size feature allows users to adjust the input resolution of their deep learning models
to balance between training and inference speed and model performance.
This flexibility enables users to tailor the input size to their specific needs without manually altering
the data pipeline configurations.

To utilize this feature, simply specify the desired input size as an argument to the train command.
Additionally, OTX ensures compatibility with models trained on non-default input sizes by automatically
adjusting the data pipeline to match that input size in the other engine entry points.

Usage example:

.. tab-set::

    .. tab-item:: API 1

        .. code-block:: python

            from otx.algo.detection.yolox import YOLOXS
            from otx.core.data.module import OTXDataModule
            from otx.engine import Engine

            input_size = (512, 512)
            model = YOLOXS(label_info=5, input_size=input_size)  # input_size should be tuple[int, int]
            datamodule = OTXDataModule(..., input_size=input_size)
            engine = Engine(model=model, datamodule=datamodule)
            engine.train()

    .. tab-item:: API 2

        .. code-block:: python

            from otx.core.data.module import OTXDataModule
            from otx.engine import Engine

            datamodule = OTXDataModule(..., input_size=(512, 512))
            # The model input size will be aligned with the datamodule input size
            engine = Engine(model="yolox_s", datamodule=datamodule)
            engine.train()

    .. tab-item:: CLI

        .. code-block:: bash

            (otx) ...$ otx train ... --data.input_size 512

.. _adaptive-input-size:

Adaptive Input Size
-------------------

The Adaptive Input Size feature intelligently determines an optimal input size for the model
by analyzing the dataset's statistics.
It operates in two distinct modes: "auto" and "downscale".
In "auto" mode, the input size may increase or decrease based on the dataset's characteristics.
In "downscale" mode, the input size will either decrease or remain unchanged, ensuring that model training and inference speed doesn't drop.


To activate this feature, use the following command with the desired mode:

.. tab-set::

    .. tab-item:: API

        .. code-block:: python

            from otx.algo.detection.yolox import YOLOXS
            from otx.core.data.module import OTXDataModule
            from otx.engine import Engine

            datamodule = OTXDataModule(
                ...
                adaptive_input_size="auto",  # "auto" or "downscale"
                input_size_multiplier=YOLOXS.input_size_multiplier,  # should be set to the model's input_size_multiplier
            )
            model = YOLOXS(label_info=5, input_size=datamodule.input_size)
            engine = Engine(model=model, datamodule=datamodule)
            engine.train()

    .. tab-item:: CLI

        .. code-block:: bash

            (otx) ...$ otx train ... --data.adaptive_input_size "auto | downscale"

The adaptive process includes the following steps:

1. OTX computes robust statistics from the input dataset.

2. The initial input size is set based on the typical large image size within the dataset.

3. (Optional) The input size may be further refined to account for the sizes of objects present in the dataset.
The model's minimum recognizable object size, typically ranging from 16x16 to 32x32 pixels, serves as a reference to
proportionally adjust the input size relative to the average small object size observed in the dataset.
For instance, if objects are generally 64x64 pixels in a 512x512 image, the input size would be adjusted
to 256x256 to maintain detectability.

Adjustments are subject to the following constraints:

* If the recalculated input size exceeds the maximum image size determined in the previous step, it will be capped at that maximum size.
* If the recalculated input size falls below the minimum threshold defined by MIN_DETECTION_INPUT_SIZE, the input size will be scaled up. This is done by increasing the smaller dimension (width or height) to MIN_DETECTION_INPUT_SIZE while maintaining the aspect ratio, ensuring that the model's minimum criteria for object detection are met.

4. ("downscale" mode only) Any scale-up beyond the default model input size is restricted.
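The sizing heuristic in steps 2-4 can be sketched roughly as follows. This is a simplification, not the actual OTX implementation: the constant values, function name, and square-size-only handling are illustrative assumptions.

```python
MIN_RECOGNIZABLE_OBJECT_SIZE = 32  # assumed model minimum, typically 16-32 px
MIN_DETECTION_INPUT_SIZE = 256     # assumed lower bound for detection input


def adapt_input_size(
    typical_image_size: int,
    avg_small_object_size: float,
    default_input_size: int,
    downscale_only: bool = False,
) -> int:
    """Rough sketch of the adaptive input size heuristic (square sizes only)."""
    # Step 2: start from the typical large image size in the dataset.
    input_size = typical_image_size
    # Step 3: shrink the image so small objects land near the model's
    # minimum recognizable object size.
    if avg_small_object_size > MIN_RECOGNIZABLE_OBJECT_SIZE:
        input_size = round(
            input_size * MIN_RECOGNIZABLE_OBJECT_SIZE / avg_small_object_size
        )
    # Constraint: never exceed the maximum image size from the previous step.
    input_size = min(input_size, typical_image_size)
    # Constraint: never fall below the detection minimum.
    input_size = max(input_size, MIN_DETECTION_INPUT_SIZE)
    # Step 4 ("downscale" mode): never scale up past the model default.
    if downscale_only:
        input_size = min(input_size, default_input_size)
    return input_size


# Example from the text: 64x64 objects in 512x512 images -> 256x256 input.
print(adapt_input_size(512, 64, default_input_size=640))  # -> 256
```

With `downscale_only=True`, the same call never returns a size above `default_input_size`, matching the "decrease or remain unchanged" guarantee described above.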


.. note::
    Opting for a smaller input size can be advantageous for datasets with lower-resolution images or larger objects,
    as it may improve speed with minimal impact on model accuracy. However, it is important to consider that selecting
    a smaller input size could affect model performance depending on the task, model architecture, and dataset
    properties.
8 changes: 7 additions & 1 deletion docs/source/guide/explanation/additional_features/hpo.rst
@@ -143,10 +143,16 @@ Here is explanation of all HPO configuration.

- **mode** (*str*, *default='max'*) - Optimization mode for the metric. It determines whether the metric should be maximized or minimized. The possible values are 'max' and 'min', respectively.

- **num_workers** (*int*, *default=1*) How many trials will be executed in parallel.
- **num_trials** (*int*, *default=None*) The number of training trials to perform during HPO. If not provided, the number of trials will be determined based on the expected time ratio. Defaults to None.

- **num_workers** (*int*, *default=None*) The number of trials that will be run concurrently.

- **expected_time_ratio** (*int*, *default=4*) How much time to allocate to HPO relative to the time of a single training run. For example, the default of 4 allows HPO to use roughly four times the training time.

- **metric_name** (*str*, *default=None*) The name of the performance metric to be optimized during HPO. If not specified, the metric will be selected based on the configured callbacks. Defaults to None.

- **adapt_bs_search_space_max_val** (*Literal["None", "Safe", "Full"]*, *default="None"*) Whether to execute `Auto-adapt batch size` prior to HPO. This step finds the maximum batch size value, which then serves as the upper limit for the batch size search space during HPO. For further information on `Auto-adapt batch size`, please refer to the `Auto-configuration` documentation. Defaults to "None".

- **maximum_resource** (*int*, *default=None*) - Maximum number of training epochs for each trial. When the number of training epochs reaches this value, the trial stops training.

- **minimum_resource** (*int*, *default=None*) - Minimum number of training epochs for each trial. Each trial will run for at least this many epochs, even if the performance of the model is not improving.
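As a rough illustration of how `expected_time_ratio` bounds the HPO budget: the total time allowed for HPO is a multiple of a single training run, and the number of trials that fit depends on how much of a full run each trial costs. The helper below is purely illustrative arithmetic, not the actual HPO scheduler logic, and `avg_trial_fraction` is an assumed parameter.

```python
def estimate_num_trials(
    train_time_s: float,
    expected_time_ratio: int = 4,
    avg_trial_fraction: float = 0.5,
) -> int:
    """Illustrative estimate of how many trials fit in the HPO time budget,
    assuming each trial costs a fraction of a full training run
    (e.g. because early stopping cuts trials short)."""
    budget_s = train_time_s * expected_time_ratio
    return max(1, int(budget_s // (train_time_s * avg_trial_fraction)))


# A 30-minute training run with the default ratio of 4 gives a two-hour
# budget; at half a run per trial, about 8 trials fit.
print(estimate_num_trials(1800))  # -> 8
```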
@@ -14,3 +14,4 @@ Additional Features
fast_data_loading
tiling
class_incremental_sampler
configurable_input_size
3 changes: 2 additions & 1 deletion src/otx/engine/engine.py
@@ -143,7 +143,8 @@ def __init__(
get_model_args: dict[str, Any] = {}
if self._datamodule is not None:
get_model_args["label_info"] = self._datamodule.label_info
get_model_args["input_size"] = self._datamodule.input_size
if (input_size := self._datamodule.input_size) is not None:
get_model_args["input_size"] = (input_size, input_size) if isinstance(input_size, int) else input_size
self._model: OTXModel = (
model if isinstance(model, OTXModel) else self._auto_configurator.get_model(**get_model_args)
)
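In isolation, the int-to-tuple normalization added in this hunk behaves like the sketch below (the function name is ours; the engine performs this inline):

```python
def normalize_input_size(input_size):
    """A bare int N becomes a square (N, N), a tuple passes through,
    and None is left unset (so no input_size argument is forwarded)."""
    if input_size is None:
        return None
    return (input_size, input_size) if isinstance(input_size, int) else input_size


print(normalize_input_size(1234))        # -> (1234, 1234)
print(normalize_input_size((512, 640)))  # -> (512, 640)
```

This is what lets the CLI accept a single `--data.input_size 512` while models still receive a `tuple[int, int]`.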
8 changes: 8 additions & 0 deletions tests/unit/engine/test_engine.py
@@ -72,6 +72,14 @@ def test_model_init(self, tmp_path, mock_datamodule):
assert engine._model.input_size == (1234, 1234)
assert engine._model.label_info.num_classes == 4321

def test_model_init_datamodule_ipt_size_int(self, tmp_path, mock_datamodule):
mock_datamodule.input_size = 1234
data_root = "tests/assets/classification_dataset"
engine = Engine(work_dir=tmp_path, data_root=data_root)

assert engine._model.input_size == (1234, 1234)
assert engine._model.label_info.num_classes == 4321

def test_model_setter(self, fxt_engine, mocker) -> None:
assert isinstance(fxt_engine.model, TVModelForMulticlassCls)
fxt_engine.model = "efficientnet_b0"