diff --git a/anomalib/config/__init__.py b/anomalib/config/__init__.py index 2becba50cf..e2607d88f9 100644 --- a/anomalib/config/__init__.py +++ b/anomalib/config/__init__.py @@ -14,6 +14,10 @@ # See the License for the specific language governing permissions # and limitations under the License. -from .config import get_configurable_parameters, update_nncf_config +from .config import ( + get_configurable_parameters, + update_input_size_config, + update_nncf_config, +) -__all__ = ["get_configurable_parameters", "update_nncf_config"] +__all__ = ["get_configurable_parameters", "update_nncf_config", "update_input_size_config"] diff --git a/anomalib/utils/callbacks/model_loader.py b/anomalib/utils/callbacks/model_loader.py index c74663ddc1..6c4fa0f278 100644 --- a/anomalib/utils/callbacks/model_loader.py +++ b/anomalib/utils/callbacks/model_loader.py @@ -1,4 +1,19 @@ """Callback that loads model weights from the state dict.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. + import torch from pytorch_lightning import Callback, LightningModule diff --git a/anomalib/utils/callbacks/nncf_callback.py b/anomalib/utils/callbacks/nncf_callback.py index 8d0013fce8..22486f83d5 100644 --- a/anomalib/utils/callbacks/nncf_callback.py +++ b/anomalib/utils/callbacks/nncf_callback.py @@ -1,5 +1,19 @@ """NNCF Callback.""" +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. + import os from typing import Any, Dict, Iterator, Optional, Tuple, Union diff --git a/anomalib/utils/callbacks/save_to_csv.py b/anomalib/utils/callbacks/save_to_csv.py index 327788eb1d..0fc2f26034 100644 --- a/anomalib/utils/callbacks/save_to_csv.py +++ b/anomalib/utils/callbacks/save_to_csv.py @@ -1,4 +1,19 @@ """Callback to save metrics to CSV.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
+ from pathlib import Path import numpy as np diff --git a/anomalib/utils/hpo/config.py b/anomalib/utils/hpo/config.py deleted file mode 100644 index 9edcc944e5..0000000000 --- a/anomalib/utils/hpo/config.py +++ /dev/null @@ -1,53 +0,0 @@ -"""Utils to update configuration files.""" - -# Copyright (C) 2020 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions -# and limitations under the License. - -from typing import List - -from omegaconf import DictConfig - - -def flatten_sweep_params(params_dict: DictConfig) -> DictConfig: - """Flatten the nested parameters section of the config object. - - Args: - params_dict: DictConfig: The dictionary containing the hpo parameters in the original, nested, structure. - - Returns: - flattened version of the parameter dictionary. - """ - - def process_params(nested_params: DictConfig, keys: List[str], flattened_params: DictConfig): - """Flatten nested dictionary. - - Recursive helper function that traverses the nested config object and stores the leaf nodes in a flattened - dictionary. - - Args: - nested_params: DictConfig: config object containing the original parameters. - keys: List[str]: list of keys leading to the current location in the config. - flattened_params: DictConfig: Dictionary in which the flattened parameters are stored. - """ - for name, cfg in nested_params.items(): - if isinstance(cfg, DictConfig): - process_params(cfg, keys + [str(name)], flattened_params) - else: - key = ".".join(keys + [str(name)]) - flattened_params[key] = cfg - - flattened_params_dict = DictConfig({}) - process_params(params_dict, [], flattened_params_dict) - - return flattened_params_dict diff --git a/anomalib/utils/sweep/__init__.py b/anomalib/utils/sweep/__init__.py new file mode 100644 index 0000000000..d657879a70 --- /dev/null +++ b/anomalib/utils/sweep/__init__.py @@ -0,0 +1,32 @@ +"""Utils for Benchmarking and Sweep.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
+ +from .config import get_run_config, set_in_nested_config +from .helpers import ( + get_meta_data, + get_openvino_throughput, + get_sweep_callbacks, + get_torch_throughput, +) + +__all__ = [ + "get_run_config", + "set_in_nested_config", + "get_sweep_callbacks", + "get_meta_data", + "get_openvino_throughput", + "get_torch_throughput", +] diff --git a/anomalib/utils/sweep/config.py b/anomalib/utils/sweep/config.py new file mode 100644 index 0000000000..96238f44c3 --- /dev/null +++ b/anomalib/utils/sweep/config.py @@ -0,0 +1,144 @@ +"""Utilities for modifying the configuration.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. + +import itertools +import operator +from functools import reduce +from typing import Any, Generator, List + +from omegaconf import DictConfig + + +def flatten_sweep_params(params_dict: DictConfig) -> DictConfig: + """Flatten the nested parameters section of the config object. + + We need to flatten the params so that all the nested keys are concatenated into a single string. + This is useful when + - We need to do a cartesian product of all the combinations of the configuration for grid search. + - Save keys as headers for csv + - Add the config to `wandb` sweep. + + Args: + params_dict: DictConfig: The dictionary containing the hpo parameters in the original, nested, structure. + + Returns: + flattened version of the parameter dictionary. + """ + + def flatten_nested_dict(nested_params: DictConfig, keys: List[str], flattened_params: DictConfig): + """Flatten nested dictionary. + + Recursive helper function that traverses the nested config object and stores the leaf nodes in a flattened + dictionary. + + Args: + nested_params: DictConfig: config object containing the original parameters. + keys: List[str]: list of keys leading to the current location in the config. + flattened_params: DictConfig: Dictionary in which the flattened parameters are stored. + """ + for name, cfg in nested_params.items(): + if isinstance(cfg, DictConfig): + flatten_nested_dict(cfg, keys + [str(name)], flattened_params) + else: + key = ".".join(keys + [str(name)]) + flattened_params[key] = cfg + + flattened_params_dict = DictConfig({}) + flatten_nested_dict(params_dict, [], flattened_params_dict) + + return flattened_params_dict + + +def get_run_config(params_dict: DictConfig) -> Generator[DictConfig, None, None]: + """Yields configuration for a single run. + + Args: + params_dict (DictConfig): Configuration for grid search. + + Example: + >>> dummy_config = DictConfig({ + "parent1":{ + "child1": ['a', 'b', 'c'], + "child2": [1, 2, 3] + }, + "parent2":['model1', 'model2'] + }) + >>> for run_config in get_run_config(dummy_config): + >>> print(run_config) + {'parent1.child1': 'a', 'parent1.child2': 1, 'parent2': 'model1'} + {'parent1.child1': 'a', 'parent1.child2': 1, 'parent2': 'model2'} + {'parent1.child1': 'a', 'parent1.child2': 2, 'parent2': 'model1'} + ... 
+ + Yields: + Generator[DictConfig]: Dictionary containing flattened keys + and values for current run. + """ + params = flatten_sweep_params(params_dict) + combinations = list(itertools.product(*params.values())) + keys = params.keys() + for combination in combinations: + run_config = DictConfig({}) + for key, val in zip(keys, combination): + run_config[key] = val + yield run_config + + +def get_from_nested_config(config: DictConfig, keymap: List) -> Any: + """Retrieves an item from a nested config object using a list of keys. + + Args: + config: DictConfig: nested DictConfig object + keymap: List[str]: list of keys corresponding to item that should be retrieved. + """ + return reduce(operator.getitem, keymap, config) + + +def set_in_nested_config(config: DictConfig, keymap: List, value: Any): + """Set an item in a nested config object using a list of keys. + + Args: + config: DictConfig: nested DictConfig object + keymap: List[str]: list of keys corresponding to item that should be set. + value: Any: Value that should be assigned to the dictionary item at the specified location. + + Example: + >>> dummy_config = DictConfig({ + "parent1":{ + "child1": ['a', 'b', 'c'], + "child2": [1, 2, 3] + }, + "parent2":['model1', 'model2'] + }) + >>> model_config = DictConfig({ + "parent1":{ + "child1": 'e', + "child2": 4, + }, + "parent3": False + }) + >>> for run_config in get_run_config(dummy_config): + >>> print("Original model config", model_config) + >>> print("Suggested config", run_config) + >>> for param in run_config.keys(): + >>> set_in_nested_config(model_config, param.split('.'), run_config[param]) + >>> print("Replaced model config", model_config) + >>> break + Original model config {'parent1': {'child1': 'e', 'child2': 4}, 'parent3': False} + Suggested config {'parent1.child1': 'a', 'parent1.child2': 1, 'parent2': 'model1'} + Replaced model config {'parent1': {'child1': 'a', 'child2': 1}, 'parent3': False, 'parent2': 'model1'} + """ + get_from_nested_config(config, keymap[:-1])[keymap[-1]] = value diff --git a/anomalib/utils/sweep/helpers/__init__.py b/anomalib/utils/sweep/helpers/__init__.py new file mode 100644 index 0000000000..05f42fd513 --- /dev/null +++ b/anomalib/utils/sweep/helpers/__init__.py @@ -0,0 +1,20 @@ +"""Helpers for benchmarking and hyperparameter optimization.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
+ +from .callbacks import get_sweep_callbacks +from .inference import get_meta_data, get_openvino_throughput, get_torch_throughput + +__all__ = ["get_meta_data", "get_openvino_throughput", "get_torch_throughput", "get_sweep_callbacks"] diff --git a/anomalib/utils/sweep/helpers/callbacks.py b/anomalib/utils/sweep/helpers/callbacks.py new file mode 100644 index 0000000000..e09267c91e --- /dev/null +++ b/anomalib/utils/sweep/helpers/callbacks.py @@ -0,0 +1,36 @@ +"""Get callbacks related to sweep.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. + + +from typing import List + +from pytorch_lightning import Callback + +from anomalib.utils.callbacks.timer import TimerCallback + + +def get_sweep_callbacks() -> List[Callback]: + """Gets callbacks relevant to sweep. + + Currently this returns only the ``TimerCallback``, which is needed to + compute throughput during the sweep. + + Returns: + List[Callback]: List of callbacks + """ + callbacks: List[Callback] = [TimerCallback()] + + return callbacks diff --git a/anomalib/utils/sweep/helpers/inference.py b/anomalib/utils/sweep/helpers/inference.py new file mode 100644 index 0000000000..11de7a6000 --- /dev/null +++ b/anomalib/utils/sweep/helpers/inference.py @@ -0,0 +1,149 @@ +"""Utils to help compute inference statistics.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. + +import time +from pathlib import Path +from typing import Dict, Iterable, List, Tuple, Union + +import numpy as np +from omegaconf import DictConfig, ListConfig +from torch.utils.data import DataLoader + +from anomalib.deploy import OpenVINOInferencer, TorchInferencer +from anomalib.models.components import AnomalyModule + + +class MockImageLoader: + """Create mock images for inference on CPU based on the specifics of the original torch test dataset. + + Uses yield to avoid storing everything in memory. + + Args: + image_size (List[int]): Size of input image + total_count (int): Total images in the test dataset + """ + + def __init__(self, image_size: List[int], total_count: int): + self.total_count = total_count + self.image_size = image_size + self.image = np.ones((*self.image_size, 3)).astype(np.uint8) + + def __len__(self): + """Get total count of images.""" + return self.total_count + + def __call__(self) -> Iterable[np.ndarray]: + """Yield batch of generated images.
+ + Yields: + np.ndarray: Mock image. + """ + for _ in range(self.total_count): + yield self.image + + +def get_meta_data(model: AnomalyModule, input_size: Tuple[int, int]) -> Dict: + """Get meta data for inference. + + Args: + model (AnomalyModule): Trained model from which the metadata is extracted. + input_size (Tuple[int, int]): Input size used to resize the pixel level mean and std. + + Returns: + (Dict): Metadata as dictionary. + """ + meta_data = { + "image_threshold": model.image_threshold.value.cpu().numpy(), + "pixel_threshold": model.pixel_threshold.value.cpu().numpy(), + "min": model.min_max.min.cpu().numpy(), + "max": model.min_max.max.cpu().numpy(), + "stats": {}, + } + + image_mean = model.training_distribution.image_mean.cpu().numpy() + if image_mean.size > 0: + meta_data["stats"]["image_mean"] = image_mean + + image_std = model.training_distribution.image_std.cpu().numpy() + if image_std.size > 0: + meta_data["stats"]["image_std"] = image_std + + pixel_mean = model.training_distribution.pixel_mean.cpu().numpy() + if pixel_mean.size > 0: + meta_data["stats"]["pixel_mean"] = pixel_mean.reshape(input_size) + + pixel_std = model.training_distribution.pixel_std.cpu().numpy() + if pixel_std.size > 0: + meta_data["stats"]["pixel_std"] = pixel_std.reshape(input_size) + + return meta_data + + +def get_torch_throughput( + config: Union[DictConfig, ListConfig], model: AnomalyModule, test_dataset: DataLoader, meta_data: Dict +) -> float: + """Tests the model on dummy data. Images are passed sequentially to make the comparison with the OpenVINO model fair. + + Args: + config (Union[DictConfig, ListConfig]): Model config. + model (AnomalyModule): Model on which inference is called. + test_dataset (DataLoader): The test dataset used as a reference for the mock dataset. + meta_data (Dict): Metadata used for normalization. + + Returns: + float: Inference throughput + """ + model.eval() + inferencer = TorchInferencer(config, model) + torch_dataloader = MockImageLoader(config.dataset.image_size, len(test_dataset)) + start_time = time.time() + # Since we don't care about performance metrics and just the throughput, use mock data. + for image in torch_dataloader(): + inferencer.predict(image, superimpose=False, meta_data=meta_data) + + # get throughput + inference_time = time.time() - start_time + throughput = len(test_dataset) / inference_time + + return throughput + + +def get_openvino_throughput( + config: Union[DictConfig, ListConfig], model_path: Path, test_dataset: DataLoader, meta_data: Dict +) -> float: + """Runs the generated OpenVINO model on a dummy dataset to get throughput. + + Args: + config (Union[DictConfig, ListConfig]): Model config. + model_path (Path): Path to the folder containing the OpenVINO models. `model.xml` is loaded from this folder. + test_dataset (DataLoader): The test dataset used as a reference for the mock dataset. + meta_data (Dict): Metadata used for normalization. + + Returns: + float: Inference throughput + """ + inferencer = OpenVINOInferencer(config, model_path / "model.xml") + openvino_dataloader = MockImageLoader(config.dataset.image_size, total_count=len(test_dataset)) + start_time = time.time() + # Create test images on CPU. Since we don't care about performance metrics and just the throughput, use mock data.
+ for image in openvino_dataloader(): + inferencer.predict(image, superimpose=False, meta_data=meta_data) + + # get throughput + inference_time = time.time() - start_time + throughput = len(test_dataset) / inference_time + + return throughput diff --git a/tests/nightly/models/test_model_nightly.py b/tests/nightly/models/test_model_nightly.py index 4151375194..f3caebdde9 100644 --- a/tests/nightly/models/test_model_nightly.py +++ b/tests/nightly/models/test_model_nightly.py @@ -25,7 +25,7 @@ from omegaconf import DictConfig, ListConfig, OmegaConf from pytorch_lightning import seed_everything -from anomalib.utils.hpo.config import flatten_sweep_params +from anomalib.utils.sweep.config import flatten_sweep_params from tests.helpers.dataset import get_dataset_path from tests.helpers.model import model_load_test, setup_model_train diff --git a/tests/pre_merge/models/test_model_premerge.py b/tests/pre_merge/models/test_model_premerge.py index dd55c23f8e..5af1f12510 100644 --- a/tests/pre_merge/models/test_model_premerge.py +++ b/tests/pre_merge/models/test_model_premerge.py @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions # and limitations under the License. -import os import tempfile import pytest diff --git a/tests/pre_merge/pre_processing/__init__.py b/tests/pre_merge/pre_processing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/pre_merge/pre_processing/test_tiler.py b/tests/pre_merge/pre_processing/test_tiler.py new file mode 100644 index 0000000000..4555ce1490 --- /dev/null +++ b/tests/pre_merge/pre_processing/test_tiler.py @@ -0,0 +1,135 @@ +"""Image Tiling Tests.""" + +import pytest +import torch +from omegaconf import ListConfig + +from anomalib.pre_processing.tiler import StrideSizeError, Tiler + +tile_data = [ + ([3, 1024, 1024], 512, 512, torch.Size([4, 3, 512, 512]), False), + ([1, 3, 1024, 1024], 512, 512, torch.Size([4, 3, 512, 512]), False), + ([3, 1024, 1024], 512, 512, torch.Size([4, 3, 512, 512]), True), + ([1, 3, 1024, 1024], 512, 512, torch.Size([4, 3, 512, 512]), True), +] + +untile_data = [ + ([3, 1024, 1024], 512, 256, torch.Size([4, 3, 512, 512])), + ([1, 3, 1024, 1024], 512, 512, torch.Size([4, 3, 512, 512])), +] + +overlapping_data = [ + ( + torch.Size([1, 3, 1024, 1024]), + 512, + 256, + torch.Size([16, 3, 512, 512]), + "padding", + ), + ( + torch.Size([1, 3, 1024, 1024]), + 512, + 256, + torch.Size([16, 3, 512, 512]), + "interpolation", + ), +] + + +@pytest.mark.parametrize( + "tile_size, stride", + [(512, 256), ([512, 512], [256, 256]), (ListConfig([512, 512]), 256)], +) +def test_size_types_should_be_int_tuple_or_list_config(tile_size, stride): + """Size type could only be integer, tuple or ListConfig type.""" + tiler = Tiler(tile_size=tile_size, stride=stride) + assert isinstance(tiler.tile_size_h, int) + assert isinstance(tiler.stride_w, int) + + +@pytest.mark.parametrize("image_size, tile_size, stride, shape, use_random_tiling", tile_data) +def test_tiler_handles_single_image_without_batch_dimension(image_size, tile_size, stride, shape, use_random_tiling): + """Tiler should add batch dimension if image is 3D (CxHxW).""" + tiler = Tiler(tile_size=tile_size, stride=stride) + image = torch.rand(image_size) + patches = tiler.tile(image, use_random_tiling=use_random_tiling) + assert patches.shape == shape + + +def test_stride_size_cannot_be_larger_than_tile_size(): + """Larger stride size than tile size is not desired, and causes issues.""" + kernel_size = (128, 128) + stride = 256 + with 
pytest.raises(StrideSizeError): + tiler = Tiler(tile_size=kernel_size, stride=stride) + + +def test_tile_size_cannot_be_larger_than_image_size(): + """Larger tile size than image size is not desired, and causes issues.""" + with pytest.raises(ValueError): + tiler = Tiler(tile_size=1024, stride=512) + image = torch.rand(1, 3, 512, 512) + tiler.tile(image) + + +@pytest.mark.parametrize("tile_size, kernel_size, stride, image_size", untile_data) +def test_untile_non_overlapping_patches(tile_size, kernel_size, stride, image_size): + """Non-Overlapping Tiling/Untiling should return the same image size.""" + tiler = Tiler(tile_size=kernel_size, stride=stride) + image = torch.rand(image_size) + tiles = tiler.tile(image) + + untiled_image = tiler.untile(tiles) + assert untiled_image.shape == torch.Size(image_size) + + +@pytest.mark.parametrize("mode", ["pad", "padded", "interpolate", "interplation"]) +def test_upscale_downscale_mode(mode): + with pytest.raises(ValueError): + tiler = Tiler(tile_size=(512, 512), stride=(256, 256), mode=mode) + + +@pytest.mark.parametrize("image_size, kernel_size, stride, tile_size, mode", overlapping_data) +@pytest.mark.parametrize("remove_border_count", [0, 5]) +def test_untile_overlapping_patches(image_size, kernel_size, stride, remove_border_count, tile_size, mode): + """Overlapping Tiling/Untiling should return the same image size.""" + tiler = Tiler( + tile_size=kernel_size, + stride=stride, + remove_border_count=remove_border_count, + mode=mode, + ) + + image = torch.rand(image_size) + tiles = tiler.tile(image) + reconstructed_image = tiler.untile(tiles) + image = image[ + :, + :, + remove_border_count:-remove_border_count, + remove_border_count:-remove_border_count, + ] + reconstructed_image = reconstructed_image[ + :, + :, + remove_border_count:-remove_border_count, + remove_border_count:-remove_border_count, + ] + assert torch.equal(image, reconstructed_image) + + +@pytest.mark.parametrize("image_size", [(1, 3, 512, 512)]) +@pytest.mark.parametrize("tile_size", [(256, 256), (200, 200), (211, 213), (312, 333), (511, 511)]) +@pytest.mark.parametrize("stride", [(64, 64), (111, 111), (128, 111), (128, 128)]) +@pytest.mark.parametrize("mode", ["padding", "interpolation"]) +def test_divisible_tile_size_and_stride(image_size, tile_size, stride, mode): + """When the image is not divisible by tile size and stride, Tiler should up + samples the image before tiling, and downscales before untiling.""" + tiler = Tiler(tile_size, stride, mode=mode) + image = torch.rand(image_size) + tiles = tiler.tile(image) + reconstructed_image = tiler.untile(tiles) + assert image.shape == reconstructed_image.shape + + if mode == "padding": + assert torch.allclose(image, reconstructed_image) diff --git a/tests/pre_merge/pre_processing/transforms/__init__.py b/tests/pre_merge/pre_processing/transforms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/pre_merge/pre_processing/transforms/test_transforms.py b/tests/pre_merge/pre_processing/transforms/test_transforms.py new file mode 100644 index 0000000000..ea15788ec0 --- /dev/null +++ b/tests/pre_merge/pre_processing/transforms/test_transforms.py @@ -0,0 +1,81 @@ +"""Data transformation test. + +This test contains the following test: + - Transformations could be ``None``, ``yaml``, ``json`` or ``dict``. + - When it is ``None``, the script loads the default transforms + - When it is ``yaml``, ``json`` or ``dict``, `albumentations` package + deserializes the transformations. 
+""" + +import tempfile +from pathlib import Path + +import albumentations as A +import numpy as np +import pytest +import skimage +from torch import Tensor + +from anomalib.pre_processing import PreProcessor + + +def test_transforms_and_image_size_cannot_be_none(): + """When transformations ``config`` and ``image_size`` are ``None`` + ``PreProcessor`` class should raise a ``ValueError``.""" + + with pytest.raises(ValueError): + PreProcessor(config=None, image_size=None) + + +def test_image_size_could_be_int_or_tuple(): + """When ``config`` is None, ``image_size`` could be either ``int`` or + ``Tuple[int, int]``.""" + + PreProcessor(config=None, image_size=256) + PreProcessor(config=None, image_size=(256, 512)) + with pytest.raises(ValueError): + PreProcessor(config=None, image_size=0.0) + + +def test_load_transforms_from_string(): + """When the pre-processor is instantiated via a transform config file, it + should work with either string or A.Compose and return a ValueError + otherwise.""" + + config_path = tempfile.NamedTemporaryFile(suffix=".yaml").name + + # Create a dummy transformation. + transforms = A.Compose( + [ + A.Resize(1024, 1024, always_apply=True), + A.CenterCrop(256, 256, always_apply=True), + A.Resize(224, 224, always_apply=True), + ] + ) + A.save(transform=transforms, filepath=config_path, data_format="yaml") + + # Pass a path to config + pre_processor = PreProcessor(config=config_path) + assert isinstance(pre_processor.transforms, A.Compose) + + # Pass a config of type A.Compose + pre_processor = PreProcessor(config=transforms) + assert isinstance(pre_processor.transforms, A.Compose) + + # Anything else should raise an error + with pytest.raises(ValueError): + PreProcessor(config=0) + + +def test_to_tensor_returns_correct_type(): + """`to_tensor` flag should ensure that pre-processor returns the expected + type.""" + image = skimage.data.astronaut() + + pre_processor = PreProcessor(config=None, image_size=256, to_tensor=True) + transformed = pre_processor(image=image)["image"] + assert isinstance(transformed, Tensor) + + pre_processor = PreProcessor(config=None, image_size=256, to_tensor=False) + transformed = pre_processor(image=image)["image"] + assert isinstance(transformed, np.ndarray) diff --git a/tests/pre_merge/utils/__init__.py b/tests/pre_merge/utils/callbacks/__init__.py similarity index 96% rename from tests/pre_merge/utils/__init__.py rename to tests/pre_merge/utils/callbacks/__init__.py index f47e58091f..8d05a0bcff 100644 --- a/tests/pre_merge/utils/__init__.py +++ b/tests/pre_merge/utils/callbacks/__init__.py @@ -1,4 +1,4 @@ -"""Tests for utils.""" +"""Test callbacks.""" # Copyright (C) 2020 Intel Corporation # diff --git a/tests/pre_merge/utils/test_sweep_config.py b/tests/pre_merge/utils/test_sweep_config.py new file mode 100644 index 0000000000..5e8347ee1a --- /dev/null +++ b/tests/pre_merge/utils/test_sweep_config.py @@ -0,0 +1,66 @@ +"""Tests for benchmarking configuration utils.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions +# and limitations under the License. + +from omegaconf import DictConfig + +from anomalib.utils.sweep.config import ( + flatten_sweep_params, + get_run_config, + set_in_nested_config, +) + + +class TestSweepConfig: + def test_flatten_params(self): + # simulate grid search config + dummy_config = DictConfig( + {"parent1": {"child1": ["a", "b", "c"], "child2": [1, 2, 3]}, "parent2": ["model1", "model2"]} + ) + dummy_config = flatten_sweep_params(dummy_config) + assert dummy_config == { + "parent1.child1": ["a", "b", "c"], + "parent1.child2": [1, 2, 3], + "parent2": ["model1", "model2"], + } + + def test_get_run_config(self): + # simulate model config + model_config = DictConfig( + { + "parent1": { + "child1": "e", + "child2": 4, + }, + "parent3": False, + } + ) + # simulate grid search config + dummy_config = DictConfig({"parent1": {"child1": ["a"], "child2": [1, 2]}, "parent2": ["model1"]}) + + config_iterator = get_run_config(dummy_config) + # First iteration + run_config = next(config_iterator) + assert run_config == {"parent1.child1": "a", "parent1.child2": 1, "parent2": "model1"} + for param in run_config.keys(): + set_in_nested_config(model_config, param.split("."), run_config[param]) + assert model_config == {"parent1": {"child1": "a", "child2": 1}, "parent3": False, "parent2": "model1"} + + # Second iteration + run_config = next(config_iterator) + assert run_config == {"parent1.child1": "a", "parent1.child2": 2, "parent2": "model1"} + for param in run_config.keys(): + set_in_nested_config(model_config, param.split("."), run_config[param]) + assert model_config == {"parent1": {"child1": "a", "child2": 2}, "parent3": False, "parent2": "model1"} diff --git a/tests/utils/callbacks/__init__.py b/tests/utils/callbacks/__init__.py index 8d05a0bcff..26dac37289 100644 --- a/tests/utils/callbacks/__init__.py +++ b/tests/utils/callbacks/__init__.py @@ -1,4 +1,4 @@ -"""Test callbacks.""" +"""Sampling methods.""" # Copyright (C) 2020 Intel Corporation # @@ -13,3 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions # and limitations under the License. + +from .k_center_greedy import KCenterGreedy + +__all__ = ["KCenterGreedy"] diff --git a/tools/benchmarking/README.md b/tools/benchmarking/README.md index d4b60c7180..b87b51c331 100644 --- a/tools/benchmarking/README.md +++ b/tools/benchmarking/README.md @@ -2,10 +2,12 @@ These bash scripts will assist in measuring the training performance of the anomalib library. +The python script (`benchmark.py`) will assist in computing metrics for all the models in the repository. + ## Usage -Run the train.sh with the same args as the tools/train.py. Refer to [`../README.md`](https://gitlab-icv.inn.intel.com/algo_rnd_team/anomaly/README.md) for those details. +Run the train.sh with the same args as the tools/train.py. Refer to [`../README.md`](https://github.com/openvinotoolkit/anomalib/blob/development/README.md) for those details. -Note: To collect memory read/write numbers, run the script with sudo priviledges. Otherwise, those values will be blank. +Note: To collect memory read/write numbers, run the script with sudo privileges. Otherwise, those values will be blank. 
``` sudo -E ./train.sh # Train STFPM on MVTec leather ``` @@ -22,3 +24,13 @@ For post processing, run the post-process.sh script with the results directory y ``` ./post-process.sh ./output/2021Aug31_2351 ``` + +--- + +To use the Python script, run it from the root directory of the repository. + +``` +python tools/benchmarking/benchmark.py +``` + +The output will be generated in the results folder, with a csv file for each model. diff --git a/tools/benchmarking/benchmark.py b/tools/benchmarking/benchmark.py new file mode 100644 index 0000000000..ed3d293464 --- /dev/null +++ b/tools/benchmarking/benchmark.py @@ -0,0 +1,240 @@ +"""Benchmark all the algorithms in the repo.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. + + +import logging +import multiprocessing +import time +from concurrent.futures import ProcessPoolExecutor, as_completed +from pathlib import Path +from tempfile import TemporaryDirectory +from typing import Dict, List, Union, cast + +import torch +from omegaconf import DictConfig, ListConfig, OmegaConf +from pytorch_lightning import Trainer, seed_everything +from utils import convert_to_openvino, upload_to_wandb, write_metrics + +from anomalib.config import get_configurable_parameters, update_input_size_config +from anomalib.data import get_datamodule +from anomalib.models import get_model +from anomalib.utils.sweep import ( + get_meta_data, + get_openvino_throughput, + get_run_config, + get_sweep_callbacks, + get_torch_throughput, + set_in_nested_config, +) + +logger = logging.getLogger(__file__) + + +def get_single_model_metrics(model_config: Union[DictConfig, ListConfig], openvino_metrics: bool = False) -> Dict: + """Collects metrics for `model_name` and returns a dict of results. + + Args: + model_config (DictConfig, ListConfig): Configuration for the run + openvino_metrics (bool): If True, converts the model to OpenVINO format and gathers inference metrics. + + Returns: + Dict: Collection of all the metrics such as time taken, throughput and performance scores.
+ """ + + with TemporaryDirectory() as project_path: + model_config.project.path = project_path + datamodule = get_datamodule(model_config) + model = get_model(model_config) + + callbacks = get_sweep_callbacks() + + trainer = Trainer(**model_config.trainer, logger=None, callbacks=callbacks) + + start_time = time.time() + trainer.fit(model=model, datamodule=datamodule) + + # get training time + training_time = time.time() - start_time + + # Creating new variable is faster according to https://stackoverflow.com/a/4330829 + start_time = time.time() + # get test results + test_results = trainer.test(model=model, datamodule=datamodule) + + # get testing time + testing_time = time.time() - start_time + + meta_data = get_meta_data(model, model_config.model.input_size) + + throughput = get_torch_throughput(model_config, model, datamodule.test_dataloader().dataset, meta_data) + + # Get OpenVINO metrics + openvino_throughput = float("nan") + if openvino_metrics: + # Create dirs for openvino model export + openvino_export_path = project_path / Path("exported_models") + openvino_export_path.mkdir(parents=True, exist_ok=True) + convert_to_openvino(model, openvino_export_path, model_config.model.input_size) + openvino_throughput = get_openvino_throughput( + model_config, openvino_export_path, datamodule.test_dataloader().dataset, meta_data + ) + + # arrange the data + data = { + "Training Time (s)": training_time, + "Testing Time (s)": testing_time, + "Inference Throughput (fps)": throughput, + "OpenVINO Inference Throughput (fps)": openvino_throughput, + } + for key, val in test_results[0].items(): + data[key] = float(val) + + return data + + +def compute_on_cpu(): + """Compute all run configurations over a single CPU.""" + sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml") + for run_config in get_run_config(sweep_config.grid_search): + model_metrics = sweep(run_config, 0, sweep_config.seed) + write_metrics(model_metrics, sweep_config.writer) + + +def compute_on_gpu(run_configs: Union[DictConfig, ListConfig], device: int, seed: int, writers: List[str]): + """Go over each run config and collect the result. + + Args: + run_configs (Union[DictConfig, ListConfig]): List of run configurations. + device (int): The GPU id used for running the sweep. + seed (int): Fix a seed. + writers (List[str]): Destinations to write to. + """ + for run_config in run_configs: + model_metrics = sweep(run_config, device, seed) + write_metrics(model_metrics, writers) + + +def distribute_over_gpus(): + """Distribute metric collection over all available GPUs. This is done by splitting the list of configurations.""" + sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml") + with ProcessPoolExecutor( + max_workers=torch.cuda.device_count(), mp_context=multiprocessing.get_context("spawn") + ) as executor: + run_configs = list(get_run_config(sweep_config.grid_search)) + jobs = [] + for device_id, run_split in enumerate( + range(0, len(run_configs), len(run_configs) // torch.cuda.device_count()) + ): + jobs.append( + executor.submit( + compute_on_gpu, + run_configs[run_split : run_split + len(run_configs) // torch.cuda.device_count()], + device_id + 1, + sweep_config.seed, + sweep_config.writer, + ) + ) + for job in jobs: + try: + job.result() + except Exception as exc: + raise Exception(f"Error occurred while computing benchmark on device {job}") from exc + + +def distribute(): + """Run all cpu experiments on a single process. Distribute gpu experiments over all available gpus.
+ + The choice of hardware is read from the `hardware` section of the benchmarking config file. + """ + sweep_config = OmegaConf.load("tools/benchmarking/benchmark_params.yaml") + devices = sweep_config.hardware + if not torch.cuda.is_available() and "gpu" in devices: + logger.warning("Config requested GPU benchmarking but torch could not detect any cuda enabled devices") + elif {"cpu", "gpu"}.issubset(devices): + # Create process for gpu and cpu + with ProcessPoolExecutor(max_workers=2, mp_context=multiprocessing.get_context("spawn")) as executor: + jobs = [executor.submit(compute_on_cpu), executor.submit(distribute_over_gpus)] + for job in as_completed(jobs): + try: + job.result() + except Exception as exception: + raise Exception(f"Error occurred while computing benchmark on device {job}") from exception + elif "cpu" in devices: + compute_on_cpu() + elif "gpu" in devices: + distribute_over_gpus() + if "wandb" in sweep_config.writer: + upload_to_wandb(team="anomalib") + + +def sweep(run_config: Union[DictConfig, ListConfig], device: int = 0, seed: int = 42) -> Dict[str, Union[float, str]]: + """Go over all the values mentioned in `grid_search` parameter of the benchmarking config. + + Args: + run_config (Union[DictConfig, ListConfig]): Single run configuration generated by `get_run_config`. + device (int, optional): Device on which the model is trained. 0 selects the cpu, while a non-zero value selects gpu `device - 1`. Defaults to 0. + seed (int, optional): Seed passed to `seed_everything` for reproducibility. Defaults to 42. + + Returns: + Dict[str, Union[float, str]]: Dictionary containing the metrics gathered from the sweep. + """ + seed_everything(seed) + # This assumes that `model_name` is always present in the sweep config. + model_config = get_configurable_parameters(model_name=run_config.model_name) + + model_config = cast(DictConfig, model_config) # placate mypy + for param in run_config.keys(): + # grid search keys are always assumed to be strings + param = cast(str, param) # placate mypy + set_in_nested_config(model_config, param.split("."), run_config[param]) + + # convert image size to tuple in case it was updated by run config + model_config = update_input_size_config(model_config) + + # Set device in config. 0 - cpu, [0], [1].. - gpu id + model_config.trainer.gpus = 0 if device == 0 else [device - 1] + convert_openvino = bool(model_config.trainer.gpus) + + if run_config.model_name == "patchcore": + convert_openvino = False # `torch.cdist` is not supported by onnx version 11 + # TODO Remove this line when issue #40 is fixed https://github.com/openvinotoolkit/anomalib/issues/40 + if model_config.model.input_size != (224, 224): + return {} # go to next run + + # Run benchmarking for current config + model_metrics = get_single_model_metrics(model_config=model_config, openvino_metrics=convert_openvino) + + # Append configuration of current run to the collected metrics + for key, value in run_config.items(): + # Skip adding model name to the dataframe + if key != "model_name": + model_metrics[key] = value + + # Add device name to the metrics + model_metrics["device"] = "gpu" if device > 0 else "cpu" + model_metrics["model_name"] = run_config.model_name + + return model_metrics + + +if __name__ == "__main__": + # Benchmarking entry point. + # Spawn multiple processes, one for cpu and the rest for the gpus available on the system. + # The idea is to distribute metrics collection over all the available devices. + + print("Benchmarking started 🏃‍♂️. 
This will take a while ⏲ depending on your configuration.") + distribute() + print("Finished gathering results ⚡") diff --git a/tools/benchmarking/benchmark_params.yaml b/tools/benchmarking/benchmark_params.yaml new file mode 100644 index 0000000000..8e7a565801 --- /dev/null +++ b/tools/benchmarking/benchmark_params.yaml @@ -0,0 +1,29 @@ +seed: 42 +hardware: + - cpu + - gpu +writer: + - wandb + - tensorboard +grid_search: + dataset: + category: + - bottle + - cable + - capsule + - carpet + - grid + - hazelnut + - leather + - metal_nut + - pill + - screw + - tile + - toothbrush + - transistor + - wood + - zipper + image_size: [224] + model_name: + - padim + - patchcore diff --git a/anomalib/utils/hpo/__init__.py b/tools/benchmarking/utils/__init__.py similarity index 72% rename from anomalib/utils/hpo/__init__.py rename to tools/benchmarking/utils/__init__.py index df0b042da6..2f2c54b428 100644 --- a/anomalib/utils/hpo/__init__.py +++ b/tools/benchmarking/utils/__init__.py @@ -1,4 +1,4 @@ -"""Utils to help in HPO search.""" +"""Utils specific to running benchmarking scripts.""" # Copyright (C) 2020 Intel Corporation # @@ -13,3 +13,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions # and limitations under the License. + +from .convert import convert_to_openvino +from .metrics import upload_to_wandb, write_metrics + +__all__ = ["convert_to_openvino", "write_metrics", "upload_to_wandb"] diff --git a/tools/benchmarking/utils/convert.py b/tools/benchmarking/utils/convert.py new file mode 100644 index 0000000000..70c64023be --- /dev/null +++ b/tools/benchmarking/utils/convert.py @@ -0,0 +1,28 @@ +"""Model converters.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. + +from pathlib import Path +from typing import List, Union + +from anomalib.deploy import export_convert +from anomalib.models import AnomalyModule + + +def convert_to_openvino(model: AnomalyModule, export_path: Union[Path, str], input_size: List[int]): + """Convert the trained model to OpenVINO.""" + export_path = export_path if isinstance(export_path, Path) else Path(export_path) + onnx_path = export_path / "model.onnx" + export_convert(model, input_size, onnx_path, export_path) diff --git a/tools/benchmarking/utils/metrics.py b/tools/benchmarking/utils/metrics.py new file mode 100644 index 0000000000..51236ac75d --- /dev/null +++ b/tools/benchmarking/utils/metrics.py @@ -0,0 +1,117 @@ +"""Methods to compute and save metrics.""" + +# Copyright (C) 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. +import random +import string +from glob import glob +from pathlib import Path +from typing import Dict, List, Union + +import pandas as pd +from torch.utils.tensorboard.writer import SummaryWriter + +import wandb + + +def write_metrics(model_metrics: Dict[str, Union[str, float]], writers: List[str]): + """Writes metrics to destination provided in the sweep config. + + Args: + model_metrics (Dict): Dictionary to be written + writers (List[str]): List of destinations. + """ + # Write to file as each run is computed + if model_metrics == {} or model_metrics is None: + return + + # Write to CSV + metrics_df = pd.DataFrame(model_metrics, index=[0]) + result_path = Path(f"runs/{model_metrics['model_name']}_{model_metrics['device']}.csv") + Path.mkdir(result_path.parent, parents=True, exist_ok=True) + if not result_path.is_file(): + metrics_df.to_csv(result_path) + else: + metrics_df.to_csv(result_path, mode="a", header=False) + + if "tensorboard" in writers: + write_to_tensorboard(model_metrics) + + +def write_to_tensorboard( + model_metrics: Dict[str, Union[str, float]], +): + """Write model_metrics to tensorboard. + + Args: + model_metrics (Dict[str, Union[str, float]]): Dictionary containing collected results. + """ + scalar_metrics = {} + scalar_prefixes: List[str] = [] + string_metrics = {} + for key, metric in model_metrics.items(): + if isinstance(metric, (int, float, bool)): + scalar_metrics[key] = metric + else: + string_metrics[key] = metric + scalar_prefixes.append(metric) + writer = SummaryWriter(f"runs/{model_metrics['model_name']}_{model_metrics['device']}") + for key, metric in model_metrics.items(): + if isinstance(metric, (int, float, bool)): + scalar_metrics[key.replace(".", "/")] = metric # need to join by / for tensorboard grouping + writer.add_scalar(key, metric) + else: + if key == "model_name": + continue + scalar_prefixes.append(metric) + scalar_prefix: str = "/".join(scalar_prefixes) + for key, metric in scalar_metrics.items(): + writer.add_scalar(scalar_prefix + "/" + str(key), metric) + writer.close() + + +def get_unique_key(str_len: int) -> str: + """Returns a random string of length str_len. + + Args: + str_len (int): Length of string. + + Returns: + str: Random string + """ + return "".join([random.choice(string.ascii_lowercase) for _ in range(str_len)]) + + +def upload_to_wandb(team: str = "anomalib"): + """Upload the data in csv files to wandb. + + Creates a project named benchmarking_[two random characters]. This is so that the project names are unique. + One issue is that it does not check for collision + + Args: + team (str, optional): Name of the team on wandb. This can also be the id of your personal account. + Defaults to "anomalib". 
+ """ + project = f"benchmarking_{get_unique_key(2)}" + tag_list = ["dataset.category", "model_name", "dataset.image_size", "model.backbone", "device"] + for csv_file in glob("runs/*.csv"): + table = pd.read_csv(csv_file) + for index, row in table.iterrows(): + row = dict(row[1:]) # remove index column + tags = [str(row[column]) for column in tag_list if column in row.keys()] + wandb.init( + entity=team, project=project, name=f"{row['model_name']}_{row['dataset.category']}_{index}", tags=tags + ) + wandb.log(row) + wandb.finish()