Commit 01370ef

Fix configs to properly use pytorch-lightning==1.6 with GPU (#234)
* ☑︎ Check if openvino is in `config.yaml` file.

* 🗑 Remove `update_device_config` from `get_config`

* ➕ `devices` and 🗑 `gpus` from test configs

* ➕ `devices` and 🗑 `gpus` from model configs.

* 🗑 `terminate_on_nan` from trainer configs.

* ➕ Added new Trainer config params and 🗑 deprecated ones.

* 🗑 callback

* 🗑 device and set progress bar to true

* 🛠  Fix tests

* ➕  missing trainer configs
samet-akcay committed Apr 20, 2022
1 parent 8455303 commit 01370ef
Showing 11 changed files with 68 additions and 100 deletions.
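
Background for the diffs below (editor's sketch, not part of the commit): pytorch-lightning 1.6 deprecates Trainer arguments such as `gpus`, `terminate_on_nan`, `progress_bar_refresh_rate`, and `weights_summary` in favour of `accelerator`/`devices`, `detect_anomaly`, `enable_progress_bar`, and `enable_model_summary`. A minimal example of the new-style construction, assuming pytorch-lightning>=1.6 is installed:

    # Sketch of the PyTorch Lightning >=1.6 Trainer API these configs target.
    # accelerator="auto" picks a GPU when one is visible and falls back to
    # CPU, so the configs no longer need to hard-code `gpus: 1`.
    from pytorch_lightning import Trainer

    trainer = Trainer(
        accelerator="auto",    # "cpu", "gpu", "tpu", "ipu", or "auto"
        devices=1,             # successor of the deprecated `gpus` flag
        detect_anomaly=False,  # successor of `terminate_on_nan`
        enable_progress_bar=True,
        max_epochs=50,
    )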
30 changes: 0 additions & 30 deletions anomalib/config/config.py
Original file line number Diff line number Diff line change
@@ -21,7 +21,6 @@
 from typing import List, Optional, Union
 from warnings import warn

-import torch
 from omegaconf import DictConfig, ListConfig, OmegaConf

@@ -112,37 +111,10 @@ def update_multi_gpu_training_config(config: Union[DictConfig, ListConfig]) -> U
     return config


-def update_device_config(config: Union[DictConfig, ListConfig], openvino: bool) -> Union[DictConfig, ListConfig]:
-    """Update XPU Device Config This function ensures devices are configured correctly by the user.
-    Args:
-        config (Union[DictConfig, ListConfig]): Input config
-        openvino (bool): Boolean to check if OpenVINO Inference is enabled.
-    Returns:
-        Union[DictConfig, ListConfig]: Updated config
-    """
-
-    config.openvino = openvino
-    if openvino:
-        config.trainer.gpus = 0
-
-    if not torch.cuda.is_available():
-        config.trainer.gpus = 0
-
-    if config.trainer.gpus == 0 and torch.cuda.is_available():
-        config.trainer.gpus = 1
-
-    config = update_multi_gpu_training_config(config)
-
-    return config
-
-
 def get_configurable_parameters(
     model_name: Optional[str] = None,
     model_config_path: Optional[Union[Path, str]] = None,
     weight_file: Optional[str] = None,
-    openvino: bool = False,
     config_filename: Optional[str] = "config",
     config_file_extension: Optional[str] = "yaml",
 ) -> Union[DictConfig, ListConfig]:
@@ -152,7 +124,6 @@ def get_configurable_parameters(
         model_name: Optional[str]: (Default value = None)
         model_config_path: Optional[Union[Path, str]]: (Default value = None)
         weight_file: Path to the weight file
-        openvino: Use OpenVINO
         config_filename: Optional[str]: (Default value = "config")
         config_file_extension: Optional[str]: (Default value = "yaml")
@@ -191,7 +162,6 @@ def get_configurable_parameters(
         config.model.weight_file = weight_file

     config = update_nncf_config(config)
-    config = update_device_config(config, openvino)

     # thresholding
     if "pixel_default" not in config.model.threshold.keys():
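
A hypothetical caller after this hunk (an illustrative sketch, assuming the `anomalib.config` import path; not code from the diff): the `openvino` argument is gone, and device placement is no longer rewritten into the config.

    # Hypothetical usage: `openvino` is no longer a parameter here.
    from anomalib.config import get_configurable_parameters

    config = get_configurable_parameters(model_name="padim")
    # Device selection is now delegated entirely to the Lightning Trainer
    # arguments in the model's config.yaml (see the trainer sections below).
    print(config.trainer.accelerator)  # "auto" in the updated configs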
2 changes: 1 addition & 1 deletion anomalib/models/__init__.py
@@ -54,7 +54,7 @@ def get_model(config: Union[DictConfig, ListConfig]) -> AnomalyModule:
     torch_model_list: List[str] = ["padim", "stfpm", "dfkde", "dfm", "patchcore", "cflow", "ganomaly"]
     model: AnomalyModule

-    if config.openvino:
+    if "openvino" in config.keys() and config.openvino:
         if config.model.name in openvino_model_list:
             module = import_module(f"anomalib.models.{config.model.name}.model")
             model = getattr(module, f"{config.model.name.capitalize()}OpenVINO")
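
Why the membership check matters (editor's note): since `get_configurable_parameters` no longer injects an `openvino` key, plain attribute access on a config without that key fails. A small sketch, assuming omegaconf is installed:

    # Sketch of the failure mode the new guard avoids.
    from omegaconf import OmegaConf

    config = OmegaConf.create({"model": {"name": "padim"}})

    # On omegaconf >= 2.1, `config.openvino` raises ConfigAttributeError
    # here because the key is absent; the membership test is safe.
    if "openvino" in config.keys() and config.openvino:
        print("OpenVINO inference enabled")
    else:
        print("Using the Torch model")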
18 changes: 9 additions & 9 deletions anomalib/models/cflow/config.yaml
@@ -54,7 +54,7 @@ project:

 # PL Trainer Args. Don't add extra parameter here.
 trainer:
-  accelerator: null
+  accelerator: auto # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
   accumulate_grad_batches: 1
   amp_backend: native
   auto_lr_find: false
@@ -63,11 +63,15 @@ trainer:
   benchmark: false
   check_val_every_n_epoch: 1
   default_root_dir: null
+  detect_anomaly: false
   deterministic: false
+  enable_checkpointing: true
+  enable_model_summary: true
+  enable_progress_bar: true
   fast_dev_run: false
-  gpus: 1
+  gpus: null # Set automatically
   gradient_clip_val: 0
+  ipus: null
   limit_predict_batches: 1.0
   limit_test_batches: 1.0
   limit_train_batches: 1.0
@@ -76,6 +80,7 @@ trainer:
   log_gpu_memory: null
   max_epochs: 50
   max_steps: -1
+  max_time: null
   min_epochs: null
   min_steps: null
   move_metrics_to_cpu: false
@@ -86,16 +91,11 @@ trainer:
   overfit_batches: 0.0
   plugins: null
   precision: 32
-  prepare_data_per_node: true
-  process_position: 0
   profiler: null
-  progress_bar_refresh_rate: null
   reload_dataloaders_every_n_epochs: 0
   replace_sampler_ddp: true
-  stochastic_weight_avg: false
   strategy: null
   sync_batchnorm: false
-  terminate_on_nan: false
   tpu_cores: null
   track_grad_norm: -1
   val_check_interval: 1.0
   weights_save_path: null
   weights_summary: top
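
How a `trainer:` section like the one above is typically consumed (a sketch, not part of the diff; assumes omegaconf and pytorch-lightning>=1.6). Because every key is forwarded verbatim to the Trainer constructor, keys Lightning no longer accepts become hard errors, which is why this commit prunes them and what the "Don't add extra parameter here" comment guards against.

    # Sketch: forwarding the YAML trainer section verbatim to Lightning.
    # Any key Trainer.__init__ no longer accepts fails with a TypeError,
    # and deprecated keys emit warnings, hence the pruning in this commit.
    from omegaconf import OmegaConf
    from pytorch_lightning import Trainer

    config = OmegaConf.load("anomalib/models/cflow/config.yaml")
    trainer = Trainer(**config.trainer)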
18 changes: 9 additions & 9 deletions anomalib/models/dfkde/config.yaml
@@ -38,7 +38,7 @@ project:

 # PL Trainer Args. Don't add extra parameter here.
 trainer:
-  accelerator: null
+  accelerator: auto # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
   accumulate_grad_batches: 1
   amp_backend: native
   auto_lr_find: false
@@ -47,11 +47,15 @@ trainer:
   benchmark: false
   check_val_every_n_epoch: 1 # Don't validate before extracting features.
   default_root_dir: null
+  detect_anomaly: false
   deterministic: false
+  enable_checkpointing: true
+  enable_model_summary: true
+  enable_progress_bar: true
   fast_dev_run: false
-  gpus: 1
+  gpus: null # Set automatically
   gradient_clip_val: 0
+  ipus: null
   limit_predict_batches: 1.0
   limit_test_batches: 1.0
   limit_train_batches: 1.0
@@ -60,6 +64,7 @@ trainer:
   log_gpu_memory: null
   max_epochs: 1
   max_steps: -1
+  max_time: null
   min_epochs: null
   min_steps: null
   move_metrics_to_cpu: false
@@ -70,16 +75,11 @@ trainer:
   overfit_batches: 0.0
   plugins: null
   precision: 32
-  prepare_data_per_node: true
-  process_position: 0
   profiler: null
-  progress_bar_refresh_rate: null
   reload_dataloaders_every_n_epochs: 0
   replace_sampler_ddp: true
-  stochastic_weight_avg: false
   strategy: null
   sync_batchnorm: false
-  terminate_on_nan: false
   tpu_cores: null
   track_grad_norm: -1
   val_check_interval: 1.0 # Don't validate before extracting features.
   weights_save_path: null
   weights_summary: top
19 changes: 10 additions & 9 deletions anomalib/models/dfm/config.yaml
@@ -42,7 +42,7 @@ project:

 # PL Trainer Args. Don't add extra parameter here.
 trainer:
-  accelerator: null
+  accelerator: auto # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
   accumulate_grad_batches: 1
   amp_backend: native
   auto_lr_find: false
@@ -51,11 +51,15 @@ trainer:
   benchmark: false
   check_val_every_n_epoch: 1 # Don't validate before extracting features.
   default_root_dir: null
+  detect_anomaly: false
   deterministic: false
+  enable_checkpointing: true
+  enable_model_summary: true
+  enable_progress_bar: true
   fast_dev_run: false
-  gpus: 1
+  gpus: null # Set automatically
   gradient_clip_val: 0
+  ipus: null
   limit_predict_batches: 1.0
   limit_test_batches: 1.0
   limit_train_batches: 1.0
@@ -64,6 +68,7 @@ trainer:
   log_gpu_memory: null
   max_epochs: 1
   max_steps: -1
+  max_time: null
   min_epochs: null
   min_steps: null
   move_metrics_to_cpu: false
@@ -74,16 +79,12 @@ trainer:
   overfit_batches: 0.0
   plugins: null
   precision: 32
-  prepare_data_per_node: true
-  process_position: 0
   profiler: null
-  progress_bar_refresh_rate: null
   reload_dataloaders_every_n_epochs: 0
   replace_sampler_ddp: true
-  stochastic_weight_avg: false
   strategy: null
   sync_batchnorm: false
-  terminate_on_nan: false
   tpu_cores: null
   track_grad_norm: -1
   val_check_interval: 1.0 # Don't validate before extracting features.
   weights_save_path: null
   weights_summary: top
18 changes: 9 additions & 9 deletions anomalib/models/ganomaly/config.yaml
@@ -61,7 +61,7 @@ optimization:

 # PL Trainer Args. Don't add extra parameter here.
 trainer:
-  accelerator: null
+  accelerator: auto # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
   accumulate_grad_batches: 1
   amp_backend: native
   auto_lr_find: false
@@ -70,11 +70,15 @@ trainer:
   benchmark: false
   check_val_every_n_epoch: 2
   default_root_dir: null
+  detect_anomaly: false
   deterministic: false
+  enable_checkpointing: true
+  enable_model_summary: true
+  enable_progress_bar: true
   fast_dev_run: false
-  gpus: 1
+  gpus: null # Set automatically
   gradient_clip_val: 0
+  ipus: null
   limit_predict_batches: 1.0
   limit_test_batches: 1.0
   limit_train_batches: 1.0
@@ -93,16 +97,12 @@ trainer:
   overfit_batches: 0.0
   plugins: null
   precision: 32
-  prepare_data_per_node: true
-  process_position: 0
   profiler: null
-  progress_bar_refresh_rate: null
   reload_dataloaders_every_n_epochs: 0
   replace_sampler_ddp: true
-  stochastic_weight_avg: false
   strategy: null
   sync_batchnorm: false
-  terminate_on_nan: false
   tpu_cores: null
   track_grad_norm: -1
   val_check_interval: 1.0
   weights_save_path: null
   weights_summary: top
19 changes: 9 additions & 10 deletions anomalib/models/padim/config.yaml
@@ -53,7 +53,7 @@ optimization:

 # PL Trainer Args. Don't add extra parameter here.
 trainer:
-  accelerator: null
+  accelerator: auto # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
   accumulate_grad_batches: 1
   amp_backend: native
   auto_lr_find: false
@@ -62,19 +62,23 @@ trainer:
   benchmark: false
   check_val_every_n_epoch: 1 # Don't validate before extracting features.
   default_root_dir: null
+  detect_anomaly: false
   deterministic: false
+  enable_checkpointing: true
+  enable_model_summary: true
+  enable_progress_bar: true
   fast_dev_run: false
-  gpus: 1
+  gpus: null # Set automatically
   gradient_clip_val: 0
+  ipus: null
   limit_predict_batches: 1.0
   limit_test_batches: 1.0
   limit_train_batches: 1.0
   limit_val_batches: 1.0
   log_every_n_steps: 50
   log_gpu_memory: null
   max_epochs: 1
   max_steps: -1
+  max_time: null
   min_epochs: null
   min_steps: null
   move_metrics_to_cpu: false
@@ -85,16 +89,11 @@ trainer:
   overfit_batches: 0.0
   plugins: null
   precision: 32
-  prepare_data_per_node: true
-  process_position: 0
   profiler: null
-  progress_bar_refresh_rate: null
   reload_dataloaders_every_n_epochs: 0
   replace_sampler_ddp: true
-  stochastic_weight_avg: false
   sync_batchnorm: false
-  terminate_on_nan: false
   tpu_cores: null
   track_grad_norm: -1
   val_check_interval: 1.0 # Don't validate before extracting features.
   weights_save_path: null
   weights_summary: top
18 changes: 9 additions & 9 deletions anomalib/models/patchcore/config.yaml
@@ -51,7 +51,7 @@ project:

 # PL Trainer Args. Don't add extra parameter here.
 trainer:
-  accelerator: null
+  accelerator: auto # <"cpu", "gpu", "tpu", "ipu", "hpu", "auto">
   accumulate_grad_batches: 1
   amp_backend: native
   auto_lr_find: false
@@ -60,11 +60,15 @@ trainer:
   benchmark: false
   check_val_every_n_epoch: 1 # Don't validate before extracting features.
   default_root_dir: null
+  detect_anomaly: false
   deterministic: false
+  enable_checkpointing: true
+  enable_model_summary: true
+  enable_progress_bar: true
   fast_dev_run: false
-  gpus: 1
+  gpus: null # Set automatically
   gradient_clip_val: 0
+  ipus: null
   limit_predict_batches: 1.0
   limit_test_batches: 1.0
   limit_train_batches: 1.0
@@ -73,6 +77,7 @@ trainer:
   log_gpu_memory: null
   max_epochs: 1
   max_steps: -1
+  max_time: null
   min_epochs: null
   min_steps: null
   move_metrics_to_cpu: false
@@ -83,16 +88,11 @@ trainer:
   overfit_batches: 0.0
   plugins: null
   precision: 32
-  prepare_data_per_node: true
-  process_position: 0
   profiler: null
-  progress_bar_refresh_rate: null
   reload_dataloaders_every_n_epochs: 0
   replace_sampler_ddp: true
-  stochastic_weight_avg: false
   strategy: null
   sync_batchnorm: false
-  terminate_on_nan: false
   tpu_cores: null
   track_grad_norm: -1
   val_check_interval: 1.0 # Don't validate before extracting features.
   weights_save_path: null
   weights_summary: top