refactor accelerator teardown -> training type plugin teardown #7579

Merged
merged 73 commits on May 22, 2021

Changes from 64 commits

Commits (73)
89f284d
Fix some test errors
Mar 23, 2021
80cfbff
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
Mar 23, 2021
536c132
checkpoint consolidation
Mar 24, 2021
f172101
Update ddp_spawn.py
shuyingsunshine21 Mar 24, 2021
bf70e43
Update test_metric_result_integration.py
shuyingsunshine21 Mar 24, 2021
ea74906
Update test_results.py
shuyingsunshine21 Mar 24, 2021
a9aae99
Update utils.py
shuyingsunshine21 Mar 24, 2021
70fe5da
Update utils.py
shuyingsunshine21 Mar 24, 2021
0d23d75
Update test_all_gather_grad.py
shuyingsunshine21 Mar 24, 2021
ca6f98b
Update test_all_gather_grad.py
shuyingsunshine21 Mar 24, 2021
c5053da
Merge pull request #1 from shuyingsunshine21/shuyingsunshine21-checkp…
shuyingsunshine21 Mar 24, 2021
9d4a2b8
Update test_results.py
shuyingsunshine21 Mar 24, 2021
7635b4f
Revert "Update test_results.py"
shuyingsunshine21 Mar 24, 2021
d64f90c
Revert "Merge pull request #1 from shuyingsunshine21/shuyingsunshine2…
shuyingsunshine21 Mar 24, 2021
dcdcd29
Revert "Update test_all_gather_grad.py"
shuyingsunshine21 Mar 24, 2021
8651d54
Revert "Update utils.py"
shuyingsunshine21 Mar 24, 2021
15f4b9e
Revert "Update utils.py"
shuyingsunshine21 Mar 24, 2021
250d0aa
Revert "Update test_results.py"
shuyingsunshine21 Mar 24, 2021
6c095b2
Revert "Update test_metric_result_integration.py"
shuyingsunshine21 Mar 24, 2021
8222dc9
Revert "Update ddp_spawn.py"
shuyingsunshine21 Mar 24, 2021
3a9fde9
Revert "checkpoint consolidation"
shuyingsunshine21 Mar 24, 2021
7a369f4
Revert "Revert "checkpoint consolidation""
shuyingsunshine21 Mar 24, 2021
b4a0b9e
Revert "Revert "Revert "checkpoint consolidation"""
shuyingsunshine21 Mar 24, 2021
5cf1db1
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
Mar 24, 2021
0ce7e05
Revert "Revert "Update ddp_spawn.py""
shuyingsunshine21 Mar 24, 2021
fe9736d
Revert "Revert "Update test_metric_result_integration.py""
shuyingsunshine21 Mar 24, 2021
c314ef6
Revert "Revert "Update test_results.py""
shuyingsunshine21 Mar 24, 2021
c3feda0
Revert "Revert "Update utils.py""
shuyingsunshine21 Mar 24, 2021
c759477
Revert "Revert "Update test_all_gather_grad.py""
shuyingsunshine21 Mar 24, 2021
7a8e540
Merge branch 'master' of https://github.com/shuyingsunshine21/pytorch…
Mar 24, 2021
ab8b849
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
Mar 24, 2021
4e67db2
modify distributed environment to make test pass
Mar 24, 2021
67b6188
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
Mar 25, 2021
179d47e
rebase
Apr 8, 2021
f9afa07
rebase to upstream master
Apr 8, 2021
b461e44
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
Apr 8, 2021
e1bbc4d
fix version for ddp plugin test
Apr 8, 2021
8270d0d
fix
Apr 8, 2021
803d5dd
fix
Apr 8, 2021
ce1a19b
changelog
Apr 8, 2021
c6a13be
Update CHANGELOG.md
carmocca Apr 9, 2021
e274758
Merge pull request #3 from shuyingsunshine21/ddp_plugin_test_fix
shuyingsunshine21 Apr 10, 2021
f337156
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
Apr 14, 2021
35bb931
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
Apr 15, 2021
c938a9c
rebase
May 11, 2021
ad93cde
rebase
May 11, 2021
8b5eca8
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
May 15, 2021
7b9f60d
teardown v1
May 17, 2021
8646210
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
May 17, 2021
3ae1c26
teardown v2
May 18, 2021
91f3805
add return type
May 18, 2021
0988fdf
add unittest and format
May 18, 2021
3b46d60
tpu based on device
May 18, 2021
8a33a4c
unitest add root device assertion
May 18, 2021
3aa9ba3
import os
May 18, 2021
ca3d88e
fix tpu test
May 18, 2021
3886b2d
comments
May 18, 2021
0e08698
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
May 19, 2021
f123b19
fix and add changelog
May 19, 2021
708e092
formatting
May 19, 2021
2137fb3
remove redundant return
May 20, 2021
18dd3c4
add random master port for deep speed test
May 21, 2021
2720b01
testing for changing order of seed and random master port
May 21, 2021
bf27d41
rebase
May 21, 2021
88e0224
comments and add test for teardown
May 21, 2021
33707d1
fix and comments
May 21, 2021
3fb712b
remove unintended change
May 21, 2021
cb8d7c0
fix
May 22, 2021
297fbea
separate tests
May 22, 2021
73990c5
add missing imports
May 22, 2021
b8c3024
fix
May 22, 2021
5f97fd2
add barrier
May 22, 2021
5d74fc1
fix
May 22, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -77,6 +77,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- MLFlowLogger now accepts `run_name` as a constructor argument ([#7622](https://github.com/PyTorchLightning/pytorch-lightning/issues/7622))


- Changed `teardown()` in `Accelerator` to allow `training_type_plugin` to customize `teardown` logic ([#7579](https://github.com/PyTorchLightning/pytorch-lightning/pull/7579))


### Deprecated


9 changes: 1 addition & 8 deletions pytorch_lightning/accelerators/accelerator.py
@@ -154,14 +154,7 @@ def root_device(self) -> torch.device:
return self.training_type_plugin.root_device

def teardown(self) -> None:
"""
This method is called to teardown the training process.
It is the right place to release memory and free other ressources.

By default we add a barrier here to synchronize processes before returning
control back to the caller.
"""
self.barrier("teardown")
self.training_type_plugin.teardown()

def batch_to_device(
self, batch: Any, device: Optional[torch.device] = None, dataloader_idx: Optional[int] = None
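With this refactor, `Accelerator.teardown()` simply delegates to the training type plugin, so custom teardown behaviour now belongs in a plugin subclass rather than an accelerator subclass. The snippet below is a rough sketch of such an override, not part of this PR; supplying the plugin via `Trainer(plugins=[...])` is assumed as the usual mechanism.

```python
# Illustrative sketch only (not part of this PR): customizing teardown by
# subclassing a training type plugin rather than an accelerator.
import logging

import torch

from pytorch_lightning import Trainer
from pytorch_lightning.plugins import SingleDevicePlugin

log = logging.getLogger(__name__)


class LoggingSingleDevicePlugin(SingleDevicePlugin):
    """Hypothetical plugin that adds logging around the default teardown."""

    def teardown(self) -> None:
        log.info("tearing down on device %s", self.root_device)
        # The base plugin moves the LightningModule to CPU and frees CUDA
        # memory when running on a GPU; on CPU this is effectively a no-op.
        super().teardown()


if __name__ == "__main__":
    # Assumed usage: hand the custom plugin to the Trainer via ``plugins``.
    trainer = Trainer(plugins=[LoggingSingleDevicePlugin(torch.device("cpu"))])
```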
7 changes: 0 additions & 7 deletions pytorch_lightning/accelerators/gpu.py
@@ -47,13 +47,6 @@ def on_train_start(self) -> None:
with torch.cuda.device(self.root_device):
torch.cuda.empty_cache()

def teardown(self) -> None:
self.lightning_module.cpu()

# clean up memory
with torch.cuda.device(self.root_device):
torch.cuda.empty_cache()

@staticmethod
def set_nvidia_flags(local_rank: int) -> None:
# set the correct cuda visible devices (using pci order)
5 changes: 0 additions & 5 deletions pytorch_lightning/accelerators/tpu.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from typing import Any, Callable

from torch.optim import Optimizer
@@ -51,10 +50,6 @@ def setup(self, trainer: 'pl.Trainer', model: 'pl.LightningModule') -> None:
raise MisconfigurationException("TPUs only support a single tpu core or tpu spawn training.")
return super().setup(trainer, model)

def teardown(self) -> None:
if "PT_XLA_DEBUG" in os.environ:
del os.environ["PT_XLA_DEBUG"]

def run_optimizer_step(
self, optimizer: Optimizer, optimizer_idx: int, lambda_closure: Callable, **kwargs: Any
) -> None:
27 changes: 25 additions & 2 deletions pytorch_lightning/plugins/training_type/parallel.py
@@ -23,6 +23,7 @@
from pytorch_lightning.overrides.base import unwrap_lightning_module
from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin
from pytorch_lightning.utilities import _XLA_AVAILABLE
from pytorch_lightning.utilities.distributed import all_gather_ddp_if_available, ReduceOp


@@ -40,13 +41,17 @@ def __init__(

@property
@abstractmethod
def root_device(self):
def root_device(self) -> torch.device:
raise NotImplementedError

@property
def on_gpu(self):
def on_gpu(self) -> bool:
return self.root_device.type == "cuda" and torch.cuda.is_available()

@property
def on_tpu(self) -> bool:
return self.root_device.type == "xla" and _XLA_AVAILABLE

@property
def lightning_module(self):
return unwrap_lightning_module(self._model)
@@ -122,3 +127,21 @@ def block_backward_sync(self):
yield None
else:
yield None

def teardown(self) -> None:
"""
This method is called to tear down the training process.
It is the right place to release memory and free other resources.

By default, we tear down in the following way: if training is on GPU,
we move the LightningModule to CPU and clean up CUDA memory.

At the end, we synchronize processes before returning control back to the caller.
"""
if self.on_gpu:
# GPU teardown
self.lightning_module.cpu()
# clean up memory
with torch.cuda.device(self.root_device):
torch.cuda.empty_cache()
self.barrier("teardown")
20 changes: 18 additions & 2 deletions pytorch_lightning/plugins/training_type/single_device.py
@@ -16,6 +16,7 @@
import torch

from pytorch_lightning.plugins.training_type.training_type_plugin import TrainingTypePlugin
from pytorch_lightning.utilities import _XLA_AVAILABLE


class SingleDevicePlugin(TrainingTypePlugin):
@@ -30,11 +31,11 @@ def __init__(self, device: torch.device):

@property
def on_tpu(self) -> bool:
return False
return self.root_device.type == "xla" and _XLA_AVAILABLE

@property
def on_gpu(self) -> bool:
return self.device.type == "cuda" and torch.cuda.is_available()
return self.root_device.type == "cuda" and torch.cuda.is_available()

def reduce(self, tensor: Union[Any, torch.Tensor], *args: Any, **kwargs: Any) -> Union[Any, torch.Tensor]:
"""
@@ -78,3 +79,18 @@ def barrier(self, *args, **kwargs) -> None:

def broadcast(self, obj: object, src: int = 0) -> object:
return obj

def teardown(self) -> None:
"""
This method is called to tear down the training process.
It is the right place to release memory and free other resources.

By default, we tear down in the following way: if training is on GPU,
we move the LightningModule to CPU and clean up CUDA memory.
"""
if self.on_gpu:
# GPU teardown
self.lightning_module.cpu()
# clean up memory
with torch.cuda.device(self.root_device):
torch.cuda.empty_cache()
9 changes: 5 additions & 4 deletions pytorch_lightning/plugins/training_type/single_tpu.py
@@ -35,10 +35,6 @@ def __init__(self, device: int, debug: bool = False):
self.tpu_local_core_rank = 0
self.tpu_global_core_rank = 0

@property
def on_tpu(self) -> bool:
return True

@property
def is_distributed(self) -> bool:
return False
@@ -63,3 +59,8 @@ def on_save(self, checkpoint: dict) -> dict:
https://github.com/pytorch/xla/blob/master/API_GUIDE.md#saving-and-loading-xla-tensors
"""
return move_data_to_device(checkpoint, torch.device("cpu"))

def teardown(self) -> None:
# TPU teardown
if "PT_XLA_DEBUG" in os.environ:
del os.environ["PT_XLA_DEBUG"]
15 changes: 10 additions & 5 deletions pytorch_lightning/plugins/training_type/tpu_spawn.py
@@ -71,7 +71,7 @@ def world_size(self) -> int:

@property
def root_device(self) -> torch.device:
return self.device
return xm.xla_device()

@staticmethod
def _validate_dataloader(dataloaders: Union[List[DataLoader], DataLoader]) -> None:
@@ -129,7 +129,7 @@ def is_distributed(self) -> bool:

def process_dataloader(self, dataloader: DataLoader) -> MpDeviceLoader:
TPUSpawnPlugin._validate_dataloader(dataloader)
return MpDeviceLoader(dataloader, self.device)
return MpDeviceLoader(dataloader, self.root_device)

def configure_ddp(self) -> None:
pass
@@ -172,8 +172,7 @@ def new_process(self, process_idx: int, trainer, mp_queue) -> None:
time.sleep(2)

def model_to_device(self) -> None:
self.device = xm.xla_device()
self.model = self.wrapped_model.to(self.device)
self.model = self.wrapped_model.to(self.root_device)

def barrier(self, name: Optional[str] = None) -> None:
# HOST_WORLD_SIZE is None outside the xmp.spawn process
@@ -209,7 +208,7 @@ def broadcast(self, obj: object, src: int = 0) -> object:
buffer = io.BytesIO()
torch.save(obj, buffer)
data = bytearray(buffer.getbuffer())
data_tensor = torch.tensor(data, device=self.device, dtype=torch.float)
data_tensor = torch.tensor(data, device=self.root_device, dtype=torch.float)
data = xm.all_gather(data_tensor)
buffer = io.BytesIO(data.cpu().byte().numpy())
obj = torch.load(buffer)
@@ -302,3 +301,9 @@ def all_gather(self, tensor: torch.Tensor, group: Optional[Any] = None, sync_gra
if isinstance(tensor, torch.Tensor) and tensor.dim() == 0:
tensor = tensor.unsqueeze(0)
return xm.all_gather(tensor)

def teardown(self) -> None:
# TPU teardown
if "PT_XLA_DEBUG" in os.environ:
del os.environ["PT_XLA_DEBUG"]
self.barrier("teardown")
16 changes: 16 additions & 0 deletions pytorch_lightning/plugins/training_type/training_type_plugin.py
@@ -60,11 +60,19 @@ def setup(self, model: Module) -> None:
@abstractmethod
def on_gpu(self) -> bool:
"""Returns whether the current process is done on GPU"""
raise NotImplementedError

@property
@abstractmethod
def on_tpu(self) -> bool:
"""Returns whether the current process is done on TPU"""
raise NotImplementedError

@property
@abstractmethod
def root_device(self) -> torch.device:
"""Returns the root device"""
raise NotImplementedError

@abstractmethod
def model_to_device(self) -> None:
@@ -290,6 +298,14 @@ def call_configure_sharded_model_hook(self) -> bool:
def call_configure_sharded_model_hook(self, mode: bool) -> None:
self._call_configure_sharded_model_hook = mode

@abstractmethod
def teardown(self) -> None:
"""
This method is called to tear down the training process.
It is the right place to release memory and free other resources.
"""
raise NotImplementedError

@classmethod
def register_plugins(cls, plugin_registry):
pass
Empty file.
2 changes: 2 additions & 0 deletions tests/plugins/test_deepspeed_plugin.py
@@ -9,6 +9,7 @@
from torch.optim import Optimizer
from torch.utils.data import DataLoader

import tests.helpers.utils as tutils
from pytorch_lightning import LightningModule, seed_everything, Trainer
from pytorch_lightning.callbacks import Callback, ModelCheckpoint
from pytorch_lightning.metrics import Accuracy
@@ -569,6 +570,7 @@ def test_deepspeed_multigpu_stage_2_accumulated_grad_batches(tmpdir, cpu_offload
Test to ensure with Stage 2 and multiple GPUs, accumulated grad batches works.
"""
seed_everything(42)
tutils.set_random_master_port()

class VerificationCallback(Callback):

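The DeepSpeed test now sets a random master port before spawning processes. As background, torch.distributed reads MASTER_PORT from the environment when initializing the process group, so randomizing it keeps back-to-back multi-GPU tests from colliding on the same port. The helper below is only a sketch of what such a utility typically does; the real implementation lives in tests/helpers/utils.py and may differ.

```python
# Sketch only: an assumed implementation of a ``set_random_master_port``-style
# helper, shown to explain why the test calls it; not copied from the repo.
import os
import random


def set_random_master_port() -> None:
    # torch.distributed picks up MASTER_PORT when the process group is
    # initialized; a random high port avoids clashes between test runs.
    os.environ["MASTER_PORT"] = str(random.randint(10000, 19000))
```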
79 changes: 79 additions & 0 deletions tests/plugins/test_training_type_plugin_device.py
@@ -0,0 +1,79 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from unittest import mock

import torch

from pytorch_lightning import Trainer
from pytorch_lightning.plugins import DDPPlugin, DDPSpawnPlugin, SingleDevicePlugin, TPUSpawnPlugin
from tests.helpers.runif import RunIf
from tests.helpers.utils import pl_multi_process_test


def test_single_cpu():
"""Tests if on_gpu and on_tpu is set correctly for single cpu plugin."""
trainer = Trainer()
assert isinstance(trainer.training_type_plugin, SingleDevicePlugin)
assert not trainer.training_type_plugin.on_gpu
assert not trainer.training_type_plugin.on_tpu
assert trainer.training_type_plugin.root_device == torch.device("cpu")


@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0"})
@mock.patch("torch.cuda.device_count", return_value=1)
@mock.patch("torch.cuda.is_available", return_value=True)
def test_single_gpu(device_count_mock, mock_cuda_available):
"""Tests if on_gpu and on_tpu is set correctly for single gpu plugin."""
trainer = Trainer(gpus=1)
assert isinstance(trainer.training_type_plugin, SingleDevicePlugin)
assert trainer.training_type_plugin.on_gpu
assert not trainer.training_type_plugin.on_tpu
assert trainer.training_type_plugin.root_device == torch.device("cuda:0")


@mock.patch("torch.cuda.is_available", return_value=False)
def test_ddp_cpu(mock_cuda_available):
"""Tests if on_gpu and on_tpu is set correctly for ddp_cpu plugin."""
trainer = Trainer(num_processes=2)
assert isinstance(trainer.training_type_plugin, DDPSpawnPlugin)
assert not trainer.training_type_plugin.on_gpu
assert not trainer.training_type_plugin.on_tpu
assert trainer.training_type_plugin.root_device == torch.device("cpu")


@mock.patch.dict(os.environ, {"CUDA_VISIBLE_DEVICES": "0,1"})
@mock.patch("torch.cuda.device_count", return_value=2)
@mock.patch("torch.cuda.is_available", return_value=True)
def test_ddp_multi_gpu(device_count_mock, mock_cuda_available):
"""Tests if on_gpu and on_tpu is set correctly for multi gpu ddp plugin."""
trainer = Trainer(
gpus=2,
accelerator="ddp",
)
assert isinstance(trainer.training_type_plugin, DDPPlugin)
assert trainer.training_type_plugin.on_gpu
assert not trainer.training_type_plugin.on_tpu
assert trainer.training_type_plugin.root_device == torch.device("cuda:0")


@RunIf(tpu=True)
Contributor Author: is there a way to mock patch TPU?

Contributor: Need to add the functionality.

@pl_multi_process_test
def test_model_tpu_one_core():
"""Tests in_gpu and on_tpu is set correctly for tpu spawn plugin."""
trainer = Trainer(tpu_cores=1)
assert isinstance(trainer.training_type_plugin, TPUSpawnPlugin)
assert not trainer.training_type_plugin.on_gpu
assert trainer.training_type_plugin.on_tpu
assert trainer.training_type_plugin.root_device == torch.device("xla")
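Regarding the review question above about mock-patching TPU: one possible approach, under the assumption that the plugins read the module-level `_XLA_AVAILABLE` flag as the `single_device.py` diff shows, is to patch that flag and hand the plugin an `xla` device. This is a hedged sketch only, not functionality added by this PR.

```python
# Hedged sketch only: faking XLA availability so ``on_tpu`` can be exercised
# without TPU hardware. Assumes ``_XLA_AVAILABLE`` is read as a module-level
# name in ``single_device.py``, as in this diff.
from unittest import mock

import torch

from pytorch_lightning.plugins import SingleDevicePlugin


@mock.patch("pytorch_lightning.plugins.training_type.single_device._XLA_AVAILABLE", True)
def test_on_tpu_with_faked_xla_availability():
    plugin = SingleDevicePlugin(torch.device("xla"))
    assert plugin.on_tpu
    assert not plugin.on_gpu
```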