[feat] Add BasePredictionWriter 3/3 #7127

Merged: 59 commits, Apr 27, 2021
Changes from 54 commits
Commits (59)
2e9b932
wip
tchaton Apr 20, 2021
7effee2
update
tchaton Apr 20, 2021
9321a73
update
tchaton Apr 20, 2021
128cc45
update
tchaton Apr 20, 2021
c7e49e9
update
tchaton Apr 20, 2021
9f82f7a
update
tchaton Apr 20, 2021
ce85174
typo
tchaton Apr 20, 2021
d3f9f30
update on comments
tchaton Apr 21, 2021
e1ccd1a
update
tchaton Apr 21, 2021
2a994db
update
tchaton Apr 21, 2021
69b6d77
update
tchaton Apr 21, 2021
bcf3c2b
update
tchaton Apr 22, 2021
643c8e5
update changelog
tchaton Apr 22, 2021
7109c16
update
tchaton Apr 22, 2021
fea8294
Merge branch 'master' into predict_loop_1
carmocca Apr 22, 2021
ce2656d
Fix merge
carmocca Apr 22, 2021
4ba47ed
Fix merge
carmocca Apr 22, 2021
0705ca7
Merge branch 'master' into predict_loop_1
tchaton Apr 22, 2021
54a5008
Merge branch 'predict_loop_1' of https://github.com/PyTorchLightning/…
tchaton Apr 22, 2021
1bf0325
move code
tchaton Apr 22, 2021
5243c91
resolve test
tchaton Apr 22, 2021
550d3f3
add extra test
tchaton Apr 22, 2021
0169e9e
add an extra test
tchaton Apr 22, 2021
4962459
update on comments
tchaton Apr 23, 2021
a371c5c
add typing
tchaton Apr 23, 2021
a163c2d
resolve flake8
tchaton Apr 23, 2021
63551ca
Refactor and Docs
carmocca Apr 23, 2021
0937e73
Fix tests
carmocca Apr 23, 2021
d4f523e
Fix tests
carmocca Apr 23, 2021
9a44529
Fix tests
carmocca Apr 23, 2021
d66d704
Duplicate
carmocca Apr 23, 2021
71685f2
Fix tests
carmocca Apr 23, 2021
89b281e
resolve bug
tchaton Apr 26, 2021
4416fa5
update
tchaton Apr 26, 2021
b627ed0
update on comments
tchaton Apr 26, 2021
e2d202c
Update pytorch_lightning/utilities/imports.py
tchaton Apr 26, 2021
851fb5f
Update pytorch_lightning/utilities/device_parser.py
tchaton Apr 26, 2021
e58c707
Merge branch 'master' of https://github.com/PyTorchLightning/pytorch-…
tchaton Apr 27, 2021
8b8258c
update
tchaton Apr 27, 2021
33dafe2
Merge branch 'predict_loop_1' of https://github.com/PyTorchLightning/…
tchaton Apr 27, 2021
6d752be
update
tchaton Apr 27, 2021
2012a64
update
tchaton Apr 27, 2021
e7fa7f9
update on comments
tchaton Apr 27, 2021
0ae1bdd
resolve flkae8
tchaton Apr 27, 2021
6ab6228
update test
tchaton Apr 27, 2021
681fe12
Apply suggestions from code review
carmocca Apr 27, 2021
8d4f26e
update on comments
tchaton Apr 27, 2021
89e2286
Merge branch 'predict_loop_1' of https://github.com/PyTorchLightning/…
tchaton Apr 27, 2021
f8de388
Update pytorch_lightning/callbacks/prediction_writer.py
kaushikb11 Apr 27, 2021
b8386a5
Update pytorch_lightning/callbacks/prediction_writer.py
kaushikb11 Apr 27, 2021
7c4c391
Update pytorch_lightning/callbacks/prediction_writer.py
kaushikb11 Apr 27, 2021
7e7885c
update on comments
tchaton Apr 27, 2021
0505e96
Merge branch 'predict_loop_1' of https://github.com/PyTorchLightning/…
tchaton Apr 27, 2021
f054e62
update
tchaton Apr 27, 2021
721f589
update on comment
tchaton Apr 27, 2021
c55d78a
Apply suggestions from code review
Borda Apr 27, 2021
ee5aac0
update
tchaton Apr 27, 2021
5572aa9
Merge branch 'predict_loop_1' of https://github.com/PyTorchLightning/…
tchaton Apr 27, 2021
b3f60b8
Merge branch 'master' into predict_loop_1
tchaton Apr 27, 2021
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -120,6 +120,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Added `trainer.predict(return_predictions=None|False|True)` ([#7215](https://github.com/PyTorchLightning/pytorch-lightning/pull/7215))


- Added `BasePredictionWriter` callback to implement prediction saving ([#7127](https://github.com/PyTorchLightning/pytorch-lightning/pull/7127))


### Changed

- Renamed `pytorch_lightning.callbacks.swa` to `pytorch_lightning.callbacks.stochastic_weight_avg` ([#6259](https://github.com/PyTorchLightning/pytorch-lightning/pull/6259))
1 change: 1 addition & 0 deletions docs/source/extensions/callbacks.rst
@@ -104,6 +104,7 @@ Lightning has a few built-in callbacks.
LearningRateMonitor
ModelCheckpoint
ModelPruning
BasePredictionWriter
ProgressBar
ProgressBarBase
QuantizationAwareTraining
2 changes: 2 additions & 0 deletions pytorch_lightning/callbacks/__init__.py
@@ -19,6 +19,7 @@
from pytorch_lightning.callbacks.lambda_function import LambdaCallback
from pytorch_lightning.callbacks.lr_monitor import LearningRateMonitor
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.callbacks.prediction_writer import BasePredictionWriter
from pytorch_lightning.callbacks.progress import ProgressBar, ProgressBarBase
from pytorch_lightning.callbacks.pruning import ModelPruning
from pytorch_lightning.callbacks.quantization import QuantizationAwareTraining
@@ -36,6 +37,7 @@
'LearningRateMonitor',
'ModelCheckpoint',
'ModelPruning',
'BasePredictionWriter',
'ProgressBar',
'ProgressBarBase',
'QuantizationAwareTraining',
119 changes: 119 additions & 0 deletions pytorch_lightning/callbacks/prediction_writer.py
@@ -0,0 +1,119 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
r"""
BasePredictionWriter
====================

Aids in saving predictions
"""
from typing import Any, List, Optional

import pytorch_lightning as pl
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.utilities import LightningEnum
from pytorch_lightning.utilities.exceptions import MisconfigurationException


class WriteInterval(LightningEnum):
BATCH = "batch"
EPOCH = "epoch"
BATCH_AND_EPOCH = "batch_and_epoch"

@property
def on_batch(self) -> bool:
return self in (self.BATCH, self.BATCH_AND_EPOCH)

@property
def on_epoch(self) -> bool:
return self in (self.EPOCH, self.BATCH_AND_EPOCH)


class BasePredictionWriter(Callback):
"""
Base class to implement how the predictions should be stored.

Args:
write_interval: When to write.

Example::

import os
from typing import Any, List

import torch

from pytorch_lightning.callbacks import BasePredictionWriter

class CustomWriter(BasePredictionWriter):

def __init__(self, output_dir: str, write_interval: str):
super().__init__(write_interval)
self.output_dir = output_dir

def write_on_batch_end(
self, trainer, pl_module: 'LightningModule', prediction: Any, batch_indices: List[int], batch: Any,
batch_idx: int, dataloader_idx: int
):
torch.save(prediction, os.path.join(self.output_dir, str(dataloader_idx), f"{batch_idx}.pt"))

def write_on_epoch_end(
self, trainer, pl_module: 'LightningModule', predictions: List[Any], batch_indices: List[Any]
):
torch.save(predictions, os.path.join(self.output_dir, "predictions.pt"))
"""

def __init__(self, write_interval: str = "batch") -> None:
if write_interval not in list(WriteInterval):
raise MisconfigurationException(f"`write_interval` should be one of {[i.value for i in WriteInterval]}.")
self.interval = WriteInterval(write_interval)

def write_on_batch_end(
self,
trainer: 'pl.Trainer',
pl_module: 'pl.LightningModule',
prediction: Any,
batch_indices: Optional[List[int]],
batch: Any,
batch_idx: int,
dataloader_idx: int,
) -> None:
"""Override with the logic to write a single batch."""
raise NotImplementedError()

def write_on_epoch_end(
self,
trainer: 'pl.Trainer',
pl_module: 'pl.LightningModule',
predictions: List[Any],
batch_indices: Optional[List[Any]],
) -> None:
"""Override with the logic to write all batches."""
raise NotImplementedError()

def on_predict_batch_end(
self,
trainer: 'pl.Trainer',
pl_module: 'pl.LightningModule',
outputs: Any,
batch: Any,
batch_idx: int,
dataloader_idx: int,
) -> None:
if not self.interval.on_batch:
return
is_distributed = trainer.accelerator_connector.is_distributed
batch_indices = trainer.predict_loop.batch_indices if is_distributed else None
self.write_on_batch_end(trainer, pl_module, outputs, batch_indices, batch, batch_idx, dataloader_idx)

def on_predict_epoch_end(self, trainer: 'pl.Trainer', pl_module: 'pl.LightningModule', outputs: List[Any]) -> None:
if not self.interval.on_epoch:
return
is_distributed = trainer.accelerator_connector.is_distributed
epoch_batch_indices = trainer.predict_loop.epoch_batch_indices if is_distributed else None
self.write_on_epoch_end(trainer, pl_module, trainer.predict_loop.predictions, epoch_batch_indices)
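For context, here is a minimal sketch of how the new callback could be wired into `Trainer.predict`. The `PredictionSaver` class and the `./preds` output directory are illustrative choices for this sketch, not part of the PR; it mirrors the pattern used in the docstring example and tests below.

import os

import torch

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import BasePredictionWriter
from tests.helpers import BoringModel


class PredictionSaver(BasePredictionWriter):
    """Toy writer that persists all predictions once the predict epoch finishes."""

    def __init__(self, output_dir: str, write_interval: str = "epoch"):
        super().__init__(write_interval)
        self.output_dir = output_dir

    def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices):
        # `predictions` is a list with one entry per dataloader, each holding the per-batch outputs
        os.makedirs(self.output_dir, exist_ok=True)
        torch.save(predictions, os.path.join(self.output_dir, "predictions.pt"))


model = BoringModel()
trainer = Trainer(limit_predict_batches=4, callbacks=[PredictionSaver("./preds")])
trainer.predict(model, dataloaders=model.train_dataloader())

With `write_interval="epoch"`, only `write_on_epoch_end` needs to be overridden; `on_predict_batch_end` returns early because the interval does not write on batch.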
8 changes: 6 additions & 2 deletions pytorch_lightning/trainer/predict_loop.py
@@ -51,6 +51,10 @@ def return_predictions(self, return_predictions: Optional[bool] = None) -> None:
# For plugins other than ``DDPSpawnPlugin``, `return_predictions` is True by default unless the user decides otherwise.
self._return_predictions = not is_ddp_spawn if return_predictions is None else return_predictions

@property
def should_store_predictions(self) -> bool:
return self.return_predictions or any(c.interval.on_epoch for c in self.trainer.prediction_writer_callbacks)

def on_trainer_init(self):
self.trainer.num_predict_batches = []

@@ -112,14 +116,14 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int) -> None:

self.trainer.call_hook("on_predict_batch_end", predictions, batch, batch_idx, dataloader_idx)

if self.return_predictions:
if self.should_store_predictions:
self.predictions[dataloader_idx].append(predictions)

def _store_batch_indices(self, dataloader_idx: int) -> None:
batch_sampler = self.trainer.predict_dataloaders[dataloader_idx].batch_sampler
if isinstance(batch_sampler, IndexBatchSamplerWrapper):
self.batch_indices = batch_sampler.batch_indices
if self.return_predictions:
if self.should_store_predictions:
self.epoch_batch_indices[dataloader_idx].append(batch_sampler.batch_indices)

def on_predict_start(self) -> None:
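The practical effect of `should_store_predictions`, sketched below with the hypothetical `PredictionSaver` from the earlier example: even when the caller opts out of getting predictions back, the loop still accumulates them so that any epoch-interval writer receives the full list.

trainer = Trainer(limit_predict_batches=4, callbacks=[PredictionSaver("./preds")])
# return_predictions=False skips handing predictions back to the caller,
# but the epoch-interval writer is still passed every stored prediction
trainer.predict(model, dataloaders=model.train_dataloader(), return_predictions=False)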
9 changes: 9 additions & 0 deletions pytorch_lightning/trainer/properties.py
@@ -23,6 +23,7 @@
from pytorch_lightning.accelerators import Accelerator
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, ProgressBarBase
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.callbacks.prediction_writer import BasePredictionWriter
from pytorch_lightning.core.lightning import LightningModule
from pytorch_lightning.core.optimizer import LightningOptimizer
from pytorch_lightning.loggers import LightningLoggerBase
@@ -309,6 +310,14 @@ def early_stopping_callbacks(self) -> List[EarlyStopping]:
"""
return [c for c in self.callbacks if isinstance(c, EarlyStopping)]

@property
def prediction_writer_callbacks(self) -> Optional[List[BasePredictionWriter]]:
"""
A list of all instances of :class:`~pytorch_lightning.callbacks.prediction_writer.BasePredictionWriter`
found in the Trainer.callbacks list.
"""
return [c for c in self.callbacks if isinstance(c, BasePredictionWriter)]

@property
def checkpoint_callback(self) -> Optional[ModelCheckpoint]:
"""
69 changes: 69 additions & 0 deletions tests/callbacks/test_prediction_writer.py
@@ -0,0 +1,69 @@
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest

from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import BasePredictionWriter
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.helpers import BoringModel


def test_prediction_writer(tmpdir):

class CustomPredictionWriter(BasePredictionWriter):

def __init__(self, writer_interval: str):
super().__init__(writer_interval)

self.write_on_batch_end_called = False
self.write_on_epoch_end_called = False

def write_on_batch_end(self, *args, **kwargs):
self.write_on_batch_end_called = True

def write_on_epoch_end(self, *args, **kwargs):
self.write_on_epoch_end_called = True

with pytest.raises(MisconfigurationException, match=r"`write_interval` should be one of \['batch"):
CustomPredictionWriter("something")

model = BoringModel()
cb = CustomPredictionWriter("batch_and_epoch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
results = trainer.predict(model, dataloaders=model.train_dataloader())
assert len(results) == 4
assert cb.write_on_batch_end_called
assert cb.write_on_epoch_end_called

cb = CustomPredictionWriter("batch_and_epoch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
results = trainer.predict(model, dataloaders=model.train_dataloader(), return_predictions=False)
assert cb.write_on_batch_end_called
assert cb.write_on_epoch_end_called
assert results == 1

cb = CustomPredictionWriter("batch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
results = trainer.predict(model, dataloaders=model.train_dataloader(), return_predictions=False)
assert cb.write_on_batch_end_called
assert not cb.write_on_epoch_end_called
assert results == 1

cb = CustomPredictionWriter("epoch")
trainer = Trainer(limit_predict_batches=4, callbacks=cb)
results = trainer.predict(model, dataloaders=model.train_dataloader(), return_predictions=False)
assert not cb.write_on_batch_end_called
assert cb.write_on_epoch_end_called
assert results == 1
2 changes: 1 addition & 1 deletion tests/loggers/test_all.py
@@ -102,7 +102,7 @@ def test_epoch_end(self, outputs) -> None:

class StoreHistoryLogger(logger_class):

def __init__(self, *args, **kwargs):
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.history = []

2 changes: 1 addition & 1 deletion tests/loggers/test_base.py
@@ -170,7 +170,7 @@ def test_adding_step_key(tmpdir):

class CustomTensorBoardLogger(TensorBoardLogger):

def __init__(self, *args, **kwargs):
def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
self.logged_step = 0

49 changes: 44 additions & 5 deletions tests/trainer/test_trainer.py
@@ -31,6 +31,7 @@
import tests.helpers.utils as tutils
from pytorch_lightning import Callback, LightningDataModule, LightningModule, Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.callbacks.prediction_writer import BasePredictionWriter
from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml, save_hparams_to_tags_csv
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.overrides.distributed import IndexBatchSamplerWrapper, UnrepeatedDistributedSampler
@@ -1512,7 +1513,33 @@ def predict_dataloader(self):
return self._dataloaders


class CustomPredictionWriter(Callback):
class CustomPredictionWriter(BasePredictionWriter):

write_on_batch_end_called = False
write_on_epoch_end_called = False

def __init__(self, output_dir: str, *args, **kwargs):
super().__init__(*args, **kwargs)
self.output_dir = output_dir

def write_on_batch_end(self, trainer, pl_module, prediction, batch_indices, *args, **kwargs):
assert prediction.shape == torch.Size([1, 2])
if trainer.accelerator_connector.is_distributed:
assert len(batch_indices) == 1
else:
assert batch_indices is None
self.write_on_batch_end_called = True

def write_on_epoch_end(self, trainer, pl_module, predictions, batch_indices):
expected = 1 if trainer.accelerator_connector.is_distributed else 2
assert len(predictions) == 2
assert len(predictions[0]) == expected
if trainer.accelerator_connector.is_distributed:
assert len(batch_indices) == 2
assert len(batch_indices[0]) == expected
else:
assert batch_indices is None
self.write_on_epoch_end_called = True

def on_predict_epoch_end(self, trainer, pl_module, outputs):
if trainer.accelerator_connector.is_distributed:
@@ -1522,12 +1549,17 @@ def on_predict_epoch_end(self, trainer, pl_module, outputs):
super().on_predict_epoch_end(trainer, pl_module, outputs)


def predict(tmpdir, accelerator, gpus, num_processes, model=None, plugins=None, datamodule=True, pbrr=None):
def predict(
tmpdir, accelerator, gpus, num_processes, model=None, plugins=None, datamodule=True, pbrr=None, use_callbacks=True
):
Comment on lines +1552 to +1554

Contributor: need to be careful that we don't overload these test helper functions with complexity. If the test functions are too complex we would need tests for the tests and so this goes in circles xD

Member: also rather keep them protected so noone would import them...

dataloaders = [torch.utils.data.DataLoader(RandomDataset(32, 2)), torch.utils.data.DataLoader(RandomDataset(32, 2))]

model = model or BoringModel()
dm = TestLightningDataModule(dataloaders)

cb = CustomPredictionWriter(tmpdir, write_interval="batch")
cb_1 = CustomPredictionWriter(tmpdir, write_interval="epoch")

trainer = Trainer(
default_root_dir=tmpdir,
max_epochs=1,
@@ -1538,7 +1570,7 @@ def predict(tmpdir, accelerator, gpus, num_processes, model=None, plugins=None,
num_processes=num_processes,
plugins=plugins,
progress_bar_refresh_rate=pbrr,
callbacks=[CustomPredictionWriter()]
callbacks=[cb, cb_1] if use_callbacks else []
)
if accelerator == "ddp_spawn":
with pytest.raises(MisconfigurationException):
@@ -1550,6 +1582,13 @@
results = trainer.predict(model, dataloaders=dataloaders)

if not isinstance(trainer.training_type_plugin, DDPSpawnPlugin):
if use_callbacks:
assert cb.write_on_batch_end_called
assert not cb.write_on_epoch_end_called

assert not cb_1.write_on_batch_end_called
assert cb_1.write_on_epoch_end_called

num_samples = 1 if accelerator == "ddp" else 2
assert len(results) == 2
assert len(results[0]) == num_samples
@@ -1572,7 +1611,7 @@ def predict_step(self, batch, batch_idx, dataloader_idx=None):
return super().predict_step(batch, batch_idx, dataloader_idx)

with pytest.warns(UserWarning, match='predict returned None'):
predict(tmpdir, None, None, 1, model=CustomBoringModel())
predict(tmpdir, None, None, 1, model=CustomBoringModel(), use_callbacks=False)


def test_trainer_predict_grad(tmpdir):
@@ -1583,7 +1622,7 @@ def predict_step(self, batch, batch_idx, dataloader_idx=None):
assert batch.expand_as(batch).grad_fn is None
return super().predict_step(batch, batch_idx, dataloader_idx)

predict(tmpdir, None, None, 1, model=CustomBoringModel())
predict(tmpdir, None, None, 1, model=CustomBoringModel(), use_callbacks=False)

x = torch.zeros(1, requires_grad=True)
assert x.expand_as(x).grad_fn is not None