Fixed num_sanity_val_steps affecting reproducibility of training data shuffling #7014

Merged · 26 commits · Apr 27, 2021 · Changes from 23 commits
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -319,6 +319,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed parsing for pre-release package versions ([#6999](https://github.com/PyTorchLightning/pytorch-lightning/pull/6999))


+- Fixed `num_sanity_val_steps` affecting reproducibility of training data shuffling ([#7014](https://github.com/PyTorchLightning/pytorch-lightning/pull/7014))



## [1.2.7] - 2021-04-06

### Fixed
7 changes: 2 additions & 5 deletions pytorch_lightning/plugins/training_type/ddp.py
@@ -37,7 +37,7 @@
)
from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp, sync_ddp_if_available
from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.seed import reset_seed

if _HYDRA_AVAILABLE:
    from hydra.core.hydra_config import HydraConfig
@@ -180,10 +180,7 @@ def _call_children_scripts(self):
            sleep(delay)

    def setup_distributed(self):
-        # TODO: check if needed
-        seed = os.environ.get("PL_GLOBAL_SEED")
-        if seed is not None:
-            seed_everything(int(seed))
+        reset_seed()

        # determine which process we are and world size
        self.set_world_ranks()
7 changes: 2 additions & 5 deletions pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -32,7 +32,7 @@
from pytorch_lightning.utilities.cloud_io import atomic_save
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.distributed import rank_zero_only, rank_zero_warn, ReduceOp, sync_ddp_if_available
-from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.seed import reset_seed

if _TORCH_GREATER_EQUAL_1_8:
    from pytorch_lightning.utilities.distributed import register_ddp_comm_hook
@@ -132,10 +132,7 @@ def start_predicting(self, trainer):
    def new_process(self, process_idx, trainer, mp_queue):
        self.mp_queue = mp_queue

-        # TODO: check if needed
-        seed = os.environ.get("PL_GLOBAL_SEED")
-        if seed is not None:
-            seed_everything(int(seed))
+        reset_seed()

        self.set_world_ranks(process_idx)

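The spawn-based plugins re-seed inside new_process because each spawned worker starts with fresh RNG state but inherits the parent's environment, including PL_GLOBAL_SEED. Below is a minimal standard-library sketch of that mechanism; seed_everything and reset_seed here are simplified stand-ins for illustration, not the Lightning implementations.

import multiprocessing as mp
import os
import random


def seed_everything(seed: int) -> None:
    # simplified stand-in: record the seed and seed the RNGs we care about here
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    random.seed(seed)


def reset_seed() -> None:
    # simplified stand-in for the new utility: re-apply whatever seed_everything stored
    seed = os.environ.get("PL_GLOBAL_SEED")
    if seed is not None:
        seed_everything(int(seed))


def worker(rank: int) -> None:
    # a spawned child has fresh RNG state but inherits PL_GLOBAL_SEED from the parent,
    # so reset_seed() puts every process into the same, reproducible state
    reset_seed()
    print(f"rank {rank}: {random.random():.6f}")  # prints the same value in every worker


if __name__ == "__main__":
    seed_everything(123)
    ctx = mp.get_context("spawn")
    processes = [ctx.Process(target=worker, args=(rank,)) for rank in range(2)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()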
6 changes: 2 additions & 4 deletions pytorch_lightning/plugins/training_type/tpu_spawn.py
@@ -32,7 +32,7 @@
from pytorch_lightning.utilities.data import has_len
from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp
from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.seed import reset_seed

if _TPU_AVAILABLE:
    import torch_xla.core.xla_model as xm
@@ -137,9 +137,7 @@ def set_world_ranks(self, process_idx: int = 0) -> None:
    def new_process(self, process_idx: int, trainer, mp_queue) -> None:
        self.mp_queue = mp_queue

-        seed = os.environ.get("PL_GLOBAL_SEED")
-        if seed is not None:
-            seed_everything(int(seed))
+        reset_seed()

        self.tpu_local_core_rank = xm.get_local_ordinal()
        self.tpu_global_core_rank = xm.get_ordinal()
5 changes: 5 additions & 0 deletions pytorch_lightning/trainer/trainer.py
@@ -62,6 +62,7 @@
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.memory import recursive_detach
from pytorch_lightning.utilities.model_helpers import is_overridden
+from pytorch_lightning.utilities.seed import reset_seed

log = logging.getLogger(__name__)
# warnings to ignore in trainer
@@ -864,6 +865,10 @@ def run_sanity_check(self, ref_model):

        self._running_stage = stage

+        # reset the seed to what it was before the sanity check
+        # prevents the sanity check from affecting random sampling in training
+        reset_seed()
+
    def validate(
        self,
        model: Optional[LightningModule] = None,
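The re-seeding above matters because the sanity-check validation loop can advance the global RNG before the first training batch is drawn, which changes the order in which a shuffled training DataLoader yields data. A minimal standalone sketch of that effect in plain PyTorch rather than Lightning internals (the helper name is illustrative only):

import torch
from torch.utils.data import DataLoader, TensorDataset


def first_train_batch(run_sanity_check: bool) -> torch.Tensor:
    torch.manual_seed(123)  # stands in for seed_everything(123)
    if run_sanity_check:
        # the sanity-check validation loop may draw from the global RNG
        # (dataloader sampling, dropout, ...), advancing its state
        _ = torch.rand(2)
    loader = DataLoader(TensorDataset(torch.arange(8.0)), batch_size=4, shuffle=True)
    return next(iter(loader))[0]


# without re-seeding, the first shuffled batch depends on whether the sanity
# check ran; resetting the seed after the sanity check removes that dependence
print(first_train_batch(run_sanity_check=False))
print(first_train_batch(run_sanity_check=True))

With the reset in run_sanity_check, both runs start training from the same RNG state, which is what the new test added below verifies.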
10 changes: 10 additions & 0 deletions pytorch_lightning/utilities/seed.py
@@ -78,6 +78,16 @@ def _select_seed_randomly(min_seed_value: int = 0, max_seed_value: int = 255) ->
    return random.randint(min_seed_value, max_seed_value)


+def reset_seed() -> None:
+    """
+    Reset the seed to the value that :func:`pytorch_lightning.utilities.seed.seed_everything` previously set.
+    If :func:`pytorch_lightning.utilities.seed.seed_everything` has not been used, this function does nothing.
+    """
+    seed = os.environ.get("PL_GLOBAL_SEED", None)
+    if seed is not None:
+        seed_everything(int(seed))


def pl_worker_init_function(worker_id: int, rank: Optional = None) -> None:  # pragma: no cover
    """
    The worker_init_fn that Lightning automatically adds to your dataloader if you previously set
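For reference, a short usage sketch of the new utility (assuming the import paths added in this PR): seed_everything records the seed in the PL_GLOBAL_SEED environment variable, and reset_seed simply re-applies it, so the RNG state can be restored after intermediate draws.

import torch
from pytorch_lightning import seed_everything
from pytorch_lightning.utilities.seed import reset_seed

seed_everything(123)    # seeds python, numpy and torch, and exports PL_GLOBAL_SEED=123
first = torch.rand(3)   # advances the global RNG
reset_seed()            # re-applies seed 123, read back from PL_GLOBAL_SEED
second = torch.rand(3)  # identical to `first` again
assert torch.allclose(first, second)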
12 changes: 6 additions & 6 deletions tests/callbacks/test_pruning.py
@@ -252,14 +252,14 @@ def test_multiple_pruning_callbacks(tmpdir, caplog, make_pruning_permanent: bool
    actual = [m for m in actual if m.startswith("Applied")]
    assert actual == [
        "Applied `L1Unstructured`. Pruned: 0/1122 (0.00%) -> 544/1122 (48.48%)",
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 500 (48.83%)", # noqa: E501
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 44 (68.75%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 503 (49.12%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 41 (64.06%)", # noqa: E501
        "Applied `RandomUnstructured`. Pruned: 544/1122 (48.48%) -> 680/1122 (60.61%)",
-        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.25. Pruned: 500 (48.83%) -> 635 (62.01%)", # noqa: E501
-        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.25. Pruned: 44 (68.75%) -> 45 (70.31%)", # noqa: E501
+        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.25. Pruned: 503 (49.12%) -> 629 (61.43%)", # noqa: E501
+        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.25. Pruned: 41 (64.06%) -> 51 (79.69%)", # noqa: E501
        "Applied `L1Unstructured`. Pruned: 680/1122 (60.61%) -> 884/1122 (78.79%)",
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 635 (62.01%) -> 830 (81.05%)", # noqa: E501
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 45 (70.31%) -> 54 (84.38%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 629 (61.43%) -> 827 (80.76%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 51 (79.69%) -> 57 (89.06%)", # noqa: E501
    ]

    filepath = str(tmpdir / "foo.ckpt")
2 changes: 1 addition & 1 deletion tests/checkpointing/test_trainer_checkpoint.py
@@ -27,7 +27,7 @@ def test_finetuning_with_resume_from_checkpoint(tmpdir):
    This test validates that generated ModelCheckpoint is pointing to the right best_model_path during test
    """

-    seed_everything(3)
+    seed_everything(4)

    checkpoint_callback = ModelCheckpoint(monitor='val_loss', dirpath=tmpdir, filename="{epoch:02d}", save_top_k=-1)

36 changes: 35 additions & 1 deletion tests/trainer/test_training_loop.py
@@ -11,8 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import torch

-from pytorch_lightning import Trainer
+from pytorch_lightning import seed_everything, Trainer
from tests.helpers import BoringModel


@@ -174,3 +175,36 @@ def on_train_epoch_end(self, outputs):

    result = trainer.fit(model)
    assert result == 1, "Training did not complete"


+def test_training_starts_with_seed(tmpdir):
+    """ Test that the training always starts with the same random state (when using seed_everything). """
+
+    class SeededModel(BoringModel):
+
+        def __init__(self):
+            super().__init__()
+            self.seen_batches = []
+
+        def training_step(self, batch, batch_idx):
+            self.seen_batches.append(batch.view(-1))
+            return super().training_step(batch, batch_idx)
+
+    def run_training(**trainer_kwargs):
+        model = SeededModel()
+        seed_everything(123)
+        trainer = Trainer(**trainer_kwargs)
+        trainer.fit(model)
+        return torch.cat(model.seen_batches)
+
+    sequence0 = run_training(
+        default_root_dir=tmpdir,
+        max_steps=2,
+        num_sanity_val_steps=0,
+    )
+    sequence1 = run_training(
+        default_root_dir=tmpdir,
+        max_steps=2,
+        num_sanity_val_steps=2,
+    )
+    assert torch.allclose(sequence0, sequence1)
21 changes: 21 additions & 0 deletions tests/utilities/test_seed.py
@@ -2,6 +2,7 @@
from unittest import mock

import pytest
+import torch

import pytorch_lightning.utilities.seed as seed_utils

@@ -53,3 +54,23 @@ def test_out_of_bounds_seed(seed):
    with pytest.warns(UserWarning, match="is not in bounds"):
        actual = seed_utils.seed_everything(seed)
    assert actual == 123


+def test_reset_seed_no_op():
+    """ Test that the reset_seed function is a no-op when seed_everything() was not used. """
+    assert "PL_GLOBAL_SEED" not in os.environ
+    seed_before = torch.initial_seed()
+    seed_utils.reset_seed()
+    assert torch.initial_seed() == seed_before
+    assert "PL_GLOBAL_SEED" not in os.environ


+def test_reset_seed_everything():
+    """ Test that we can reset the seed to the initial value set by seed_everything() """
+    assert "PL_GLOBAL_SEED" not in os.environ
+    seed_utils.seed_everything(123)
+    assert os.environ["PL_GLOBAL_SEED"] == "123"
+    before = torch.rand(1)
+    seed_utils.reset_seed()
+    after = torch.rand(1)
+    assert torch.allclose(before, after)