Fixed num_sanity_val_steps affecting reproducibility of training data shuffling #7014

Merged · 26 commits · Apr 27, 2021 · Changes from 23 commits
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -319,6 +319,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed parsing for pre-release package versions ([#6999](https://github.com/PyTorchLightning/pytorch-lightning/pull/6999))


+- Fixed `num_sanity_val_steps` affecting reproducibility of training data shuffling ([#7014](https://github.com/PyTorchLightning/pytorch-lightning/pull/7014))



## [1.2.7] - 2021-04-06

### Fixed
7 changes: 2 additions & 5 deletions pytorch_lightning/plugins/training_type/ddp.py
@@ -37,7 +37,7 @@
)
from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp, sync_ddp_if_available
from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.seed import reset_seed

if _HYDRA_AVAILABLE:
    from hydra.core.hydra_config import HydraConfig
@@ -180,10 +180,7 @@ def _call_children_scripts(self):
            sleep(delay)

    def setup_distributed(self):
-        # TODO: check if needed
-        seed = os.environ.get("PL_GLOBAL_SEED")
-        if seed is not None:
-            seed_everything(int(seed))
+        reset_seed()

        # determine which process we are and world size
        self.set_world_ranks()
7 changes: 2 additions & 5 deletions pytorch_lightning/plugins/training_type/ddp_spawn.py
@@ -32,7 +32,7 @@
from pytorch_lightning.utilities.cloud_io import atomic_save
from pytorch_lightning.utilities.cloud_io import load as pl_load
from pytorch_lightning.utilities.distributed import rank_zero_only, rank_zero_warn, ReduceOp, sync_ddp_if_available
-from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.seed import reset_seed

if _TORCH_GREATER_EQUAL_1_8:
    from pytorch_lightning.utilities.distributed import register_ddp_comm_hook
@@ -132,10 +132,7 @@ def start_predicting(self, trainer):
    def new_process(self, process_idx, trainer, mp_queue):
        self.mp_queue = mp_queue

-        # TODO: check if needed
-        seed = os.environ.get("PL_GLOBAL_SEED")
-        if seed is not None:
-            seed_everything(int(seed))
+        reset_seed()

        self.set_world_ranks(process_idx)

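The spawn-based plugins re-seed inside new_process because each spawned worker starts with fresh RNG state but inherits the parent's environment, including PL_GLOBAL_SEED. Below is a minimal standard-library sketch of that mechanism; seed_everything and reset_seed here are simplified stand-ins for illustration, not the Lightning implementations.

import multiprocessing as mp
import os
import random


def seed_everything(seed: int) -> None:
    # simplified stand-in: record the seed and seed the RNGs we care about here
    os.environ["PL_GLOBAL_SEED"] = str(seed)
    random.seed(seed)


def reset_seed() -> None:
    # simplified stand-in for the new utility: re-apply whatever seed_everything stored
    seed = os.environ.get("PL_GLOBAL_SEED")
    if seed is not None:
        seed_everything(int(seed))


def worker(rank: int) -> None:
    # a spawned child has fresh RNG state but inherits PL_GLOBAL_SEED from the parent,
    # so reset_seed() puts every process into the same, reproducible state
    reset_seed()
    print(f"rank {rank}: {random.random():.6f}")  # prints the same value in every worker


if __name__ == "__main__":
    seed_everything(123)
    ctx = mp.get_context("spawn")
    processes = [ctx.Process(target=worker, args=(rank,)) for rank in range(2)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()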
6 changes: 2 additions & 4 deletions pytorch_lightning/plugins/training_type/tpu_spawn.py
@@ -32,7 +32,7 @@
from pytorch_lightning.utilities.data import has_len
from pytorch_lightning.utilities.distributed import rank_zero_only, ReduceOp
from pytorch_lightning.utilities.exceptions import MisconfigurationException
-from pytorch_lightning.utilities.seed import seed_everything
+from pytorch_lightning.utilities.seed import reset_seed

if _TPU_AVAILABLE:
    import torch_xla.core.xla_model as xm
@@ -137,9 +137,7 @@ def set_world_ranks(self, process_idx: int = 0) -> None:
    def new_process(self, process_idx: int, trainer, mp_queue) -> None:
        self.mp_queue = mp_queue

-        seed = os.environ.get("PL_GLOBAL_SEED")
-        if seed is not None:
-            seed_everything(int(seed))
+        reset_seed()

        self.tpu_local_core_rank = xm.get_local_ordinal()
        self.tpu_global_core_rank = xm.get_ordinal()
5 changes: 5 additions & 0 deletions pytorch_lightning/trainer/trainer.py
@@ -62,6 +62,7 @@
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.memory import recursive_detach
from pytorch_lightning.utilities.model_helpers import is_overridden
+from pytorch_lightning.utilities.seed import reset_seed

log = logging.getLogger(__name__)
# warnings to ignore in trainer
@@ -864,6 +865,10 @@ def run_sanity_check(self, ref_model):

        self._running_stage = stage

+        # reset the seed to what it was before the sanity check
+        # prevents the sanity check from affecting random sampling in training
+        reset_seed()
+
    def validate(
        self,
        model: Optional[LightningModule] = None,
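The re-seeding above matters because the sanity-check validation loop can advance the global RNG before the first training batch is drawn, which changes the order in which a shuffled training DataLoader yields data. A minimal standalone sketch of that effect in plain PyTorch rather than Lightning internals (the helper name is illustrative only):

import torch
from torch.utils.data import DataLoader, TensorDataset


def first_train_batch(run_sanity_check: bool) -> torch.Tensor:
    torch.manual_seed(123)  # stands in for seed_everything(123)
    if run_sanity_check:
        # the sanity-check validation loop may draw from the global RNG
        # (dataloader sampling, dropout, ...), advancing its state
        _ = torch.rand(2)
    loader = DataLoader(TensorDataset(torch.arange(8.0)), batch_size=4, shuffle=True)
    return next(iter(loader))[0]


# without re-seeding, the first shuffled batch depends on whether the sanity
# check ran; resetting the seed after the sanity check removes that dependence
print(first_train_batch(run_sanity_check=False))
print(first_train_batch(run_sanity_check=True))

With the reset in run_sanity_check, both runs start training from the same RNG state, which is what the new test added below verifies.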
10 changes: 10 additions & 0 deletions pytorch_lightning/utilities/seed.py
@@ -78,6 +78,16 @@ def _select_seed_randomly(min_seed_value: int = 0, max_seed_value: int = 255) ->
    return random.randint(min_seed_value, max_seed_value)


+def reset_seed() -> None:
+    """
+    Reset the seed to the value that :func:`pytorch_lightning.utilities.seed.seed_everything` previously set.
+    If :func:`pytorch_lightning.utilities.seed.seed_everything` has not been used, this function does nothing.
+    """
+    seed = os.environ.get("PL_GLOBAL_SEED", None)
+    if seed is not None:
+        seed_everything(int(seed))


def pl_worker_init_function(worker_id: int, rank: Optional = None) -> None:  # pragma: no cover
    """
    The worker_init_fn that Lightning automatically adds to your dataloader if you previously set
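For reference, a short usage sketch of the new utility (assuming the import paths added in this PR): seed_everything records the seed in the PL_GLOBAL_SEED environment variable, and reset_seed simply re-applies it, so the RNG state can be restored after intermediate draws.

import torch
from pytorch_lightning import seed_everything
from pytorch_lightning.utilities.seed import reset_seed

seed_everything(123)    # seeds python, numpy and torch, and exports PL_GLOBAL_SEED=123
first = torch.rand(3)   # advances the global RNG
reset_seed()            # re-applies seed 123, read back from PL_GLOBAL_SEED
second = torch.rand(3)  # identical to `first` again
assert torch.allclose(first, second)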
12 changes: 6 additions & 6 deletions tests/callbacks/test_pruning.py
@@ -252,14 +252,14 @@ def test_multiple_pruning_callbacks(tmpdir, caplog, make_pruning_permanent: bool
    actual = [m for m in actual if m.startswith("Applied")]
    assert actual == [
        "Applied `L1Unstructured`. Pruned: 0/1122 (0.00%) -> 544/1122 (48.48%)",
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 500 (48.83%)", # noqa: E501
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 44 (68.75%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 503 (49.12%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 0 (0.00%) -> 41 (64.06%)", # noqa: E501
        "Applied `RandomUnstructured`. Pruned: 544/1122 (48.48%) -> 680/1122 (60.61%)",
-        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.25. Pruned: 500 (48.83%) -> 635 (62.01%)", # noqa: E501
-        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.25. Pruned: 44 (68.75%) -> 45 (70.31%)", # noqa: E501
+        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.25. Pruned: 503 (49.12%) -> 629 (61.43%)", # noqa: E501
+        "Applied `RandomUnstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.25. Pruned: 41 (64.06%) -> 51 (79.69%)", # noqa: E501
        "Applied `L1Unstructured`. Pruned: 680/1122 (60.61%) -> 884/1122 (78.79%)",
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 635 (62.01%) -> 830 (81.05%)", # noqa: E501
-        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 45 (70.31%) -> 54 (84.38%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=32, bias=True).weight` with amount=0.5. Pruned: 629 (61.43%) -> 827 (80.76%)", # noqa: E501
+        "Applied `L1Unstructured` to `Linear(in_features=32, out_features=2, bias=True).weight` with amount=0.5. Pruned: 51 (79.69%) -> 57 (89.06%)", # noqa: E501
    ]

    filepath = str(tmpdir / "foo.ckpt")
2 changes: 1 addition & 1 deletion tests/checkpointing/test_trainer_checkpoint.py
@@ -27,7 +27,7 @@ def test_finetuning_with_resume_from_checkpoint(tmpdir):
    This test validates that generated ModelCheckpoint is pointing to the right best_model_path during test
    """

-    seed_everything(3)
+    seed_everything(4)

    checkpoint_callback = ModelCheckpoint(monitor='val_loss', dirpath=tmpdir, filename="{epoch:02d}", save_top_k=-1)

36 changes: 35 additions & 1 deletion tests/trainer/test_training_loop.py
@@ -11,8 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+import torch

-from pytorch_lightning import Trainer
+from pytorch_lightning import seed_everything, Trainer
from tests.helpers import BoringModel


@@ -174,3 +175,36 @@ def on_train_epoch_end(self, outputs):

    result = trainer.fit(model)
    assert result == 1, "Training did not complete"


+def test_training_starts_with_seed(tmpdir):
+    """ Test that the training always starts with the same random state (when using seed_everything). """
+
+    class SeededModel(BoringModel):
+
+        def __init__(self):
+            super().__init__()
+            self.seen_batches = []
+
+        def training_step(self, batch, batch_idx):
+            self.seen_batches.append(batch.view(-1))
+            return super().training_step(batch, batch_idx)
+
+    def run_training(**trainer_kwargs):
+        model = SeededModel()
+        seed_everything(123)
+        trainer = Trainer(**trainer_kwargs)
+        trainer.fit(model)
+        return torch.cat(model.seen_batches)
+
+    sequence0 = run_training(
+        default_root_dir=tmpdir,
+        max_steps=2,
+        num_sanity_val_steps=0,
+    )
+    sequence1 = run_training(
+        default_root_dir=tmpdir,
+        max_steps=2,
+        num_sanity_val_steps=2,
+    )
+    assert torch.allclose(sequence0, sequence1)
21 changes: 21 additions & 0 deletions tests/utilities/test_seed.py
@@ -2,6 +2,7 @@
from unittest import mock

import pytest
+import torch

import pytorch_lightning.utilities.seed as seed_utils

@@ -53,3 +54,23 @@ def test_out_of_bounds_seed(seed):
    with pytest.warns(UserWarning, match="is not in bounds"):
        actual = seed_utils.seed_everything(seed)
    assert actual == 123


+def test_reset_seed_no_op():
+    """ Test that the reset_seed function is a no-op when seed_everything() was not used. """
+    assert "PL_GLOBAL_SEED" not in os.environ
+    seed_before = torch.initial_seed()
+    seed_utils.reset_seed()
+    assert torch.initial_seed() == seed_before
+    assert "PL_GLOBAL_SEED" not in os.environ


+def test_reset_seed_everything():
+    """ Test that we can reset the seed to the initial value set by seed_everything() """
+    assert "PL_GLOBAL_SEED" not in os.environ
+    seed_utils.seed_everything(123)
+    assert os.environ["PL_GLOBAL_SEED"] == "123"
+    before = torch.rand(1)
+    seed_utils.reset_seed()
+    after = torch.rand(1)
+    assert torch.allclose(before, after)