Fix issues with PL 1.8 #5353

Merged: 29 commits into main from feat/lightning_1.8_support, Nov 28, 2022
Commits
477baaa
Fix issues with PL 1.8
SeanNaren Nov 8, 2022
c6eb78c
Set scripting variable
SeanNaren Nov 8, 2022
4cc6301
Fix missing arg
SeanNaren Nov 8, 2022
25e0f77
Cleanup list
SeanNaren Nov 8, 2022
60db72b
Fix reference
SeanNaren Nov 9, 2022
0ed8d1e
Merge branch 'main' into feat/lightning_1.8_support
SeanNaren Nov 9, 2022
6ef98e7
Try to fix hanging EMA test
SeanNaren Nov 9, 2022
e25704c
Missing \
SeanNaren Nov 9, 2022
4da05eb
Add strategy
SeanNaren Nov 9, 2022
4e6913c
See if setting the chdir fixes the hanging DDP test
SeanNaren Nov 9, 2022
07d4292
See if removing the subdir setter fixes the issue
SeanNaren Nov 10, 2022
9c6307a
Merge branch 'main' into feat/lightning_1.8_support
SeanNaren Nov 10, 2022
15087e5
Remove checks
SeanNaren Nov 10, 2022
4a01cec
Try [0,1] for devices
SeanNaren Nov 10, 2022
6c3e417
Merge branch 'main' into feat/lightning_1.8_support
okuchaiev Nov 14, 2022
1aad730
Merge branch 'main' into feat/lightning_1.8_support
okuchaiev Nov 15, 2022
b1add40
Add code back
SeanNaren Nov 15, 2022
8c7d7e0
Remove space
SeanNaren Nov 21, 2022
dce4918
Merge branch 'main' into feat/lightning_1.8_support
SeanNaren Nov 23, 2022
9e8ab6b
Update requirements
SeanNaren Nov 23, 2022
b33501d
Swap import path
SeanNaren Nov 23, 2022
580654c
Update references
SeanNaren Nov 23, 2022
ed8df6f
Fix deprecated variables
SeanNaren Nov 23, 2022
12d7ea2
Fix missing var
SeanNaren Nov 23, 2022
0e8e59a
Fix var
SeanNaren Nov 23, 2022
8efe28f
Merge branch 'main' into feat/lightning_1.8_support
SeanNaren Nov 23, 2022
d672304
Revert changes
SeanNaren Nov 23, 2022
706b19c
Merge branch 'main' into feat/lightning_1.8_support
SeanNaren Nov 23, 2022
d614378
Address review
SeanNaren Nov 24, 2022
Files changed (diffs below reflect 10 of the 29 commits)
2 changes: 2 additions & 0 deletions Jenkinsfile
@@ -225,9 +225,11 @@ pipeline {
stage('Speech to Text EMA') {
steps {
sh 'python examples/asr/asr_ctc/speech_to_text_ctc.py \
--config-path="../conf/" --config-name="config" \
model.train_ds.manifest_filepath=/home/TestData/an4_dataset/an4_train.json \
model.validation_ds.manifest_filepath=/home/TestData/an4_dataset/an4_val.json \
trainer.devices=2 \
trainer.strategy="ddp" \
trainer.accelerator="gpu" \
+trainer.fast_dev_run=True \
+exp_manager.ema.enable=True \
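For context, the added trainer overrides correspond to a plain PyTorch Lightning 1.8 Trainer roughly as in the sketch below (illustrative only, not code from this PR; the EMA behaviour itself is switched on through the exp_manager.ema.enable override):

    # Rough Python equivalent of the Hydra trainer overrides in the CI step above.
    from pytorch_lightning import Trainer

    trainer = Trainer(
        devices=2,
        accelerator="gpu",
        strategy="ddp",      # now set explicitly rather than left to the default selection
        fast_dev_run=True,   # run a single batch of train/val as a quick smoke test
    )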
4 changes: 2 additions & 2 deletions nemo/collections/nlp/parts/nlp_overrides.py
@@ -22,16 +22,16 @@

import pytorch_lightning as pl
import torch
from lightning_lite.plugins import ClusterEnvironment
from lightning_lite.utilities.types import _PATH
from omegaconf import OmegaConf
from pytorch_lightning.overrides import LightningDistributedModule
from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
from pytorch_lightning.plugins.io.checkpoint_plugin import CheckpointIO
from pytorch_lightning.plugins.precision.native_amp import NativeMixedPrecisionPlugin
from pytorch_lightning.strategies.ddp import DDPStrategy
from pytorch_lightning.trainer.trainer import Trainer
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.fetching import DataFetcher
from pytorch_lightning.utilities.types import _PATH
from torch.distributed.algorithms.ddp_comm_hooks.debugging_hooks import noop_hook
from torch.nn.parallel import DistributedDataParallel

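The swapped imports track the PL 1.8 split that exposes ClusterEnvironment and _PATH from the new lightning_lite package. Code that has to import cleanly on both sides of the 1.8 boundary could guard them as in this sketch (not what the PR does, since NeMo now requires 1.8+):

    # Fall back to the pre-1.8 import paths when lightning_lite is unavailable.
    try:
        from lightning_lite.plugins import ClusterEnvironment
        from lightning_lite.utilities.types import _PATH
    except ImportError:  # pytorch-lightning < 1.8
        from pytorch_lightning.plugins.environments.cluster_environment import ClusterEnvironment
        from pytorch_lightning.utilities.types import _PATH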
11 changes: 5 additions & 6 deletions nemo/collections/tts/models/fastpitch.py
@@ -19,7 +19,7 @@
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf, open_dict
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger
from pytorch_lightning.loggers import TensorBoardLogger

from nemo.collections.common.parts.preprocessing import parsers
from nemo.collections.tts.helpers.helpers import plot_alignment_to_numpy, plot_spectrogram_to_numpy, process_batch
@@ -228,11 +228,10 @@ def tb_logger(self):
if self.logger is None and self.logger.experiment is None:
return None
tb_logger = self.logger.experiment
if isinstance(self.logger, LoggerCollection):
for logger in self.logger:
if isinstance(logger, TensorBoardLogger):
tb_logger = logger.experiment
break
for logger in self.trainer.loggers:
if isinstance(logger, TensorBoardLogger):
tb_logger = logger.experiment
break
self._tb_logger = tb_logger
return self._tb_logger

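The same migration, dropping the removed LoggerCollection and iterating trainer.loggers instead, is applied to radtts.py, tacotron2.py and waveglow.py below. As a standalone sketch of the PL 1.8 pattern (the helper name is illustrative, not from this PR):

    from pytorch_lightning.loggers import TensorBoardLogger

    def find_tb_experiment(trainer, fallback=None):
        """Return the TensorBoard experiment handle from trainer.loggers, if one exists."""
        # In PL 1.8, trainer.loggers is a plain list of loggers; no wrapper class is involved.
        for logger in trainer.loggers:
            if isinstance(logger, TensorBoardLogger):
                return logger.experiment
        return fallback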
11 changes: 5 additions & 6 deletions nemo/collections/tts/models/radtts.py
@@ -21,7 +21,7 @@
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger
from pytorch_lightning.loggers import TensorBoardLogger

from nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers import BaseTokenizer
from nemo.collections.tts.helpers.helpers import plot_alignment_to_numpy
@@ -389,11 +389,10 @@ def tb_logger(self):
if self.logger is None and self.logger.experiment is None:
return None
tb_logger = self.logger.experiment
if isinstance(self.logger, LoggerCollection):
for logger in self.logger:
if isinstance(logger, TensorBoardLogger):
tb_logger = logger.experiment
break
for logger in self.trainer.loggers:
if isinstance(logger, TensorBoardLogger):
tb_logger = logger.experiment
break
self._tb_logger = tb_logger
return self._tb_logger

11 changes: 5 additions & 6 deletions nemo/collections/tts/models/tacotron2.py
@@ -20,7 +20,7 @@
from hydra.utils import instantiate
from omegaconf import MISSING, DictConfig, OmegaConf, open_dict
from omegaconf.errors import ConfigAttributeError
from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger, WandbLogger
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from torch import nn

from nemo.collections.common.parts.preprocessing import parsers
@@ -284,11 +284,10 @@ def validation_step(self, batch, batch_idx):
def validation_epoch_end(self, outputs):
if self.logger is not None and self.logger.experiment is not None:
logger = self.logger.experiment
if isinstance(self.logger, LoggerCollection):
for logger in self.logger:
if isinstance(logger, TensorBoardLogger):
logger = logger.experiment
break
for logger in self.trainer.loggers:
if isinstance(logger, TensorBoardLogger):
logger = logger.experiment
break
if isinstance(logger, TensorBoardLogger):
tacotron2_log_to_tb_func(
logger, outputs[0].values(), self.global_step, tag="val", log_images=True, add_audio=False,
11 changes: 5 additions & 6 deletions nemo/collections/tts/models/waveglow.py
@@ -16,7 +16,7 @@
import torch
from hydra.utils import instantiate
from omegaconf import DictConfig, open_dict
from pytorch_lightning.loggers import LoggerCollection, TensorBoardLogger
from pytorch_lightning.loggers import TensorBoardLogger

from nemo.collections.tts.helpers.helpers import OperationMode, waveglow_log_to_tb_func
from nemo.collections.tts.losses.waveglowloss import WaveGlowLoss
@@ -124,11 +124,10 @@ def validation_step(self, batch, batch_idx):
def validation_epoch_end(self, outputs):
if self.logger is not None and self.logger.experiment is not None:
tb_logger = self.logger.experiment
if isinstance(self.logger, LoggerCollection):
for logger in self.logger:
if isinstance(logger, TensorBoardLogger):
tb_logger = logger.experiment
break
for logger in self.trainer.loggers:
if isinstance(logger, TensorBoardLogger):
tb_logger = logger.experiment
break
waveglow_log_to_tb_func(
tb_logger,
outputs[0].values(),
3 changes: 2 additions & 1 deletion nemo/core/classes/exportable.py
@@ -16,6 +16,7 @@
from typing import List, Union

import torch
from pytorch_lightning.core.module import _jit_is_scripting
from torch.onnx import TrainingMode

from nemo.core.classes import typecheck
@@ -128,7 +129,7 @@ def _export(
# Set module mode
with torch.onnx.select_model_mode_for_export(
self, training
), torch.inference_mode(), torch.jit.optimized_execution(True):
), torch.inference_mode(), torch.jit.optimized_execution(True), _jit_is_scripting():

if input_example is None:
input_example = self.input_module.input_example()
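_jit_is_scripting is a private helper in the PL 1.8 module layout, so code that also needs to import on older releases could guard it, as in this sketch (the PR itself simply requires 1.8+):

    import contextlib

    try:
        # Private PL context manager used in the export path above to signal a scripting context.
        from pytorch_lightning.core.module import _jit_is_scripting
    except ImportError:  # older pytorch-lightning: substitute a no-op context manager
        _jit_is_scripting = contextlib.nullcontext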
3 changes: 3 additions & 0 deletions nemo/core/config/hydra_runner.py
@@ -67,6 +67,9 @@ def wrapper(cfg_passthrough: Optional[DictConfig] = None) -> Any:
# Set run.dir ONLY for ExpManager "compatibility" - to be removed.
overrides.append("hydra.run.dir=.")

# Set working directory to the job's output directory
overrides.append("hydra.job.chdir=True")

# Check if user set the schema.
if schema is not None:
# Create config store.
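hydra.job.chdir controls whether Hydra switches the process working directory to the job's output directory before calling the decorated function; the wrapper now appends this override for every run. A minimal, illustrative Hydra app (not from this PR) showing the flag in isolation:

    import os

    import hydra
    from omegaconf import DictConfig

    @hydra.main(config_path=None, config_name=None, version_base="1.2")
    def main(cfg: DictConfig) -> None:
        # Run as: python app.py hydra.job.chdir=True
        # With chdir enabled, Hydra changes the cwd to the job's output directory
        # before main() executes, which is what hydra_runner now forces.
        print(os.getcwd())

    if __name__ == "__main__":
        main()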
1 change: 1 addition & 0 deletions nemo/core/config/pytorch_lightning.py
@@ -84,6 +84,7 @@ class TrainerConfig:
strategy: Any = None
enable_checkpointing: bool = False
enable_model_summary: bool = True
inference_mode: bool = True


# Register the trainer config.
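inference_mode mirrors the Trainer argument of the same name added in recent PL releases, keeping the structured config aligned with the Trainer signature that tests/core/test_config_utils.py below asserts against. A minimal usage sketch relying only on the fields shown here:

    # Sketch only: the registered structured config picks up the new field.
    from omegaconf import OmegaConf

    from nemo.core.config.pytorch_lightning import TrainerConfig

    cfg = OmegaConf.structured(TrainerConfig())
    print(cfg.inference_mode)        # True, the default added above
    print(cfg.enable_model_summary)  # True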
22 changes: 0 additions & 22 deletions nemo/utils/exp_manager.py
@@ -32,7 +32,6 @@
from omegaconf import DictConfig, OmegaConf, open_dict
from pytorch_lightning.callbacks import Callback, ModelCheckpoint
from pytorch_lightning.callbacks.timer import Interval, Timer
from pytorch_lightning.loggers import LoggerCollection as _LoggerCollection
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger
from pytorch_lightning.loops import TrainingEpochLoop
from pytorch_lightning.strategies.ddp import DDPStrategy
@@ -658,24 +657,6 @@ def get_git_diff():
return "{}\n".format(err.output.decode("utf-8"))


class LoggerList(_LoggerCollection):
""" A thin wrapper on Lightning's LoggerCollection such that name and version are better aligned with exp_manager
"""

def __init__(self, _logger_iterable, nemo_name=None, nemo_version=""):
super().__init__(_logger_iterable)
self._nemo_name = nemo_name
self._nemo_version = nemo_version

@property
def name(self) -> str:
return self._nemo_name

@property
def version(self) -> str:
return self._nemo_version


def configure_loggers(
trainer: 'pytorch_lightning.Trainer',
exp_dir: [Path, str],
@@ -718,9 +699,6 @@ def configure_loggers(
logger_list.append(wandb_logger)
logging.info("WandBLogger has been set up")

logger_list = (
LoggerList(logger_list, nemo_name=name, nemo_version=version) if len(logger_list) > 1 else logger_list[0]
)
trainer._logger_connector.configure_logger(logger_list)


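With LoggerCollection gone in PL 1.8, exp_manager now passes the plain list of loggers straight to the trainer's logger connector, so the LoggerList shim above is no longer needed. Outside exp_manager, the equivalent setup is simply a list handed to the Trainer, as in this sketch (logger arguments are placeholders):

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger

    loggers = [TensorBoardLogger(save_dir="exp_dir"), WandbLogger(project="nemo")]
    trainer = Trainer(logger=loggers)
    # trainer.loggers exposes these as a plain list; there is no wrapper class
    # to subclass anymore, which is why the LoggerList shim could be deleted.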
2 changes: 1 addition & 1 deletion requirements/requirements_lightning.txt
@@ -1,6 +1,6 @@
hydra-core>=1.2.0,<1.3
omegaconf>=2.2,<2.3
pytorch-lightning>=1.7.0,<1.8
pytorch-lightning>=1.8.0
pyyaml<6 # Pinned until omegaconf works with pyyaml>=6
torchmetrics>=0.4.1rc0
transformers>=4.0.1,<=4.21.2
3 changes: 0 additions & 3 deletions tests/core/test_config_utils.py
@@ -116,11 +116,8 @@ class DummyDataClass:
@pytest.mark.unit
def test_ptl_config(self):
PTL_DEPRECATED = [
'distributed_backend',
'automatic_optimization',
'gpus',
'num_processes',
'weights_save_path',
]

result = config_utils.assert_dataclass_signature_match(ptl.Trainer, TrainerConfig, ignore_args=PTL_DEPRECATED)
9 changes: 5 additions & 4 deletions tests/core/test_optimizers_schedulers.py
@@ -20,6 +20,7 @@
import pytorch_lightning as pl
import torch
import torch.optim
from pytorch_lightning.utilities import rank_zero_only

from nemo.core import config, optim
from nemo.core.optim.lr_scheduler import AVAILABLE_SCHEDULERS
@@ -85,7 +86,7 @@ def configure_optimizers(self):


class Callback(pl.callbacks.Callback):
@pl.utilities.distributed.rank_zero_only
@rank_zero_only
def on_train_end(self, trainer, module):
count = module.my_opt.param_groups[0]['count']
if trainer.global_step != count or trainer.global_step != module.max_steps:
@@ -110,13 +111,13 @@ class SchedulerNoOpCallback(Callback):
def on_train_batch_end(self, trainer: pl.Trainer, pl_module, outputs, batch, batch_idx):
# pl_module.max_steps is "original" max steps without trainer extra steps.
if (trainer.global_step + 1) % 3 == 0 and (trainer.global_step + 1) < pl_module.max_steps:
schedulers = trainer.lr_schedulers
schedulers = trainer.lr_scheduler_configs

for scheduler in schedulers:
# Decrement the counter by 2, then perform a scheduler.step() to perform a no-up
# as well as update the optimizer lr in all param groups
scheduler['scheduler'].last_epoch -= 2
scheduler['scheduler'].step()
scheduler.scheduler.last_epoch -= 2
scheduler.scheduler.step()

# Increase the max step count by 1
trainer.fit_loop.max_steps = trainer.fit_loop.max_steps + 1
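The loop above reflects the PL 1.8 move from trainer.lr_schedulers (a list of dicts) to trainer.lr_scheduler_configs (a list of config objects exposing .scheduler). The same access pattern as a small standalone sketch (the helper name is illustrative):

    def rewind_schedulers(trainer, steps: int = 2) -> None:
        """Step every LR scheduler back by `steps`, then re-apply it."""
        for config in trainer.lr_scheduler_configs:   # previously: trainer.lr_schedulers
            config.scheduler.last_epoch -= steps      # previously: config['scheduler'].last_epoch
            config.scheduler.step()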
2 changes: 1 addition & 1 deletion tests/core_ptl/check_for_ranks.py
@@ -18,7 +18,7 @@
import torch
from omegaconf import OmegaConf
from pytorch_lightning import Trainer
from pytorch_lightning.utilities.distributed import rank_zero_only
from pytorch_lightning.utilities import rank_zero_only

from nemo.core import ModelPT
from nemo.utils import logging
1 change: 0 additions & 1 deletion tests/core_ptl/test_ptl_stateless_timer.py
@@ -19,7 +19,6 @@
import torch
from omegaconf import OmegaConf
from pytorch_lightning import Trainer
from pytorch_lightning.utilities.distributed import rank_zero_only

from nemo.core import ModelPT
from nemo.utils import logging