Skip to content

Commit

Permalink
Akoumparouli/nemo ux update param name (NVIDIA#10441)
Browse files Browse the repository at this point in the history
* NeMoLogger: update dir to log_dir

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

* NeMoLogger: update calls

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>

---------

Signed-off-by: Alexandros Koumparoulis <akoumparouli@nvidia.com>
Co-authored-by: Marc Romeyn <mromeijn@nvidia.com>
Signed-off-by: Lifu Zhang <tomzhanglf@gmail.com>
  • Loading branch information
2 people authored and tomlifu committed Oct 25, 2024
1 parent 9161d05 commit b60ce45
Show file tree
Hide file tree
Showing 9 changed files with 14 additions and 14 deletions.
2 changes: 1 addition & 1 deletion examples/llm/megatron_gpt_pretraining.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def get_args():
)

nemo_logger = NeMoLogger(
dir=args.experiment_dir,
log_dir=args.experiment_dir,
)

train(
Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/llm/recipes/log/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def default_log(
name=name,
tensorboard=tensorboard_logger,
wandb=wandb_logger,
dir=dir,
log_dir=dir,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def get_logger(self) -> Config[nl.NeMoLogger]:
ckpt=ckpt,
tensorboard=tb_logger,
wandb=None,
dir=self.config.path_to_logs,
log_dir=self.config.path_to_logs,
)

def get_run_config(self) -> dict:
Expand Down
12 changes: 6 additions & 6 deletions nemo/lightning/nemo_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class NeMoLogger(IOMixin):
Args:
name (str): Name of the experiment.
dir (Optional[str]): Directory to save logs.
log_dir (Optional[str]): Directory to save logs.
explicit_log_dir (Optional[str]): Explicit log directory.
version (Optional[str]): Version of the experiment.
use_datetime_version (bool): Whether to use datetime as version.
Expand All @@ -56,7 +56,7 @@ class NeMoLogger(IOMixin):
"""

name: str = "default"
dir: Optional[str] = None
log_dir: Optional[str] = None
explicit_log_dir: Optional[str] = None
version: Optional[str] = None
use_datetime_version: bool = True
Expand Down Expand Up @@ -99,9 +99,9 @@ def setup(self, trainer: Union[pl.Trainer, fl.Fabric], resume_if_exists: bool =
f"that was passed to nemo_logger container a logger, but update_logger_directory is False. This means "
f"that the trainer's logger directory may not match with the explicit_log_dir."
)
if self.dir or self.version:
if self.log_dir or self.version:
logging.error(
f"nemo logger received explicit_log_dir: {self.explicit_log_dir} and at least one of dir: {self.dir}, "
f"nemo logger received explicit_log_dir: {self.explicit_log_dir} and at least one of dir: {self.log_dir}, "
f"or version: {self.version}. Please note that dir, name, and version will be ignored."
)
if is_global_rank_zero() and Path(self.explicit_log_dir).exists():
Expand All @@ -110,8 +110,8 @@ def setup(self, trainer: Union[pl.Trainer, fl.Fabric], resume_if_exists: bool =

else:
# Default dir to ./nemo_experiments if None was passed
_dir = self.dir
if self.dir is None:
_dir = self.log_dir
if self.log_dir is None:
_dir = str(Path.cwd() / "nemo_experiments")

if not self.name:
Expand Down
2 changes: 1 addition & 1 deletion tests/collections/llm/auto_conf/test_base_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def get_logger() -> run.Config[nl.NeMoLogger]:
ckpt=ckpt,
tensorboard=tb_logger,
wandb=None,
dir="/",
log_dir="/",
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def get_args():
)

nemo_logger = NeMoLogger(
dir=args.experiment_dir,
log_dir=args.experiment_dir,
)

train(
Expand Down
2 changes: 1 addition & 1 deletion tests/collections/llm/test_mnist_model_nemo2.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,7 @@ def run_train_mnist_litautoencoder_with_megatron_strategy_single_gpu():
tb_logger = TensorBoardLogger(save_dir=str(save_dir), name=name)
# Setup the logger and train the model
nemo_logger = NeMoLogger(
dir=str(root_dir), # WARNING: passing a path in here results in mutating the Path class.
log_dir=str(root_dir), # WARNING: passing a path in here results in mutating the Path class.
name=name,
tensorboard=tb_logger,
ckpt=checkpoint_callback,
Expand Down
2 changes: 1 addition & 1 deletion tests/collections/llm/test_mnist_model_nemo2_fsdp.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def run_train_mnist_litautoencoder_with_fsdp_strategy_single_gpu():
tb_logger = TensorBoardLogger(save_dir=str(save_dir), name=name)
# Setup the logger and train the model
nemo_logger = NeMoLogger(
dir=str(root_dir), # WARNING: passing a path in here results in mutating the Path class.
log_dir=str(root_dir), # WARNING: passing a path in here results in mutating the Path class.
name=name,
tensorboard=tb_logger,
ckpt=checkpoint_callback,
Expand Down
2 changes: 1 addition & 1 deletion tests/lightning/test_nemo_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def test_resume(self, trainer, tmp_path):

logger = nl.NeMoLogger(
name="default",
dir=str(tmp_path) + "/test_resume",
log_dir=str(tmp_path) + "/test_resume",
version="version_0",
use_datetime_version=False,
)
Expand Down

0 comments on commit b60ce45

Please sign in to comment.