From 36d580ee2415dd2d81fa7b41cb7cb5b7ca2023f4 Mon Sep 17 00:00:00 2001
From: Hemil Desai
Date: Fri, 30 Aug 2024 17:52:51 -0700
Subject: [PATCH] Fix llama3 pretraining NeMo 2.0 script (#10307)

Signed-off-by: Hemil Desai
Signed-off-by: adityavavre
---
 examples/llm/run/llama3_pretraining.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/llm/run/llama3_pretraining.py b/examples/llm/run/llama3_pretraining.py
index 612b58e2169f..6cc001029a3b 100644
--- a/examples/llm/run/llama3_pretraining.py
+++ b/examples/llm/run/llama3_pretraining.py
@@ -140,7 +140,7 @@ def main():
     # Uses configs from NeMo directly
     pretrain = MODEL_SIZE_MAPPING[args.size]["nemo"]["pretrain"](
         name=exp_name,
-        ckpt_dir=f"/{exp_name}/checkpoints",
+        ckpt_dir="/nemo_run/checkpoints",
     )
 
     # Overwrite the dataloader in the recipe to use your custom dataloader.
@@ -170,8 +170,6 @@ def main():
     executor = local_executor_torchrun(nodes=pretrain.trainer.num_nodes, devices=pretrain.trainer.devices)
 
     with run.Experiment(f"{exp_name}{args.tag}") as exp:
-        pretrain.log.dir = f"/{exp_name}/checkpoints"
-
         for i in range(1):
             exp.add(
                 pretrain,
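
For context, below is a minimal sketch of the configuration pattern this patch touches, written against the public NeMo 2.0 recipe and NeMo-Run APIs rather than the script's own MODEL_SIZE_MAPPING and local_executor_torchrun helpers. The experiment name, node count, and GPU count are illustrative assumptions; the only detail carried over from the patch is that the checkpoint/log directory is pinned to a fixed path ("/nemo_run/checkpoints") instead of being derived from the experiment name.

# Illustrative sketch only -- assumes NeMo 2.0 recipes (nemo.collections.llm)
# and NeMo-Run; this is not the patched script itself.
import nemo_run as run
from nemo.collections import llm

exp_name = "llama3-8b-pretrain"  # hypothetical experiment name

# Build the Llama 3 8B pretraining recipe; `dir` controls where logs and
# checkpoints land. As in the patch, a fixed path is used rather than one
# derived from the experiment name.
pretrain = llm.llama3_8b.pretrain_recipe(
    name=exp_name,
    dir="/nemo_run/checkpoints",
    num_nodes=1,
    num_gpus_per_node=8,
)

# Local single-node executor that launches the task via torchrun.
executor = run.LocalExecutor(ntasks_per_node=8, launcher="torchrun")

with run.Experiment(exp_name) as exp:
    exp.add(pretrain, executor=executor, name="pretraining")
    exp.run(sequential=True, tail_logs=True)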