From 6c6ec1b481b6f4025e4ff830c3e7bfbb6963eccf Mon Sep 17 00:00:00 2001 From: Douglas Jacobsen Date: Thu, 31 Oct 2024 18:12:11 -0600 Subject: [PATCH 1/3] Add executable to ensure transformer cache is created This commit adds an executable to the py-nemo application to ensure the transformer cache is created properly before the actual nemo experiment is executed. --- .../builtin/applications/py-nemo/application.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/var/ramble/repos/builtin/applications/py-nemo/application.py b/var/ramble/repos/builtin/applications/py-nemo/application.py index b7cbca30e..9af3c672a 100644 --- a/var/ramble/repos/builtin/applications/py-nemo/application.py +++ b/var/ramble/repos/builtin/applications/py-nemo/application.py @@ -27,6 +27,12 @@ class PyNemo(ExecutableApplication): tags("ml-framework", "machine-learning") + executable( + "setup_transformer_cache", + 'bash -c "python3 -c \'from transformers import AutoTokenizer; AutoTokenizer.from_pretrained(\\"gpt2\\")\'"', + use_mpi=True, + ) + executable( "pretraining_exec", 'bash -c "cd /opt/NeMo; git rev-parse HEAD; export PYTHONPATH=/opt/NeMo:\${PYTHONPATH}; ' @@ -50,7 +56,11 @@ class PyNemo(ExecutableApplication): workload( "pretraining", - executables=["create_logs", "pretraining_exec"], + executables=[ + "create_logs", + "setup_transformer_cache", + "pretraining_exec", + ], inputs=["nemo_fetched_config"], ) From 8f7a50d942c0968b34d32cdcfc347f2ce8429bdc Mon Sep 17 00:00:00 2001 From: Douglas Jacobsen Date: Fri, 1 Nov 2024 15:45:24 -0600 Subject: [PATCH 2/3] Avoid trying to open a non-existent log file --- .../applications/py-nemo/application.py | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/var/ramble/repos/builtin/applications/py-nemo/application.py b/var/ramble/repos/builtin/applications/py-nemo/application.py index 9af3c672a..54c448bf6 100644 --- a/var/ramble/repos/builtin/applications/py-nemo/application.py +++ b/var/ramble/repos/builtin/applications/py-nemo/application.py @@ -1371,38 +1371,39 @@ def _preprocess_log(self, workspace, app_inst): final_regex = re.compile(self.final_epoch_regex) - with open(log_file, "r", encoding="ISO-8859-1") as f: - data = f.read() + if os.path.exists(log_file): + with open(log_file, "r", encoding="ISO-8859-1") as f: + data = f.read() - with open(log_file, "r", encoding="ISO-8859-1") as f: - for line in f.readlines(): - m = final_regex.match(line) + with open(log_file, "r", encoding="ISO-8859-1") as f: + for line in f.readlines(): + m = final_regex.match(line) - if m: - timestamp = m.group("elapsed_time") + if m: + timestamp = m.group("elapsed_time") - time_parts = timestamp.split(":") + time_parts = timestamp.split(":") - part_s = 0 - mult = 1 - for part in reversed(time_parts): - part_s += int(part) * mult - mult = mult * 60 - elapsed_s += part_s + part_s = 0 + mult = 1 + for part in reversed(time_parts): + part_s += int(part) * mult + mult = mult * 60 + elapsed_s += part_s - processed_log = self.expander.expand_var( - "{experiment_run_dir}/processed_{experiment_name}.out" - ) - - with open(processed_log, "w+") as f: - f.write( - data.replace("\x13", "\n") - .replace("\x96\x88", "") - .replace("â", "") + processed_log = self.expander.expand_var( + "{experiment_run_dir}/processed_{experiment_name}.out" ) - sec_file_path = self.expander.expand_var( - "{experiment_run_dir}/elapsed_seconds" - ) - with open(sec_file_path, "w+") as f: - f.write(f"Elapsed seconds: {elapsed_s}") + with open(processed_log, "w+") as f: + f.write( + data.replace("\x13", "\n") + .replace("\x96\x88", "") + .replace("â", "") + ) + + sec_file_path = self.expander.expand_var( + "{experiment_run_dir}/elapsed_seconds" + ) + with open(sec_file_path, "w+") as f: + f.write(f"Elapsed seconds: {elapsed_s}") From 376b7b8e52325de1e574f8c5fb2f3572092abdf8 Mon Sep 17 00:00:00 2001 From: Douglas Jacobsen Date: Fri, 1 Nov 2024 15:46:40 -0600 Subject: [PATCH 3/3] Remove unneeded commands in executable --- var/ramble/repos/builtin/applications/py-nemo/application.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/var/ramble/repos/builtin/applications/py-nemo/application.py b/var/ramble/repos/builtin/applications/py-nemo/application.py index 54c448bf6..3b846caba 100644 --- a/var/ramble/repos/builtin/applications/py-nemo/application.py +++ b/var/ramble/repos/builtin/applications/py-nemo/application.py @@ -35,8 +35,7 @@ class PyNemo(ExecutableApplication): executable( "pretraining_exec", - 'bash -c "cd /opt/NeMo; git rev-parse HEAD; export PYTHONPATH=/opt/NeMo:\${PYTHONPATH}; ' - "CUDA_VISIBLE_DEVICES={cuda_visible_devices} " + 'bash -c "cd /opt/NeMo; git rev-parse HEAD; ' "python3 -u /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_pretraining.py " '--config-path={nemo_generated_config_path} --config-name={nemo_generated_config_name}"', use_mpi=True,