diff --git a/var/ramble/repos/builtin/applications/py-nemo/application.py b/var/ramble/repos/builtin/applications/py-nemo/application.py index b7cbca30e..3b846caba 100644 --- a/var/ramble/repos/builtin/applications/py-nemo/application.py +++ b/var/ramble/repos/builtin/applications/py-nemo/application.py @@ -27,10 +27,15 @@ class PyNemo(ExecutableApplication): tags("ml-framework", "machine-learning") + executable( + "setup_transformer_cache", + 'bash -c "python3 -c \'from transformers import AutoTokenizer; AutoTokenizer.from_pretrained(\\"gpt2\\")\'"', + use_mpi=True, + ) + executable( "pretraining_exec", - 'bash -c "cd /opt/NeMo; git rev-parse HEAD; export PYTHONPATH=/opt/NeMo:\${PYTHONPATH}; ' - "CUDA_VISIBLE_DEVICES={cuda_visible_devices} " + 'bash -c "cd /opt/NeMo; git rev-parse HEAD; ' "python3 -u /opt/NeMo/examples/nlp/language_modeling/megatron_gpt_pretraining.py " '--config-path={nemo_generated_config_path} --config-name={nemo_generated_config_name}"', use_mpi=True, @@ -50,7 +55,11 @@ class PyNemo(ExecutableApplication): workload( "pretraining", - executables=["create_logs", "pretraining_exec"], + executables=[ + "create_logs", + "setup_transformer_cache", + "pretraining_exec", + ], inputs=["nemo_fetched_config"], ) @@ -1361,38 +1370,39 @@ def _preprocess_log(self, workspace, app_inst): final_regex = re.compile(self.final_epoch_regex) - with open(log_file, "r", encoding="ISO-8859-1") as f: - data = f.read() - - with open(log_file, "r", encoding="ISO-8859-1") as f: - for line in f.readlines(): - m = final_regex.match(line) + if os.path.exists(log_file): + with open(log_file, "r", encoding="ISO-8859-1") as f: + data = f.read() - if m: - timestamp = m.group("elapsed_time") + with open(log_file, "r", encoding="ISO-8859-1") as f: + for line in f.readlines(): + m = final_regex.match(line) - time_parts = timestamp.split(":") + if m: + timestamp = m.group("elapsed_time") - part_s = 0 - mult = 1 - for part in reversed(time_parts): - part_s += int(part) * mult - mult = mult * 60 - elapsed_s += part_s + time_parts = timestamp.split(":") - processed_log = self.expander.expand_var( - "{experiment_run_dir}/processed_{experiment_name}.out" - ) + part_s = 0 + mult = 1 + for part in reversed(time_parts): + part_s += int(part) * mult + mult = mult * 60 + elapsed_s += part_s - with open(processed_log, "w+") as f: - f.write( - data.replace("\x13", "\n") - .replace("\x96\x88", "") - .replace("â", "") + processed_log = self.expander.expand_var( + "{experiment_run_dir}/processed_{experiment_name}.out" ) - sec_file_path = self.expander.expand_var( - "{experiment_run_dir}/elapsed_seconds" - ) - with open(sec_file_path, "w+") as f: - f.write(f"Elapsed seconds: {elapsed_s}") + with open(processed_log, "w+") as f: + f.write( + data.replace("\x13", "\n") + .replace("\x96\x88", "") + .replace("â", "") + ) + + sec_file_path = self.expander.expand_var( + "{experiment_run_dir}/elapsed_seconds" + ) + with open(sec_file_path, "w+") as f: + f.write(f"Elapsed seconds: {elapsed_s}")