From 7d3242976e530289ef1496d7e5e69ecac90329b0 Mon Sep 17 00:00:00 2001
From: Somshubra Majumdar <titu1994@gmail.com>
Date: Tue, 27 Jun 2023 20:01:17 -0700
Subject: [PATCH] Add missing save restore connector to eval scripts

Signed-off-by: smajumdar <titu1994@gmail.com>
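---
Reviewer notes (text here is ignored by `git am`):
- megatron_gpt_eval.py and megatron_t5_eval.py now build an
  NLPSaveRestoreConnector and pass it to restore_from(); when the model
  path is a directory, it is assigned to the connector's
  model_extracted_dir before the call.
- The megatron_gpt_eval.py hunk uses `os` and `NLPSaveRestoreConnector`
  but adds no import for either; presumably both are already imported
  in that file -- confirm before applying.
- The megatron_gpt_ia3_eval.py hunk only adds `import os`; no use of it
  is visible in this patch -- confirm it is needed.
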
 examples/nlp/language_modeling/megatron_gpt_eval.py    |  8 +++++++-
 examples/nlp/language_modeling/megatron_t5_eval.py     | 10 +++++++++-
 .../language_modeling/tuning/megatron_gpt_ia3_eval.py  |  2 +-
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py
index af1657b44d7b..d8bc03bba4f2 100644
--- a/examples/nlp/language_modeling/megatron_gpt_eval.py
+++ b/examples/nlp/language_modeling/megatron_gpt_eval.py
@@ -173,8 +173,14 @@ def main(cfg) -> None:
         or cfg.pipeline_model_parallel_size < 0
         or cfg.get('pipeline_model_parallel_split_rank', -1) < 0
     ):
+        save_restore_connector = NLPSaveRestoreConnector()
+        if os.path.isdir(cfg.gpt_model_file):
+            save_restore_connector.model_extracted_dir = cfg.gpt_model_file
         model_config = MegatronGPTModel.restore_from(
-            restore_path=cfg.gpt_model_file, trainer=trainer, return_config=True,
+            restore_path=cfg.gpt_model_file,
+            trainer=trainer,
+            return_config=True,
+            save_restore_connector=save_restore_connector,
         )
 
         with open_dict(cfg):
diff --git a/examples/nlp/language_modeling/megatron_t5_eval.py b/examples/nlp/language_modeling/megatron_t5_eval.py
index 0282f9fb2913..0b6ea54b6b99 100644
--- a/examples/nlp/language_modeling/megatron_t5_eval.py
+++ b/examples/nlp/language_modeling/megatron_t5_eval.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 
+import os
 from argparse import ArgumentParser
 
 import torch
@@ -61,8 +62,15 @@ def main():
         or args.pipeline_model_parallel_size < 0
         or args.pipeline_model_parallel_split_rank < 0
     ):
+        save_restore_connector = NLPSaveRestoreConnector()
+        if os.path.isdir(args.model_file):
+            save_restore_connector.model_extracted_dir = args.model_file
+
         model_config = MegatronT5Model.restore_from(
-            restore_path=args.model_file, trainer=Trainer(strategy=NLPDDPStrategy()), return_config=True,
+            restore_path=args.model_file,
+            trainer=Trainer(strategy=NLPDDPStrategy()),
+            return_config=True,
+            save_restore_connector=save_restore_connector,
         )
 
         args.tensor_model_parallel_size = model_config.get('tensor_model_parallel_size', 1)
diff --git a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py
index a676fee00a7e..a30818f29fb3 100644
--- a/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py
+++ b/examples/nlp/language_modeling/tuning/megatron_gpt_ia3_eval.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+import os
 import torch
 import torch.multiprocessing as mp
 from megatron.core import parallel_state