Commit
merge changes from main
Signed-off-by: Anh Uong <anh.uong@ibm.com>
anhuong committed Sep 25, 2024
2 parents 5df031a + 7714dfc commit d63abd6
Showing 8 changed files with 120 additions and 21 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -463,6 +463,7 @@ post_process_vLLM_adapters_new_tokens(
)
# where num_added_tokens is returned by sft_trainer.train()
```
</details>

_________________________

6 changes: 5 additions & 1 deletion build/utils.py
@@ -24,12 +24,16 @@
import shutil


def copy_checkpoint(source, destination):
def copy_checkpoint(source, destination, exclude_files: list[str] = None):
if not os.path.exists(destination):
os.makedirs(destination)
shutil.copystat(source, destination)
# Have a list of directory objects, now iterate over them.
if exclude_files is None:
exclude_files = []
for item in os.listdir(source):
if item in exclude_files:
continue
source_file = os.path.join(source, item)
destination_file = os.path.join(destination, item)
if os.path.isdir(source_file):
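For context, a minimal usage sketch of the updated helper follows. The import path and directory names are assumptions for illustration; only the `exclude_files` behavior comes from the change above.

```python
# Sketch only: the import path and directories below are assumed for illustration.
from build.utils import copy_checkpoint

# Copy a checkpoint directory but skip the raw adapter weights,
# e.g. because they are rewritten separately for vLLM.
copy_checkpoint(
    source="output/checkpoint-500",
    destination="post_processed/checkpoint-500",
    exclude_files=["adapter_model.safetensors"],
)
```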
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -28,7 +28,7 @@ classifiers=[
dependencies = [
"numpy>=1.26.4,<2.0",
"accelerate>=0.20.3,<0.34",
"transformers>4.41,<5.0",
"transformers>4.41,<4.45",
"torch>=2.2.0,<3.0",
"sentencepiece>=0.1.99,<0.3",
"tokenizers>=0.13.3,<1.0",
34 changes: 24 additions & 10 deletions scripts/post_process_adapters_vLLM.py
@@ -11,7 +11,9 @@
# Standard
import argparse
import json
import logging
import os
import sys

# Local
from tuning.utils.merge_model_utils import (
@@ -27,9 +29,9 @@ def main():
)
parser.add_argument(
"--model_path",
help="Path to tuned model containing either one or multiple checkpoints \
Path should have file added_tokens_info.json produced by tuning \
Hint: This will be either output_dir or save_model_dir arguments while tuning \
help="Path to tuned model containing either one or multiple checkpoints. \
Path should have file added_tokens_info.json produced by tuning. \
Hint: This will be either the output_dir or save_model_dir argument used while tuning. \
If multiple checkpoints are present, each checkpoint folder name \
should begin with 'checkpoint-'",
required=True,
@@ -44,16 +46,26 @@

if args.output_model_path is None:
output_model_path = args.model_path
else:
output_model_path = args.output_model_path
if os.path.exists(os.path.join(args.model_path, "added_tokens_info.json")):
with open(
os.path.join(args.model_path, "added_tokens_info.json"), encoding="utf-8"
) as json_data:
added_tokens_info = json.load(json_data)
num_added_tokens = added_tokens_info["num_new_tokens"]
else:
print("file added_tokens_info.json not in model_path. Cannot post-processes")
raise ValueError(
"file added_tokens_info.json not in model_path. \
Cannot post-process"
)
if num_added_tokens == 0:
logging.info("No new tokens added, hence post-processing not needed")
sys.exit(0)

found_adapters = 0
if os.path.exists(os.path.join(args.model_path, "adapter_model.safetensors")):
found_adapters = 1
post_process_vLLM_adapters_new_tokens(
args.model_path, output_model_path, num_added_tokens
)
@@ -68,12 +80,14 @@ def main():
num_added_tokens,
)
found_checkpoints = 1
if found_checkpoints and output_model_path != args.model_path:
copy_files_to_directory(
args.model_path,
output_model_path,
exclude_files=["adapter_model.safetensors"],
)
if found_checkpoints and output_model_path != args.model_path:
copy_files_to_directory(
args.model_path,
output_model_path,
exclude_files=["adapter_model.safetensors"],
)
if not found_adapters and not found_checkpoints:
logging.warning("No adapters were found to process in model path provided")


if __name__ == "__main__":
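Taken together, the script now exits cleanly when no tokens were added, raises a ValueError when added_tokens_info.json is missing, and warns when neither an adapter file nor checkpoint folders are found. A minimal sketch of the equivalent flow for a single checkpoint directory is below; the paths are illustrative and error handling is trimmed.

```python
# Sketch of the core post-processing flow for a single checkpoint directory.
# Paths are illustrative; the script itself is driven by --model_path
# (and an optional output path argument).
import json
import os

from tuning.utils.merge_model_utils import post_process_vLLM_adapters_new_tokens

model_path = "output"                 # directory produced by tuning (assumed)
output_model_path = "post_processed"  # where vLLM-compatible files are written

# added_tokens_info.json is written during tuning and records the number
# of new tokens added to the tokenizer/embeddings.
with open(
    os.path.join(model_path, "added_tokens_info.json"), encoding="utf-8"
) as json_data:
    num_added_tokens = json.load(json_data)["num_new_tokens"]

if num_added_tokens > 0 and os.path.exists(
    os.path.join(model_path, "adapter_model.safetensors")
):
    post_process_vLLM_adapters_new_tokens(
        model_path, output_model_path, num_added_tokens
    )
```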
69 changes: 63 additions & 6 deletions tests/utils/test_merge_model_utils.py
@@ -17,12 +17,12 @@

# Standard
import os
import shutil
import tempfile

# Third Party
from safetensors import safe_open
import pytest
import torch

# Local
from tuning.utils.merge_model_utils import post_process_vLLM_adapters_new_tokens
@@ -33,11 +33,7 @@
)


@pytest.mark.skipif(
not (torch.cuda.is_available()),
reason="Only runs if cuda is supported",
)
def test_post_process_vLLM_adapters_new_tokens():
def test_post_process_vllm_adapters_new_tokens():
"""Ensure that in post-process, we output the correct format supported by vLLM for added_tokens
- we should output a new_embeddings.safetensors
- we should not have lm_head.weight in adapter_model.safetensors
@@ -73,3 +69,64 @@ def test_post_process_vLLM_adapters_new_tokens():
if "lm_head.weight" in k:
found_lm_head = True
assert not found_lm_head


def test_post_process_vllm_adapters_no_new_tokens():
"""Ensure that an error is returned if no added tokens while tuning, \
but embeddings resized.
"""
# first, double check dummy tuned llama has a lm_head.weight
found_lm_head = False
with safe_open(
os.path.join(DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, "adapter_model.safetensors"),
framework="pt",
) as f:
for k in f.keys():
if "lm_head.weight" in k:
found_lm_head = True
assert found_lm_head
# do the post processing
with pytest.raises(NotImplementedError):
post_process_vLLM_adapters_new_tokens(
DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, None, num_added_tokens=0
)


def test_post_process_in_place_vllm_adapters_new_tokens():
"""Ensure that in post-process, we output the correct format supported by vLLM for added_tokens
- if output dir is not specified, it should modify files in place
- we should output a new_embeddings.safetensors
- we should not have lm_head.weight in adapter_model.safetensors
"""
# first, double check dummy tuned llama has a lm_head.weight
found_lm_head = False
with safe_open(
os.path.join(DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, "adapter_model.safetensors"),
framework="pt",
) as f:
for k in f.keys():
if "lm_head.weight" in k:
found_lm_head = True
assert found_lm_head

# do the post processing
with tempfile.TemporaryDirectory() as tempdir:
shutil.copytree(
DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, tempdir, dirs_exist_ok=True
)
post_process_vLLM_adapters_new_tokens(tempdir, None, num_added_tokens=1)

# check that new_embeddings.safetensors exist
new_embeddings = os.path.join(tempdir, "new_embeddings.safetensors")
assert os.path.exists(new_embeddings)

# check that lm_head.weight NOT in the new outputted adapter_model.safetensors
adapter_model = os.path.join(tempdir, "adapter_model.safetensors")
assert os.path.exists(adapter_model)

found_lm_head = False
with safe_open(adapter_model, framework="pt") as f:
for k in f.keys():
if "lm_head.weight" in k:
found_lm_head = True
assert not found_lm_head
20 changes: 20 additions & 0 deletions tests/utils/test_tokenizer_data_utils.py
@@ -0,0 +1,20 @@
# Third Party
from transformers import AutoModelForCausalLM, AutoTokenizer

# First Party
from tests.data import MODEL_NAME

# Local
from tuning.data.tokenizer_data_utils import tokenizer_and_embedding_resize


def test_tokenizer_and_embedding_resize_return_values():
"""Test to ensure number of added tokens are returned correctly"""
special_tokens_dict = {"pad_token": "<pad>"}
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
metadata = tokenizer_and_embedding_resize(special_tokens_dict, tokenizer, model)
assert metadata["num_new_tokens"] == 1
assert "new_embedding_size" in metadata
6 changes: 5 additions & 1 deletion tuning/sft_trainer.py
@@ -90,7 +90,7 @@ def train(
attention_and_distributed_packing_config: Optional[
AttentionAndDistributedPackingConfig
] = None,
):
) -> tuple[SFTTrainer, dict]:
"""Call the SFTTrainer
Args:
@@ -118,6 +118,10 @@
Should be used in combination with quantized_lora_config. Also currently
fused_lora and fast_kernels must be used together (may change in future). \
attention_and_distributed_packing_config: Used for padding-free attention and multipack.
Returns:
Tuple: Instance of SFTTrainer and a dict of metadata.
The metadata contains information on the number of tokens added while tuning.
"""

train_args, logger = set_log_level(train_args, "sft_trainer_train")
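Since `train` now returns both the trainer and a metadata dict, downstream code can chain tuning and the vLLM post-processing step. A hedged sketch follows; the config argument objects and the `num_new_tokens` key are assumptions based on the tests and script in this commit.

```python
# Sketch only: the *_args/config objects are the usual sft_trainer configuration
# dataclasses (assumed), and the metadata key mirrors added_tokens_info.json.
from tuning import sft_trainer
from tuning.utils.merge_model_utils import post_process_vLLM_adapters_new_tokens


def tune_then_post_process(model_args, data_args, train_args, tune_config, ckpt_dir):
    """Run tuning, then rewrite the adapter for vLLM if new tokens were added."""
    trainer, metadata = sft_trainer.train(
        model_args, data_args, train_args, tune_config
    )
    num_added_tokens = metadata["num_new_tokens"]  # assumed key name
    if num_added_tokens > 0:
        # Writes new_embeddings.safetensors and strips lm_head.weight from
        # adapter_model.safetensors; with no output path this happens in place.
        post_process_vLLM_adapters_new_tokens(ckpt_dir, None, num_added_tokens)
    return trainer, num_added_tokens
```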
3 changes: 1 addition & 2 deletions tuning/utils/merge_model_utils.py
@@ -178,8 +178,7 @@ def post_process_vLLM_adapters_new_tokens(
# Retain all other weights in adapters.safetensors
adapters[k] = f.get_tensor(k)

if not os.path.exists(modified_checkpoint_path):
os.makedirs(modified_checkpoint_path, exist_ok=True)
os.makedirs(modified_checkpoint_path, exist_ok=True)

save_file(
new_embeddings,
