Commit
merge changes from main
Signed-off-by: Anh Uong <anh.uong@ibm.com>
anhuong committed Sep 25, 2024
2 parents 5df031a + 7714dfc commit d63abd6
Showing 8 changed files with 120 additions and 21 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -463,6 +463,7 @@ post_process_vLLM_adapters_new_tokens(
)
# where num_added_tokens is returned by sft_trainer.train()
```
</details>

_________________________

6 changes: 5 additions & 1 deletion build/utils.py
@@ -24,12 +24,16 @@
import shutil


def copy_checkpoint(source, destination):
def copy_checkpoint(source, destination, exclude_files: list[str] = None):
if not os.path.exists(destination):
os.makedirs(destination)
shutil.copystat(source, destination)
# Have a list of directory objects, now iterate over them.
if exclude_files is None:
exclude_files = []
for item in os.listdir(source):
if item in exclude_files:
continue
source_file = os.path.join(source, item)
destination_file = os.path.join(destination, item)
if os.path.isdir(source_file):
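For context, a minimal usage sketch of the updated helper follows. The import path and directory names are assumptions for illustration; only the `exclude_files` behavior comes from the change above.

```python
# Sketch only: the import path and directories below are assumed for illustration.
from build.utils import copy_checkpoint

# Copy a checkpoint directory but skip the raw adapter weights,
# e.g. because they are rewritten separately for vLLM.
copy_checkpoint(
    source="output/checkpoint-500",
    destination="post_processed/checkpoint-500",
    exclude_files=["adapter_model.safetensors"],
)
```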
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -28,7 +28,7 @@ classifiers=[
dependencies = [
"numpy>=1.26.4,<2.0",
"accelerate>=0.20.3,<0.34",
"transformers>4.41,<5.0",
"transformers>4.41,<4.45",
"torch>=2.2.0,<3.0",
"sentencepiece>=0.1.99,<0.3",
"tokenizers>=0.13.3,<1.0",
34 changes: 24 additions & 10 deletions scripts/post_process_adapters_vLLM.py
@@ -11,7 +11,9 @@
# Standard
import argparse
import json
import logging
import os
import sys

# Local
from tuning.utils.merge_model_utils import (
@@ -27,9 +29,9 @@ def main():
)
parser.add_argument(
"--model_path",
help="Path to tuned model containing either one or multiple checkpoints \
Path should have file added_tokens_info.json produced by tuning \
Hint: This will be either output_dir or save_model_dir arguments while tuning \
help="Path to tuned model containing either one or multiple checkpoints. \
Path should have file added_tokens_info.json produced by tuning. \
Hint: This will be either the output_dir or save_model_dir argument used while tuning. \
If multiple checkpoints are present, each checkpoint folder name \
should begin with 'checkpoint-'",
required=True,
@@ -44,16 +46,26 @@

if args.output_model_path is None:
output_model_path = args.model_path
else:
output_model_path = args.output_model_path
if os.path.exists(os.path.join(args.model_path, "added_tokens_info.json")):
with open(
os.path.join(args.model_path, "added_tokens_info.json"), encoding="utf-8"
) as json_data:
added_tokens_info = json.load(json_data)
num_added_tokens = added_tokens_info["num_new_tokens"]
else:
print("file added_tokens_info.json not in model_path. Cannot post-processes")
raise ValueError(
"file added_tokens_info.json not in model_path. \
Cannot post-process"
)
if num_added_tokens == 0:
logging.info("No new tokens added, hence post-processing not needed")
sys.exit(0)

found_adapters = 0
if os.path.exists(os.path.join(args.model_path, "adapter_model.safetensors")):
found_adapters = 1
post_process_vLLM_adapters_new_tokens(
args.model_path, output_model_path, num_added_tokens
)
@@ -68,12 +80,14 @@ def main():
num_added_tokens,
)
found_checkpoints = 1
if found_checkpoints and output_model_path != args.model_path:
copy_files_to_directory(
args.model_path,
output_model_path,
exclude_files=["adapter_model.safetensors"],
)
if found_checkpoints and output_model_path != args.model_path:
copy_files_to_directory(
args.model_path,
output_model_path,
exclude_files=["adapter_model.safetensors"],
)
if not found_adapters and not found_checkpoints:
logging.warning("No adapters were found to process in model path provided")


if __name__ == "__main__":
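Taken together, the script now exits cleanly when no tokens were added, raises a ValueError when added_tokens_info.json is missing, and warns when neither an adapter file nor checkpoint folders are found. A minimal sketch of the equivalent flow for a single checkpoint directory is below; the paths are illustrative and error handling is trimmed.

```python
# Sketch of the core post-processing flow for a single checkpoint directory.
# Paths are illustrative; the script itself is driven by --model_path
# (and an optional output path argument).
import json
import os

from tuning.utils.merge_model_utils import post_process_vLLM_adapters_new_tokens

model_path = "output"                 # directory produced by tuning (assumed)
output_model_path = "post_processed"  # where vLLM-compatible files are written

# added_tokens_info.json is written during tuning and records the number
# of new tokens added to the tokenizer/embeddings.
with open(
    os.path.join(model_path, "added_tokens_info.json"), encoding="utf-8"
) as json_data:
    num_added_tokens = json.load(json_data)["num_new_tokens"]

if num_added_tokens > 0 and os.path.exists(
    os.path.join(model_path, "adapter_model.safetensors")
):
    post_process_vLLM_adapters_new_tokens(
        model_path, output_model_path, num_added_tokens
    )
```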
69 changes: 63 additions & 6 deletions tests/utils/test_merge_model_utils.py
@@ -17,12 +17,12 @@

# Standard
import os
import shutil
import tempfile

# Third Party
from safetensors import safe_open
import pytest
import torch

# Local
from tuning.utils.merge_model_utils import post_process_vLLM_adapters_new_tokens
@@ -33,11 +33,7 @@
)


@pytest.mark.skipif(
not (torch.cuda.is_available()),
reason="Only runs if cuda is supported",
)
def test_post_process_vLLM_adapters_new_tokens():
def test_post_process_vllm_adapters_new_tokens():
"""Ensure that in post-process, we output the correct format supported by vLLM for added_tokens
- we should output a new_embeddings.safetensors
- we should not have lm_head.weight in adapter_model.safetensors
@@ -73,3 +69,64 @@ def test_post_process_vLLM_adapters_new_tokens():
if "lm_head.weight" in k:
found_lm_head = True
assert not found_lm_head


def test_post_process_vllm_adapters_no_new_tokens():
"""Ensure that an error is returned if no added tokens while tuning, \
but embeddings resized.
"""
# first, double check dummy tuned llama has a lm_head.weight
found_lm_head = False
with safe_open(
os.path.join(DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, "adapter_model.safetensors"),
framework="pt",
) as f:
for k in f.keys():
if "lm_head.weight" in k:
found_lm_head = True
assert found_lm_head
# do the post processing
with pytest.raises(NotImplementedError):
post_process_vLLM_adapters_new_tokens(
DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, None, num_added_tokens=0
)


def test_post_process_in_place_vllm_adapters_new_tokens():
"""Ensure that in post-process, we output the correct format supported by vLLM for added_tokens
- if output dir is not specified, it should modify files in place
- we should output a new_embeddings.safetensors
- we should not have lm_head.weight in adapter_model.safetensors
"""
# first, double check dummy tuned llama has a lm_head.weight
found_lm_head = False
with safe_open(
os.path.join(DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, "adapter_model.safetensors"),
framework="pt",
) as f:
for k in f.keys():
if "lm_head.weight" in k:
found_lm_head = True
assert found_lm_head

# do the post processing
with tempfile.TemporaryDirectory() as tempdir:
shutil.copytree(
DUMMY_TUNED_LLAMA_WITH_ADDED_TOKENS, tempdir, dirs_exist_ok=True
)
post_process_vLLM_adapters_new_tokens(tempdir, None, num_added_tokens=1)

# check that new_embeddings.safetensors exist
new_embeddings = os.path.join(tempdir, "new_embeddings.safetensors")
assert os.path.exists(new_embeddings)

# check that lm_head.weight NOT in the new outputted adapter_model.safetensors
adapter_model = os.path.join(tempdir, "adapter_model.safetensors")
assert os.path.exists(adapter_model)

found_lm_head = False
with safe_open(adapter_model, framework="pt") as f:
for k in f.keys():
if "lm_head.weight" in k:
found_lm_head = True
assert not found_lm_head
20 changes: 20 additions & 0 deletions tests/utils/test_tokenizer_data_utils.py
@@ -0,0 +1,20 @@
# Third Party
from transformers import AutoModelForCausalLM, AutoTokenizer

# First Party
from tests.data import MODEL_NAME

# Local
from tuning.data.tokenizer_data_utils import tokenizer_and_embedding_resize


def test_tokenizer_and_embedding_resize_return_values():
"""Test to ensure number of added tokens are returned correctly"""
special_tokens_dict = {"pad_token": "<pad>"}
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
metadata = tokenizer_and_embedding_resize(special_tokens_dict, tokenizer, model)
assert metadata["num_new_tokens"] == 1
assert "new_embedding_size" in metadata
6 changes: 5 additions & 1 deletion tuning/sft_trainer.py
@@ -90,7 +90,7 @@ def train(
attention_and_distributed_packing_config: Optional[
AttentionAndDistributedPackingConfig
] = None,
):
) -> tuple[SFTTrainer, dict]:
"""Call the SFTTrainer
Args:
@@ -118,6 +118,10 @@
Should be used in combination with quantized_lora_config. Also currently
fused_lora and fast_kernels must be used together (may change in future). \
attention_and_distributed_packing_config: Used for padding-free attention and multipack.
Returns:
Tuple: Instance of SFTTrainer and a dict of metadata.
The metadata contains information on the number of tokens added while tuning.
"""

train_args, logger = set_log_level(train_args, "sft_trainer_train")
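Since `train` now returns both the trainer and a metadata dict, downstream code can chain tuning and the vLLM post-processing step. A hedged sketch follows; the config argument objects and the `num_new_tokens` key are assumptions based on the tests and script in this commit.

```python
# Sketch only: the *_args/config objects are the usual sft_trainer configuration
# dataclasses (assumed), and the metadata key mirrors added_tokens_info.json.
from tuning import sft_trainer
from tuning.utils.merge_model_utils import post_process_vLLM_adapters_new_tokens


def tune_then_post_process(model_args, data_args, train_args, tune_config, ckpt_dir):
    """Run tuning, then rewrite the adapter for vLLM if new tokens were added."""
    trainer, metadata = sft_trainer.train(
        model_args, data_args, train_args, tune_config
    )
    num_added_tokens = metadata["num_new_tokens"]  # assumed key name
    if num_added_tokens > 0:
        # Writes new_embeddings.safetensors and strips lm_head.weight from
        # adapter_model.safetensors; with no output path this happens in place.
        post_process_vLLM_adapters_new_tokens(ckpt_dir, None, num_added_tokens)
    return trainer, num_added_tokens
```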
3 changes: 1 addition & 2 deletions tuning/utils/merge_model_utils.py
@@ -178,8 +178,7 @@ def post_process_vLLM_adapters_new_tokens(
# Retain all other weights in adapters.safetensors
adapters[k] = f.get_tensor(k)

if not os.path.exists(modified_checkpoint_path):
os.makedirs(modified_checkpoint_path, exist_ok=True)
os.makedirs(modified_checkpoint_path, exist_ok=True)

save_file(
new_embeddings,
