Enhance lora tests with more layer and rank variations #3243

Merged: 18 commits (Mar 10, 2024)
1 change: 1 addition & 0 deletions requirements.txt
@@ -8,6 +8,7 @@ transformers >= 4.38.0 # Required for Gemma.
xformers == 0.0.23.post1 # Required for CUDA 12.1.
fastapi
uvicorn[standard]
peft == 0.8.2
Collaborator
Given that this is only used for testing, it should go to requirements-dev.txt

pydantic >= 2.0 # Required for OpenAI server.
prometheus_client >= 0.18.0
pynvml == 11.5.0
11 changes: 11 additions & 0 deletions tests/lora/conftest.py
@@ -1,5 +1,6 @@
import contextlib
import gc
import os
import tempfile
from collections import OrderedDict
from unittest.mock import patch, MagicMock
@@ -21,6 +22,8 @@
from vllm.model_executor.parallel_utils.parallel_state import (
destroy_model_parallel, initialize_model_parallel)

TMP_PATH = "/mnt/local_storage/"
Collaborator
This should be removed



def cleanup():
destroy_model_parallel()
@@ -121,6 +124,14 @@ def sql_lora_files():
return snapshot_download(repo_id="yard1/llama-2-7b-sql-lora-test")


@pytest.fixture(scope="session")
Collaborator
You don't need this fixture or TMP_PATH above. Pytest already has a built-in tmpdir fixture that you can use: https://docs.pytest.org/en/6.2.x/tmpdir.html#the-tmpdir-fixture (see the sketch after the fixture below).

def tmp_path():
if os.path.exists(TMP_PATH):
return TMP_PATH
else:
return tempfile.mkstemp()[1]
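
A minimal sketch of what the reviewer suggests: because the tests below already take a parameter named tmp_path, deleting this fixture and TMP_PATH would let pytest inject its built-in tmp_path fixture instead (a unique, automatically cleaned-up pathlib.Path per test). The test name here is hypothetical:

import os


def test_uses_builtin_tmp_path(tmp_path):
    # tmp_path is pytest's built-in fixture: a fresh pathlib.Path for each test.
    adapter_dir = os.path.join(tmp_path, "tmp_dir_lora")
    os.makedirs(adapter_dir)
    # model.save_pretrained(adapter_dir) would go here in the real tests.
    assert os.path.isdir(adapter_dir)

Note that the built-in fixture is function-scoped rather than session-scoped, which should be fine here since each test writes its own adapter directory anyway.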


@pytest.fixture(scope="session")
def mixtral_lora_files():
return snapshot_download(repo_id="terrysun/mixtral-lora-adapter")
126 changes: 126 additions & 0 deletions tests/lora/test_layer_variation.py
@@ -0,0 +1,126 @@
import os
from typing import List, Optional
import peft
import pytest
from random import sample
from transformers import AutoModelForCausalLM

import vllm
from vllm.lora.request import LoRARequest

MODEL_PATH = "meta-llama/Llama-2-7b-hf"
PROMPTS = [
"[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nSpellForce 3 is a pretty bad game. The developer Grimlore Games is clearly a bunch of no-talent hacks, and 2017 was a terrible year for games anyway. [/user] [assistant]",
"[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nI wanted to like Grimlore Games' 2017 entry, but in SpellForce 3 they just didn't get anything right. [/user] [assistant]",
"[system] Given a target sentence construct the underlying meaning representation\nof the input sentence as a single function with attributes and attribute\nvalues. This function should describe the target string accurately and the\nfunction must be one of the following ['inform', 'request', 'give_opinion',\n'confirm', 'verify_attribute', 'suggest', 'request_explanation',\n'recommend', 'request_attribute'].\n\nThe attributes must be one of the following:\n['name', 'exp_release_date', 'release_year', 'developer', 'esrb', 'rating',\n'genres', 'player_perspective', 'has_multiplayer', 'platforms',\n'available_on_steam', 'has_linux_release', 'has_mac_release', 'specifier'] [/system] [user] Here is the target sentence:\nBioShock is a good role-playing, action-adventure, shooter that released for PlayStation, Xbox, and PC in 2007. It is available on Steam, and it has a Mac release but not a Linux release. [/user] [assistant]",
]
TMP_PATH = "/mnt/local_storage/"
Collaborator
remove this :)



def get_lora_model(model_id: str, target_modules: List[str], rank: int):
Collaborator (@pcmoritz, Mar 6, 2024)
I currently don't understand this function -- what are the lora model weights that are actually applied on top of the meta-llama/Llama-2-7b-hf base model?

Contributor Author

It's a default-initialized LoRA; we use the merged model as the golden reference to verify correctness. The LoRA weights won't matter as long as we're using the same one on both paths.

Collaborator

Can you point to where in the docs it says it is a default LoRA and what it is? That part was not clear to me (maybe add a comment; see the sketch after the function below).


model = AutoModelForCausalLM.from_pretrained(model_id)
lora_config = peft.tuners.lora.LoraConfig(target_modules, rank)
lora_model = peft.PeftModel(model, lora_config)
return lora_model
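
A sketch of the comment the reviewer asks for, restating the author's explanation above: the adapter is whatever peft produces by default and is never trained; it only matters that the same adapter is used both through vLLM's LoRA path and, merged, as the golden reference. The docstring wording is illustrative, not from the PR:

from typing import List

import peft
from transformers import AutoModelForCausalLM


def get_lora_model(model_id: str, target_modules: List[str], rank: int):
    """Return a default-initialized peft LoRA wrapper around the base model.

    The adapter weights are never trained. The tests only need the same
    adapter twice: once served through vLLM's LoRA path and once merged into
    the base model (merge_and_unload) to produce the golden reference, so the
    particular weight values do not matter.
    """
    model = AutoModelForCausalLM.from_pretrained(model_id)
    lora_config = peft.tuners.lora.LoraConfig(target_modules, rank)
    lora_model = peft.PeftModel(model, lora_config)
    return lora_model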


def do_sample(llm,
lora_path: Optional[str] = None,
lora_id: Optional[int] = None,
logprobs: int = 0,
n_tokens: int = 256):
prompts = PROMPTS
sampling_params = vllm.SamplingParams(temperature=0,
max_tokens=256,
Collaborator

Is there a reason, by the way, that you are not setting max_tokens=n_tokens here and then skipping the slicing below? (See the sketch after this function.)

logprobs=logprobs,
stop=["[/assistant]"])
outputs = llm.generate(
prompts,
sampling_params,
lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
if lora_id else None)
# Print the outputs.
generated_texts = []
generated_logprobs = []
for output in outputs:
prompt = output.prompt
generated_text = output.outputs[0].text
generated_texts.append(generated_text)
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
generated_logprobs.append([
list(logprob.keys()) for logprob in outputs[0].outputs[0].logprobs
Collaborator

Should this be output.outputs[0].logprobs? Otherwise you will only ever use the first prompt, right?

Collaborator

Wait, but this is only ever using the first prompt (i.e. PROMPTS[0]) if I understand this correctly -- that can't possibly be your intention. Otherwise why even include the other prompts?

Contributor Author (@tterrysun, Mar 7, 2024)

Yes, you're right, I misread the comment. Fixed.

][:n_tokens])
return generated_logprobs if logprobs else generated_texts
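
A sketch of do_sample with the two review points above folded in: max_tokens driven by n_tokens (making the [:n_tokens] slice unnecessary) and logprobs read from each output rather than only from outputs[0], which the author confirms above was the intent. PROMPTS is the module-level constant defined above; everything else mirrors the code as reviewed:

from typing import List, Optional

import vllm
from vllm.lora.request import LoRARequest


def do_sample(llm: vllm.LLM,
              lora_path: Optional[str] = None,
              lora_id: Optional[int] = None,
              logprobs: int = 0,
              n_tokens: int = 256):
    sampling_params = vllm.SamplingParams(temperature=0,
                                          max_tokens=n_tokens,  # reviewer's suggestion
                                          logprobs=logprobs,
                                          stop=["[/assistant]"])
    outputs = llm.generate(
        PROMPTS,
        sampling_params,
        lora_request=LoRARequest(str(lora_id), lora_id, lora_path)
        if lora_id else None)
    generated_texts: List[str] = []
    generated_logprobs: List[List[List[int]]] = []
    for output in outputs:
        text = output.outputs[0].text
        generated_texts.append(text)
        print(f"Prompt: {output.prompt!r}, Generated text: {text!r}")
        # Per-prompt logprobs (output.outputs[0], not outputs[0].outputs[0]),
        # so every prompt contributes, not just the first one.
        generated_logprobs.append(
            [list(logprob.keys()) for logprob in output.outputs[0].logprobs])
    return generated_logprobs if logprobs else generated_texts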


SUPPORTED_MODULES = [
"qkv_proj", "o_proj", "gate_up_proj", "down_proj", "embed_tokens",
"lm_head"
]
TARGET_MODULES_LIST = []
for length in range(2, 6):
TARGET_MODULES_LIST.extend(
[sample(SUPPORTED_MODULES, length) for _ in range(3)])


# Test the functionality when layer and rank are varied
@pytest.mark.parametrize("target_modules", TARGET_MODULES_LIST)
@pytest.mark.parametrize("rank", [8, 16, 32, 64])
def test_layer_variation_functionality(target_modules, rank, tmp_path):
Collaborator

We should just remove this test -- it is completely subsumed by test_layer_variation_verify_reference, right?

Contributor Author

removed!

llm = vllm.LLM(MODEL_PATH,
enable_lora=True,
max_num_seqs=16,
max_loras=4,
tensor_parallel_size=4,
worker_use_ray=True)

model = get_lora_model(MODEL_PATH, target_modules, rank)
tmp_dir = os.path.join(tmp_path, "tmp_dir")
model.save_pretrained(tmp_dir)
# functionality test, only check if probs can be generated without error
do_sample(llm, tmp_dir, 1, logprobs=5, n_tokens=1)


# Verify the reference used below is always the same
@pytest.mark.parametrize("target_modules", TARGET_MODULES_LIST)
@pytest.mark.parametrize("rank", [8, 16, 32, 64])
def test_layer_variation_verify_reference(target_modules, rank, tmp_path):
llm = vllm.LLM(MODEL_PATH,
enable_lora=True,
max_num_seqs=16,
max_loras=4,
tensor_parallel_size=4,
worker_use_ray=True)
model = get_lora_model(MODEL_PATH, target_modules, rank)
tmp_dir_lora = os.path.join(tmp_path, "tmp_dir_lora")
model.save_pretrained(tmp_dir_lora)
merged_probs = do_sample(llm, tmp_dir_lora, 1, logprobs=5, n_tokens=1)
reference_id_sets = [set(prob[0]) for prob in merged_probs]
assert reference_id_sets == [{450, 13, 306, 11221, 2266},
{450, 13, 306, 11221, 2266},
{450, 13, 306, 11221, 2266}]


# Test the correctness when layer and rank are varied
@pytest.mark.parametrize("target_modules", TARGET_MODULES_LIST)
@pytest.mark.parametrize("rank", [8, 16, 32, 64])
def test_layer_variation_correctness(target_modules, rank, tmp_path):
reference_id_sets = [{450, 13, 306, 11221, 2266},
{450, 13, 306, 11221, 2266},
{450, 13, 306, 11221, 2266}]

model = get_lora_model(MODEL_PATH, target_modules, rank)
tmp_dir_merged = os.path.join(tmp_path, "tmp_dir_merged")
merged_model = model.merge_and_unload()
merged_model.save_pretrained(tmp_dir_merged)

llm = vllm.LLM(tmp_dir_merged,
tokenizer=MODEL_PATH,
enable_lora=False,
max_num_seqs=16,
tensor_parallel_size=4,
worker_use_ray=True)
probs = do_sample(llm, logprobs=5, n_tokens=1)
# for the first token, verify the top-5 tokens are identical
id_sets = [set(prob[0]) for prob in probs]
assert id_sets == reference_id_sets