From 7e72a436f4e2e627dbc067f080e4634f330cbf2f Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 15 Nov 2024 23:00:59 -0500
Subject: [PATCH 1/3] optim e2e tests to run a bit faster

---
 tests/e2e/conftest.py                        | 19 +++++++++++++++++++
 tests/e2e/multigpu/test_llama.py             | 16 ++++++----------
 tests/e2e/patched/test_fa_xentropy.py        |  2 ++
 .../e2e/patched/test_lora_llama_multipack.py |  3 +++
 4 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 723a44f03a..3781cc94db 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -5,6 +5,25 @@
 import tempfile
 
 import pytest
+from huggingface_hub import snapshot_download
+
+
+@pytest.fixture(scope="session", autouse=True)
+def download_smollm2_135m_model():
+    # download the model
+    snapshot_download("HuggingFaceTB/SmolLM-135M")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def download_tatsu_lab_alpaca_dataset():
+    # download the dataset
+    snapshot_download("tatsu-lab/alpaca", repo_type="dataset")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def download_mhenrichsen_alpaca_2k_dataset():
+    # download the dataset
+    snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset")
 
 
 @pytest.fixture
diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index b2c8abc604..df2fbdf93a 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -25,7 +25,7 @@
 @pytest.fixture(scope="session", autouse=True)
 def download_model():
     # download the model
-    snapshot_download("TinyLlama/TinyLlama_v1.1")
+    snapshot_download("HuggingFaceTB/SmolLM-135M")
 
 
 class TestMultiGPULlama:
@@ -149,8 +149,7 @@ def test_dpo_lora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "TinyLlama/TinyLlama_v1.1",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM-135M",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -163,12 +162,10 @@ def test_dpo_lora_ddp(self, temp_dir):
                 "lora_target_linear": True,
                 "val_set_size": 0.05,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "rl": "dpo",
-                "chat_template": "llama3",
+                "chat_template": "chatml",
                 "datasets": [
                     {
                         "path": "fozziethebeat/alpaca_messages_2k_dpo_test",
@@ -422,8 +419,7 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "axolotl-ai-co/TinyLlama_v1.1-bnb-nf4-bf16",
-                "tokenizer_type": "AutoTokenizer",
+                "base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16",
                 "adapter": "qlora",
                 "mean_resizing_embeddings": True,
                 "load_in_4bit": True,
@@ -441,7 +437,7 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
                 "sequence_len": 2048,
                 "val_set_size": 0.05,
                 "special_tokens": {
-                    "pad_token": "",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/e2e/patched/test_fa_xentropy.py b/tests/e2e/patched/test_fa_xentropy.py
index 0991bdd742..8b76362fb4 100644
--- a/tests/e2e/patched/test_fa_xentropy.py
+++ b/tests/e2e/patched/test_fa_xentropy.py
@@ -66,6 +66,8 @@ def test_lora_packing_fa_cross_entropy(self, temp_dir):
                     },
                 ],
                 "num_epochs": 1,
+                "max_steps": 10,
+                "save_steps": 10,
                 "micro_batch_size": 8,
                 "gradient_accumulation_steps": 1,
                 "output_dir": temp_dir,
diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
index f251f9b661..5dbf146542 100644
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -56,6 +56,8 @@ def test_lora_packing(self, temp_dir):
                     },
                 ],
                 "num_epochs": 2,
+                "max_steps": 20,
+                "save_steps": 10,
                 "micro_batch_size": 8,
                 "gradient_accumulation_steps": 1,
                 "output_dir": temp_dir,
@@ -109,6 +111,7 @@ def test_lora_gptq_packed(self, temp_dir):
                     },
                 ],
                 "num_epochs": 2,
+                "max_steps": 20,
                 "save_steps": 0.5,
                 "micro_batch_size": 8,
                 "gradient_accumulation_steps": 1,

From f5a25a3b14473bf3375635020649d8b71cf5ba53 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 16 Nov 2024 14:07:40 -0500
Subject: [PATCH 2/3] run prequant w/o lora_modules_to_save

---
 tests/e2e/multigpu/test_llama.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index df2fbdf93a..f37368e847 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -427,10 +427,10 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
                 "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
-                "lora_modules_to_save": [
-                    "embed_tokens",
-                    "lm_head",
-                ],
+                # "lora_modules_to_save": [
+                #     "embed_tokens",
+                #     "lm_head",
+                # ],
                 "sample_packing": True,
                 "eval_sample_packing": False,
                 "pad_to_sequence_len": True,

From 30b7701d52c981dd211a08e19fa83c83927af331 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Mon, 18 Nov 2024 08:41:09 -0500
Subject: [PATCH 3/3] use smollm2

---
 tests/e2e/conftest.py            |  2 +-
 tests/e2e/multigpu/test_llama.py | 18 +++++++++---------
 tests/e2e/test_optimizers.py     |  2 +-
 tests/e2e/test_packing_loss.py   |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 3781cc94db..c316f6c83e 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -11,7 +11,7 @@
 @pytest.fixture(scope="session", autouse=True)
 def download_smollm2_135m_model():
     # download the model
-    snapshot_download("HuggingFaceTB/SmolLM-135M")
+    snapshot_download("HuggingFaceTB/SmolLM2-135M")
 
 
 @pytest.fixture(scope="session", autouse=True)
diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index f37368e847..d8dcf3118a 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -25,7 +25,7 @@
 @pytest.fixture(scope="session", autouse=True)
 def download_model():
     # download the model
-    snapshot_download("HuggingFaceTB/SmolLM-135M")
+    snapshot_download("HuggingFaceTB/SmolLM2-135M")
 
 
 class TestMultiGPULlama:
@@ -37,7 +37,7 @@ def test_lora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "adapter": "lora",
                 "lora_r": 8,
@@ -93,7 +93,7 @@ def test_lora_ddp_packed(self, temp_dir, gradient_accumulation_steps):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "sample_packing": True,
                 "eval_sample_packing": False,
@@ -149,7 +149,7 @@ def test_dpo_lora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -218,7 +218,7 @@ def test_dpo_qlora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -291,7 +291,7 @@ def test_fsdp(self, temp_dir, gradient_accumulation_steps):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "val_set_size": 0.01,
                 "special_tokens": {
@@ -356,7 +356,7 @@ def test_fsdp_packed(self, temp_dir, fsdp_state_dict_type):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 2048,
@@ -499,7 +499,7 @@ def test_ds_zero3_packed(self, temp_dir, gradient_accumulation_steps):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 2048,
@@ -549,7 +549,7 @@ def test_ds_zero3_qlora_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "load_in_4bit": True,
                 "adapter": "qlora",
                 "lora_r": 8,
diff --git a/tests/e2e/test_optimizers.py b/tests/e2e/test_optimizers.py
index b9fa368f6f..af5445461c 100644
--- a/tests/e2e/test_optimizers.py
+++ b/tests/e2e/test_optimizers.py
@@ -113,7 +113,7 @@ def test_adopt_adamw(self, temp_dir):
     def test_fft_schedule_free_adamw(self, temp_dir):
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 1024,
                 "val_set_size": 0.1,
                 "special_tokens": {
diff --git a/tests/e2e/test_packing_loss.py b/tests/e2e/test_packing_loss.py
index 73f9e60bac..60f1673814 100644
--- a/tests/e2e/test_packing_loss.py
+++ b/tests/e2e/test_packing_loss.py
@@ -31,7 +31,7 @@ def test_loss_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 1024,
                 "sample_packing": True,
                 "flash_attention": True,