From 7e72a436f4e2e627dbc067f080e4634f330cbf2f Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 15 Nov 2024 23:00:59 -0500
Subject: [PATCH 1/3] optim e2e tests to run a bit faster

---
 tests/e2e/conftest.py                        | 19 +++++++++++++++++++
 tests/e2e/multigpu/test_llama.py             | 16 ++++++----------
 tests/e2e/patched/test_fa_xentropy.py        |  2 ++
 .../e2e/patched/test_lora_llama_multipack.py |  3 +++
 4 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 723a44f03a..3781cc94db 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -5,6 +5,25 @@
 import tempfile
 
 import pytest
+from huggingface_hub import snapshot_download
+
+
+@pytest.fixture(scope="session", autouse=True)
+def download_smollm2_135m_model():
+    # download the model
+    snapshot_download("HuggingFaceTB/SmolLM-135M")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def download_tatsu_lab_alpaca_dataset():
+    # download the dataset
+    snapshot_download("tatsu-lab/alpaca", repo_type="dataset")
+
+
+@pytest.fixture(scope="session", autouse=True)
+def download_mhenrichsen_alpaca_2k_dataset():
+    # download the dataset
+    snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset")
 
 
 @pytest.fixture
diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index b2c8abc604..df2fbdf93a 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -25,7 +25,7 @@
 @pytest.fixture(scope="session", autouse=True)
 def download_model():
     # download the model
-    snapshot_download("TinyLlama/TinyLlama_v1.1")
+    snapshot_download("HuggingFaceTB/SmolLM-135M")
 
 
 class TestMultiGPULlama:
@@ -149,8 +149,7 @@ def test_dpo_lora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "TinyLlama/TinyLlama_v1.1",
-                "tokenizer_type": "LlamaTokenizer",
+                "base_model": "HuggingFaceTB/SmolLM-135M",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -163,12 +162,10 @@ def test_dpo_lora_ddp(self, temp_dir):
                 "lora_target_linear": True,
                 "val_set_size": 0.05,
                 "special_tokens": {
-                    "unk_token": "<unk>",
-                    "bos_token": "<s>",
-                    "eos_token": "</s>",
+                    "pad_token": "<|endoftext|>",
                 },
                 "rl": "dpo",
-                "chat_template": "llama3",
+                "chat_template": "chatml",
                 "datasets": [
                     {
                         "path": "fozziethebeat/alpaca_messages_2k_dpo_test",
@@ -422,8 +419,7 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "axolotl-ai-co/TinyLlama_v1.1-bnb-nf4-bf16",
-                "tokenizer_type": "AutoTokenizer",
+                "base_model": "axolotl-ai-co/SmolLM2-135M-bnb-nf4-bf16",
                 "adapter": "qlora",
                 "mean_resizing_embeddings": True,
                 "load_in_4bit": True,
@@ -441,7 +437,7 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
                 "sequence_len": 2048,
                 "val_set_size": 0.05,
                 "special_tokens": {
-                    "pad_token": "",
+                    "pad_token": "<|endoftext|>",
                 },
                 "datasets": [
                     {
diff --git a/tests/e2e/patched/test_fa_xentropy.py b/tests/e2e/patched/test_fa_xentropy.py
index 0991bdd742..8b76362fb4 100644
--- a/tests/e2e/patched/test_fa_xentropy.py
+++ b/tests/e2e/patched/test_fa_xentropy.py
@@ -66,6 +66,8 @@ def test_lora_packing_fa_cross_entropy(self, temp_dir):
                     },
                 ],
                 "num_epochs": 1,
+                "max_steps": 10,
+                "save_steps": 10,
                 "micro_batch_size": 8,
                 "gradient_accumulation_steps": 1,
                 "output_dir": temp_dir,
diff --git a/tests/e2e/patched/test_lora_llama_multipack.py b/tests/e2e/patched/test_lora_llama_multipack.py
index f251f9b661..5dbf146542 100644
--- a/tests/e2e/patched/test_lora_llama_multipack.py
+++ b/tests/e2e/patched/test_lora_llama_multipack.py
@@ -56,6 +56,8 @@ def test_lora_packing(self, temp_dir):
                     },
                 ],
                 "num_epochs": 2,
+                "max_steps": 20,
+                "save_steps": 10,
                 "micro_batch_size": 8,
                 "gradient_accumulation_steps": 1,
                 "output_dir": temp_dir,
@@ -109,6 +111,7 @@ def test_lora_gptq_packed(self, temp_dir):
                     },
                 ],
                 "num_epochs": 2,
+                "max_steps": 20,
                 "save_steps": 0.5,
                 "micro_batch_size": 8,
                 "gradient_accumulation_steps": 1,

From f5a25a3b14473bf3375635020649d8b71cf5ba53 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Sat, 16 Nov 2024 14:07:40 -0500
Subject: [PATCH 2/3] run prequant w/o lora_modules_to_save

---
 tests/e2e/multigpu/test_llama.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index df2fbdf93a..f37368e847 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -427,10 +427,10 @@ def test_fsdp_qlora_prequant_packed(self, temp_dir):
                 "lora_alpha": 16,
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
-                "lora_modules_to_save": [
-                    "embed_tokens",
-                    "lm_head",
-                ],
+                # "lora_modules_to_save": [
+                #     "embed_tokens",
+                #     "lm_head",
+                # ],
                 "sample_packing": True,
                 "eval_sample_packing": False,
                 "pad_to_sequence_len": True,

From 30b7701d52c981dd211a08e19fa83c83927af331 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Mon, 18 Nov 2024 08:41:09 -0500
Subject: [PATCH 3/3] use smollm2

---
 tests/e2e/conftest.py            |  2 +-
 tests/e2e/multigpu/test_llama.py | 18 +++++++++---------
 tests/e2e/test_optimizers.py     |  2 +-
 tests/e2e/test_packing_loss.py   |  2 +-
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py
index 3781cc94db..c316f6c83e 100644
--- a/tests/e2e/conftest.py
+++ b/tests/e2e/conftest.py
@@ -11,7 +11,7 @@
 @pytest.fixture(scope="session", autouse=True)
 def download_smollm2_135m_model():
     # download the model
-    snapshot_download("HuggingFaceTB/SmolLM-135M")
+    snapshot_download("HuggingFaceTB/SmolLM2-135M")
 
 
 @pytest.fixture(scope="session", autouse=True)
diff --git a/tests/e2e/multigpu/test_llama.py b/tests/e2e/multigpu/test_llama.py
index f37368e847..d8dcf3118a 100644
--- a/tests/e2e/multigpu/test_llama.py
+++ b/tests/e2e/multigpu/test_llama.py
@@ -25,7 +25,7 @@
 @pytest.fixture(scope="session", autouse=True)
 def download_model():
     # download the model
-    snapshot_download("HuggingFaceTB/SmolLM-135M")
+    snapshot_download("HuggingFaceTB/SmolLM2-135M")
 
 
 class TestMultiGPULlama:
@@ -37,7 +37,7 @@ def test_lora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "adapter": "lora",
                 "lora_r": 8,
@@ -93,7 +93,7 @@ def test_lora_ddp_packed(self, temp_dir, gradient_accumulation_steps):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "sample_packing": True,
                 "eval_sample_packing": False,
@@ -149,7 +149,7 @@ def test_dpo_lora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -218,7 +218,7 @@ def test_dpo_qlora_ddp(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "sample_packing": False,
                 "eval_sample_packing": False,
@@ -291,7 +291,7 @@ def test_fsdp(self, temp_dir, gradient_accumulation_steps):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 2048,
                 "val_set_size": 0.01,
                 "special_tokens": {
@@ -356,7 +356,7 @@ def test_fsdp_packed(self, temp_dir, fsdp_state_dict_type):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 2048,
@@ -499,7 +499,7 @@ def test_ds_zero3_packed(self, temp_dir, gradient_accumulation_steps):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sample_packing": True,
                 "pad_to_sequence_len": True,
                 "sequence_len": 2048,
@@ -549,7 +549,7 @@ def test_ds_zero3_qlora_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "load_in_4bit": True,
                 "adapter": "qlora",
                 "lora_r": 8,
diff --git a/tests/e2e/test_optimizers.py b/tests/e2e/test_optimizers.py
index b9fa368f6f..af5445461c 100644
--- a/tests/e2e/test_optimizers.py
+++ b/tests/e2e/test_optimizers.py
@@ -113,7 +113,7 @@ def test_adopt_adamw(self, temp_dir):
     def test_fft_schedule_free_adamw(self, temp_dir):
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 1024,
                 "val_set_size": 0.1,
                 "special_tokens": {
diff --git a/tests/e2e/test_packing_loss.py b/tests/e2e/test_packing_loss.py
index 73f9e60bac..60f1673814 100644
--- a/tests/e2e/test_packing_loss.py
+++ b/tests/e2e/test_packing_loss.py
@@ -31,7 +31,7 @@ def test_loss_packed(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "HuggingFaceTB/SmolLM-135M",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "sequence_len": 1024,
                 "sample_packing": True,
                 "flash_attention": True,