Skip to content
This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

Permalink
[LLM Runtime] Add GGUF API UT (#1160)
Browse files Browse the repository at this point in the history
  • Loading branch information
Zhenzhong1 authored Jan 19, 2024
1 parent ea58cd5 commit 1383c76
Showing 1 changed file with 16 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,22 @@ def test_llm_runtime(self):
print(config_type, cmpData(pt_logits.detach().numpy().flatten(), itrex_logits.flatten()))


def test_gguf_api(self):
    """Smoke-test GGUF model loading and generation via the AutoModel API.

    Loads the 4-bit (Q4_0) GGUF build of Mistral-7B from the HF hub,
    tokenizes a fixed prompt with the local full-precision tokenizer,
    greedily generates 10 new tokens, and compares the produced token
    ids against a pre-recorded reference sequence.
    """
    model_name = "TheBloke/Mistral-7B-v0.1-GGUF"  # HF hub repo holding the GGUF files
    model_file = "mistral-7b-v0.1.Q4_0.gguf"  # 4-bit quantized weight file within the repo
    tokenizer_name = "/tf_dataset2/models/pytorch/Mistral-7B-v0.1"  # local tokenizer path

    prompt = "Once upon a time"
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, trust_remote_code=True)
    inputs = tokenizer(prompt, return_tensors="pt").input_ids
    streamer = TextStreamer(tokenizer)

    model = AutoModelForCausalLM.from_pretrained(model_name, model_file=model_file)
    output = model.generate(inputs, streamer=streamer, max_new_tokens=10)
    print("output = ", output)
    # Use the unittest assertion instead of a bare `assert`: it is not
    # stripped under `python -O` and reports a readable diff on mismatch.
    self.assertEqual(
        output,
        [[1, 5713, 3714, 264, 727, 28725, 736, 403, 264, 1628, 2746, 693,
          6045, 298, 1220, 28723, 985]],
    )


def test_beam_search(self):
model_name = "/tf_dataset2/models/pytorch/gpt-j-6B" # or local path to model
prompts = [
Expand Down

0 comments on commit 1383c76

Please sign in to comment.