Commit 26e472e

Support of AutoModel (#192)
* Docker-driven tests with latest SDKs (#180)
  * Added Docker support to the Jenkins tests
  * Addressed comments
  * Updated qaic tests time upper limit to 60 minutes
* Added support for embedding models
* Lint & format
* Added batch_size
* Docstring added
* Addressed review comments (several rounds) and assorted minor/major fixes
* Update ONNX_EXPORT_OPSET to 13

Signed-off-by: amitraj <quic_amitraj@quicinc.com>
Signed-off-by: Onkar Chougule <quic_ochougul@quicinc.com>
1 parent 0594f5e commit 26e472e

File tree

8 files changed (+384, -58 lines)

QEfficient/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -5,7 +5,7 @@
 #
 # -----------------------------------------------------------------------------

-from QEfficient.base import QEffAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader
+from QEfficient.base import QEFFAutoModel, QEFFAutoModelForCausalLM, QEFFCommonLoader
 from QEfficient.compile.compile_helper import compile
 from QEfficient.exporter.export_hf_to_cloud_ai_100 import qualcomm_efficient_converter
 from QEfficient.generation.text_generation_inference import cloud_ai_100_exec_kv
@@ -21,7 +21,7 @@
     "export",
     "compile",
     "cloud_ai_100_exec_kv",
-    "QEffAutoModel",
+    "QEFFAutoModel",
     "QEFFAutoModelForCausalLM",
     "QEffAutoPeftModelForCausalLM",
     "QEFFCommonLoader",

QEfficient/base/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -6,4 +6,4 @@
 # -----------------------------------------------------------------------------

 from QEfficient.base.common import QEFFCommonLoader  # noqa: F401
-from QEfficient.transformers.models.modeling_auto import QEffAutoModel, QEFFAutoModelForCausalLM  # noqa: F401
+from QEfficient.transformers.models.modeling_auto import QEFFAutoModel, QEFFAutoModelForCausalLM  # noqa: F401

QEfficient/base/common.py

Lines changed: 1 addition & 1 deletion
@@ -76,7 +76,7 @@ def __init__(self, *args: Any, **kwds: Any) -> None:
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path: str, *args, **kwargs) -> QEFFBaseModel:
         """
-        Downloads HuggingFace model if already doesn't exist locally, returns QEffAutoModel object based on type of model.
+        Downloads HuggingFace model if already doesn't exist locally, returns QEFFAutoModel object based on type of model.
         """
         if not os.path.isdir(pretrained_model_name_or_path):
            pretrained_model_name_or_path = login_and_download_hf_lm(pretrained_model_name_or_path, *args, **kwargs)
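
Per the docstring above, the loader resolves a local directory or downloads the checkpoint from Hugging Face and returns the matching QEFF auto class. A minimal sketch; the checkpoint name is illustrative and the returned type is an assumption based on that docstring:

from QEfficient.base.common import QEFFCommonLoader

# "gpt2" is only an example checkpoint; any supported Hugging Face model id
# or local directory should resolve the same way.
qeff_model = QEFFCommonLoader.from_pretrained("gpt2")
print(type(qeff_model).__name__)  # expected: a QEFF auto class, e.g. QEFFAutoModelForCausalLM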

QEfficient/base/modeling_qeff.py

Lines changed: 3 additions & 1 deletion
@@ -251,7 +251,9 @@ def _compile(

         # Check if already compiled
         compile_hash = compile_hash.hexdigest()[:16]
-        qpc_path = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
+        compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
+        qpc_path = compile_dir / "qpc"
+        qpc_path.mkdir(parents=True, exist_ok=True)
         if qpc_path.is_dir():
             if (qpc_path / "programqpc.bin").is_file():
                 self.qpc_path = qpc_path
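
The net effect is that compiled artifacts now land in a qpc/ subdirectory of the hash-suffixed folder rather than in the hash-suffixed folder itself. A minimal, self-contained sketch of that layout; the base path and hash input are made up for illustration:

import hashlib
from pathlib import Path

# Hypothetical base path and hash input, only to show the new directory shape.
qpc_path = Path("cache/my-model/qpc-16cores")
compile_hash = hashlib.sha256(b"serialized-compile-options").hexdigest()[:16]  # truncated to 16 hex chars

compile_dir = qpc_path.with_name(qpc_path.name + "-" + compile_hash)
qpc_path = compile_dir / "qpc"  # e.g. cache/my-model/qpc-16cores-<hash>/qpc
qpc_path.mkdir(parents=True, exist_ok=True)

# A previous compilation is detected by the presence of programqpc.bin.
print(qpc_path, (qpc_path / "programqpc.bin").is_file())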

QEfficient/transformers/models/modeling_auto.py

Lines changed: 277 additions & 46 deletions
Large diffs are not rendered by default.
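
GitHub does not render this diff inline, but the new test file at the end of this commit exercises the QEFFAutoModel class it adds. A minimal usage sketch distilled from that test; the embedding model name and num_cores come from the test, while everything else should be treated as an assumption about the new API:

from transformers import AutoModel, AutoTokenizer

from QEfficient.transformers.models.modeling_auto import QEFFAutoModel

model_name = "BAAI/bge-small-en-v1.5"  # one of the embedding models in the new test
tokenizer = AutoTokenizer.from_pretrained(model_name)
inputs = tokenizer("My name is", return_tensors="pt")

# Wrap the Hugging Face model, export it to ONNX, compile for Cloud AI 100,
# and run inference (the last step assumes an AI 100 device is available).
qeff_model = QEFFAutoModel(AutoModel.from_pretrained(model_name, attn_implementation="eager"))
onnx_path = qeff_model.export()
qeff_model.compile(num_cores=14)
embeddings = qeff_model.generate(inputs=inputs)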

docs/source/hl_api.md

Lines changed: 6 additions & 1 deletion
@@ -8,7 +8,12 @@
    :member-order: bysource
    :members:
 ```
-
+## `QEFFAutoModel`
+```{eval-rst}
+.. autoclass:: QEfficient.transformers.models.modeling_auto.QEFFAutoModel
+   :member-order: bysource
+   :members:
+```
 ## `QEffAutoPeftModelForCausalLM`
 ```{eval-rst}
 .. autoclass:: QEfficient.peft.auto.QEffAutoPeftModelForCausalLM

scripts/Jenkinsfile

Lines changed: 6 additions & 6 deletions
@@ -13,8 +13,8 @@ pipeline {
       steps {
         sh '''
         . ~/.bashrc
-        docker run --privileged -dit --name ${BUILD_TAG} -v ./:/efficient-transformers -v ${HF_PATH}:${DOCKER_HF_PATH} ${DOCKER_LATEST}:master_latest
-        docker exec ${BUILD_TAG} bash -c "
+        sudo docker run --privileged -dit --name ${BUILD_TAG} -v ./:/efficient-transformers -v ${HF_PATH}:${DOCKER_HF_PATH} ${DOCKER_LATEST}:master_latest
+        sudo docker exec ${BUILD_TAG} bash -c "
         cd /efficient-transformers &&
         apt update &&
         apt install -y python3.10-venv &&
@@ -34,7 +34,7 @@ pipeline {
       steps {
         timeout(time: 10, unit: 'MINUTES') {
           sh '''
-          docker exec ${BUILD_TAG} bash -c "
+          sudo docker exec ${BUILD_TAG} bash -c "
           cd /efficient-transformers &&
           . preflight_qeff/bin/activate &&
           mkdir -p $PWD/Non_cli_qaic &&
@@ -50,7 +50,7 @@ pipeline {
       steps {
         timeout(time: 60, unit: 'MINUTES') {
           sh '''
-          docker exec ${BUILD_TAG} bash -c "
+          sudo docker exec ${BUILD_TAG} bash -c "
           cd /efficient-transformers &&
           . preflight_qeff/bin/activate &&
           mkdir -p $PWD/Non_qaic &&
@@ -68,7 +68,7 @@ pipeline {
       steps {
         timeout(time: 15, unit: 'MINUTES') {
           sh '''
-          docker exec ${BUILD_TAG} bash -c "
+          sudo docker exec ${BUILD_TAG} bash -c "
           cd /efficient-transformers &&
           . preflight_qeff/bin/activate &&
           mkdir -p $PWD/cli &&
@@ -88,7 +88,7 @@ pipeline {
       script {
         try {
           sh '''
-          docker rm -f ${BUILD_TAG}
+          sudo docker rm -f ${BUILD_TAG}
           sudo chown -R ubuntu .
           '''
         } catch (error) {
Lines changed: 88 additions & 0 deletions (new test file)
@@ -0,0 +1,88 @@
+# -----------------------------------------------------------------------------
+#
+# Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# -----------------------------------------------------------------------------
+
+
+import numpy as np
+import onnxruntime as ort
+import pytest
+from transformers import AutoModel, AutoTokenizer
+
+from QEfficient.transformers.models.modeling_auto import QEFFAutoModel
+from QEfficient.utils.constants import Constants
+
+embed_test_models = [
+    # model_name, architecture
+    "sentence-transformers/multi-qa-mpnet-base-cos-v1",  # MPNetForMaskedLM
+    "BAAI/bge-reranker-v2-m3",  # XLMRobertaForSequenceClassification
+    "BAAI/bge-small-en-v1.5",  # BertModel
+]
+
+
+def check_embed_pytorch_vs_ort_vs_ai100(
+    model_name: str,
+    seq_len: int = Constants.CTX_LEN,
+    n_layer: int = 1,
+):
+    # Prepare input
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    inputs = tokenizer("My name is", return_tensors="pt")
+
+    # Original PyTorch model
+    pt_model = AutoModel.from_pretrained(
+        model_name,
+        num_hidden_layers=n_layer,
+        attn_implementation="eager",
+        trust_remote_code=True,
+    )
+
+    pt_outputs = pt_model(**inputs)
+    pt_embeddings = pt_outputs[0][0].detach().numpy()
+    # Pytorch transformed model
+    qeff_model = QEFFAutoModel(pt_model)
+    qeff_pt_outputs = qeff_model.generate(inputs=inputs, runtime_ai100=False)
+    qeff_pt_embeddings = qeff_pt_outputs[0][0].detach().numpy()
+    mad = np.mean(np.abs(pt_embeddings - qeff_pt_embeddings))
+    print("Mad for PyTorch and PyTorch transformed qeff_model is ", mad)
+    assert mad <= 0, f"MAD is too high for onnx and Pytorch: {mad}"
+
+    onnx_model = qeff_model.export()
+    ort_session = ort.InferenceSession(str(onnx_model))
+
+    # Prepare the inputs for ONNX Runtime
+    input_ids = np.array(inputs["input_ids"])
+    attention_mask = np.array(inputs["attention_mask"])
+
+    onnx_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
+    # Run inference
+    onnx_outputs = ort_session.run(None, onnx_inputs)
+
+    # Compare Transformed PyTorch and ONNX outputs
+
+    pt_embeddings = pt_outputs[0][0].detach().numpy()
+    onnx_embeddings = onnx_outputs[0]
+    mad = np.mean(np.abs(pt_embeddings - onnx_embeddings))
+    print("Mad for onnx and PyTorch is ", mad)
+    assert mad <= 10**-5, f"MAD is too high for onnx and Pytorch: {mad}"
+
+    qeff_model.compile(
+        num_cores=14,
+    )
+    ai100_output = qeff_model.generate(inputs=inputs)
+
+    # Compare ONNX and AI 100 outputs
+    mad = np.mean(np.abs(ai100_output - onnx_outputs[0]))
+    print("Mad for onnx and AI 100 output is ", mad)
+    assert mad <= 10**-3, f"MAD is too high for onnx and Pytorch: {mad}"
+
+
+@pytest.mark.on_qaic
+@pytest.mark.parametrize("model_name", embed_test_models)
+def test_embed_model_pytorch_vs_onnx_vs_ai100(model_name):
+    """
+    Test function to validate output of the Pytorch, ONNX and AI 100 runtime model output.
+    """
+    check_embed_pytorch_vs_ort_vs_ai100(model_name=model_name, seq_len=32, n_layer=1)
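
To exercise this test locally one would typically run pytest with the on_qaic marker. A hedged sketch of an invocation (the test file path is not shown in this capture, and an AI 100 device plus the qaic SDK are required):

# Run only the qaic-marked embedding test added above; "-k" selects it by name.
import pytest

pytest.main(["-m", "on_qaic", "-k", "test_embed_model_pytorch_vs_onnx_vs_ai100", "-s"])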
