This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit 05a43d4: chatglm & copyright
DDEle committed Dec 20, 2023 · 1 parent 93fbaa0
Showing 7 changed files with 62 additions and 17 deletions.
@@ -1,11 +1 @@
-torch
-transformers
-numpy
-sentencepiece
-protobuf<3.20
-einops
-accelerate
-peft
-datasets
-transformers_stream_generator
-tiktoken
+-r scripts/requirements/common.txt
@@ -1,13 +1,31 @@
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

import numpy as np
import re
import sys
import os


def calculate_percentile(data, percentile):
return np.percentile(data, percentile, method="closest_observation")


def calculate_mean(data):
return np.mean(data)


def parse_output_file(file_path):
predictions = []
with open(file_path, 'r', encoding='UTF-8', errors='ignore') as file:
@@ -17,6 +35,8 @@ def parse_output_file(file_path):
prediction_time = float(match.group(1)) # Assuming the prediction time is in the second column
predictions.append(prediction_time)
return predictions


def parse_memory_file(memory_file):
memory_values = []
if os.path.exists(memory_file):
@@ -51,7 +71,7 @@ def parse_memory_file(memory_file):
p99 = calculate_percentile(predictions, 99)
latency_mean = calculate_mean(predictions[1:])
total_latency = np.sum(predictions)

print("P90: {:.2f} ms".format(p90))
print("P99: {:.2f} ms".format(p99))
print("average_latency: {:.2f} ms".format(latency_mean))
@@ -66,7 +86,7 @@ def parse_memory_file(memory_file):
log_file = os.environ.get("WORKSPACE") + "/cpp_graph_summary.log"
log_prefix = os.environ.get("log_prefix")
link = str(log_prefix) + os.path.basename(output_file)
-with open (log_file, 'a') as f:
+with open(log_file, 'a') as f:
f.write("engine,")
f.write("latency,")
f.write(model + ",")
@@ -82,8 +102,8 @@ def parse_memory_file(memory_file):
f.write(link + ",")
f.write("{:.2f},".format(p90))
f.write("{:.2f},".format(p99))
-#f.write(",latency:")
-#for latency in predictions:
+# f.write(",latency:")
+# for latency in predictions:
# f.write(",{:.2f}".format(latency))
f.write("\n")
f.close()
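The parsing script computes tail latencies with NumPy's "closest_observation" percentile method, which returns an actual measured sample rather than interpolating between neighbors, and it averages predictions[1:], presumably so the first-token latency (which includes prompt processing) does not skew the mean. A minimal sketch with made-up latency values:

import numpy as np

# Hypothetical per-token latencies in ms; the first sample stands in for
# the first-token latency, which is typically much larger than the rest.
latencies = [120.0, 33.1, 32.8, 34.0, 33.5, 35.2, 32.9, 33.3]

# method="closest_observation" (NumPy >= 1.22) picks a real observation,
# so the reported P90/P99 are latencies that actually occurred.
p90 = np.percentile(latencies, 90, method="closest_observation")
p99 = np.percentile(latencies, 99, method="closest_observation")

# Mirror the script: skip the first sample when averaging.
latency_mean = np.mean(latencies[1:])

print("P90: {:.2f} ms".format(p90))
print("P99: {:.2f} ms".format(p99))
print("average_latency: {:.2f} ms".format(latency_mean))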
@@ -1,4 +1,20 @@
#!/bin/bash
+#===============================================================================
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#===============================================================================
+
script_dir=$(dirname "${BASH_SOURCE[0]}")
set -x
quant_nthr=48
@@ -121,6 +137,7 @@ function main() {

# init params
precision_list=()
+requirements_file="requirements.txt" # some models need extra constraints
if [[ "${model}" == "llama-2-7b-chat" ]]; then
quant_script="./build/bin/quant_llama"
infer_cmd="./build/bin/run_llama"
@@ -182,6 +199,7 @@ function main() {
model_name="THUDM/chatglm-6b"
input_model="/tf_dataset2/models/pytorch/chatglm-6b"
extension=" --model_name chatglm --tokenizer $input_model"
+requirements_file="scripts/requirements/chatglm-6b.txt"
elif [[ "${model}" == "baichuan2-13b" ]]; then
quant_script="./build/bin/quant_baichuan"
infer_cmd="python ./scripts/inference.py"
@@ -199,6 +217,7 @@ function main() {
infer_cmd="./build/bin/run_mistral"
model_name="mistralai/Mistral-7B-v0.1"
input_model="/tf_dataset2/models/pytorch/Mistral-7B-v0.1"
+requirements_file="scripts/requirements/mistral.txt"
elif [[ "${model}" == "qwen-7b" ]]; then
quant_script="./build/bin/quant_qwen"
infer_cmd="./build/bin/run_qwen"
@@ -250,7 +269,7 @@ function main() {
cd ..

## prepare example requiement
-pip install -r requirements.txt
+pip install -r "$requirements_file"
if [[ "${model}" == "baichuan"* ]] || [[ "${model}" == "mistral-7b" ]]; then
pip install --force-reinstall transformers==4.33.1
fi
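The script change above replaces the hard-coded pip install -r requirements.txt with a requirements_file variable that individual model branches override. A rough Python equivalent of that dispatch, using only the overrides visible in this diff (every other model keeps the default):

import subprocess

# Overrides taken from the diff; all other models fall back to the
# example's default requirements.txt.
REQUIREMENTS_OVERRIDES = {
    "chatglm-6b": "scripts/requirements/chatglm-6b.txt",
    "mistral-7b": "scripts/requirements/mistral.txt",
}

def install_requirements(model: str) -> None:
    requirements_file = REQUIREMENTS_OVERRIDES.get(model, "requirements.txt")
    subprocess.run(["pip", "install", "-r", requirements_file], check=True)

install_requirements("chatglm-6b")

Note that pip resolves the nested "-r common.txt" include relative to the file that contains it, so the shared pins come along automatically.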
scripts/requirements/chatglm-6b.txt
@@ -0,0 +1,3 @@
+# To avoid the error: 'ChatGLMTokenizer' object has no attribute 'sp_tokenizer'
+-r common.txt
+transformers==4.33.1
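The transformers==4.33.1 pin works around an incompatibility between ChatGLM-6B's remote tokenizer code and newer transformers releases. A minimal repro sketch, assuming network access to the Hugging Face Hub; with 4.33.1 the load succeeds, while some later releases raise the AttributeError quoted in the comment above:

from transformers import AutoTokenizer

# ChatGLM-6B ships its tokenizer as remote code, so trust_remote_code=True
# is required; on affected transformers versions this line fails with
# AttributeError: 'ChatGLMTokenizer' object has no attribute 'sp_tokenizer'.
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
print(tokenizer.tokenize("hello world"))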
scripts/requirements/common.txt
@@ -0,0 +1,11 @@
+torch
+transformers
+numpy
+sentencepiece
+protobuf<3.20
+einops
+accelerate
+peft
+datasets
+transformers_stream_generator
+tiktoken
scripts/requirements/mistral.txt
@@ -0,0 +1,2 @@
+-r common.txt
+transformers>=4.34.0
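The transformers>=4.34.0 floor matches the release that, to the best of my knowledge, first shipped the Mistral architecture, so older installs cannot resolve the model class at all. A quick sanity check (packaging is already in setup.py's install_requires):

from packaging import version
import transformers

# Assumption: Mistral model classes first appeared in transformers 4.34.0.
assert version.parse(transformers.__version__) >= version.parse("4.34.0"), \
    "Mistral support requires transformers>=4.34.0"

from transformers import MistralForCausalLM  # import works only on >= 4.34.0
print(MistralForCausalLM.__name__)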
setup.py (1 addition, 1 deletion)
@@ -47,7 +47,7 @@ def check_env_flag(name: str, default: bool = False) -> bool:

# define install requirements
install_requires_list = ['packaging', 'numpy', 'schema', 'pyyaml']
-opt_install_requires_list = ['neural_compressor', 'transformers==4.34.1']
+opt_install_requires_list = ['neural_compressor', 'transformers']


packages_list = find_packages()
