From ad4becdc982793275b2f3ef9bd68a89a34c9b2e4 Mon Sep 17 00:00:00 2001
From: jiafu zhang
Date: Wed, 23 Aug 2023 09:35:51 +0800
Subject: [PATCH] CI add inference test for mosaicml-mpt-7b-chat (#157)

Signed-off-by: jiafu zhang
---
 .../chatbot-inference-llama-2-7b-chat-hf.yml  | 12 +++---
 .../chatbot-inference-mpt-7b-chat.yml         | 41 +++++++++++++++++++
 .github/workflows/chatbot-test.yml            |  5 +++
 workflows/chatbot/inference/generate.py       | 10 ++---
 4 files changed, 58 insertions(+), 10 deletions(-)
 create mode 100644 .github/workflows/chatbot-inference-mpt-7b-chat.yml

diff --git a/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml b/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml
index c74d28c7741..ae514f36c77 100644
--- a/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml
+++ b/.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml
@@ -3,9 +3,8 @@ name: Chatbot inference on llama-2-7b-chat-hf
 on:
   workflow_call:
 
-# If there is a new commit, the previous jobs will be canceled
 concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf-lla-7b
   cancel-in-progress: true
 
 jobs:
@@ -16,18 +15,21 @@ jobs:
     - name: Checkout
       uses: actions/checkout@v2
 
+    - name: Load environment variables
+      run: cat ~/itrex-actions-runner/.env >> $GITHUB_ENV
+
     - name: Build Docker Image
-      run: docker build ./ --target cpu --build-arg http_proxy="$HTTP_PROXY_IMAGE_BUILD" --build-arg https_proxy="$HTTPS_PROXY_IMAGE_BUILD" -f workflows/chatbot/inference/docker/Dockerfile -t chatbotinfer:latest && yes | docker container prune && yes | docker image prune
+      run: docker build ./ --target cpu --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f workflows/chatbot/inference/docker/Dockerfile -t chatbotinfer:latest && yes | docker container prune && yes | docker image prune
 
     - name: Start Docker Container
       run: |
         cid=$(docker ps -q --filter "name=chatbotinfer")
         if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
-        docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/chatbot -e http_proxy="$HTTP_PROXY_CONTAINER_RUN" -e https_proxy="$HTTPS_PROXY_CONTAINER_RUN" --name="chatbotinfer" --hostname="chatbotinfer-container" chatbotinfer:latest
+        docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/chatbot -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" --name="chatbotinfer" --hostname="chatbotinfer-container" chatbotinfer:latest
 
     - name: Run Inference Test
       run: |
-        docker exec "chatbotinfer" bash -c "cd /root/chatbot && source activate && conda activate chatbot-demo; python workflows/chatbot/inference/generate.py --base_model_path \"meta-llama/Llama-2-7b-chat-hf\" --hf_access_token \"$HF_ACCESS_TOKEN\" --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "
+        docker exec "chatbotinfer" bash -c "cd /root/chatbot && source activate && conda activate chatbot-demo; python workflows/chatbot/inference/generate.py --base_model_path \"meta-llama/Llama-2-7b-chat-hf\" --hf_access_token \"${{ env.HF_ACCESS_TOKEN }}\" --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "
 
     - name: Stop Container
       if: success() || failure()
diff --git a/.github/workflows/chatbot-inference-mpt-7b-chat.yml b/.github/workflows/chatbot-inference-mpt-7b-chat.yml
new file mode 100644
index 00000000000..aa9ff338243
--- /dev/null
+++ b/.github/workflows/chatbot-inference-mpt-7b-chat.yml
@@ -0,0 +1,41 @@
+name: Chatbot inference on mosaicml/mpt-7b-chat
+
+on:
+  workflow_call:
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-inf-mpt-7b
+  cancel-in-progress: true
+
+jobs:
+  inference:
+    name: inference test
+    runs-on: lms-lab
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v2
+
+    - name: Load environment variables
+      run: cat ~/itrex-actions-runner/.env >> $GITHUB_ENV
+
+    - name: Build Docker Image
+      run: docker build ./ --target cpu --build-arg http_proxy="${{ env.HTTP_PROXY_IMAGE_BUILD }}" --build-arg https_proxy="${{ env.HTTPS_PROXY_IMAGE_BUILD }}" -f workflows/chatbot/inference/docker/Dockerfile -t chatbotinfer:latest && yes | docker container prune && yes | docker image prune
+
+    - name: Start Docker Container
+      run: |
+        cid=$(docker ps -q --filter "name=chatbotinfer")
+        if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid; fi
+        docker run -tid -v /mnt/DP_disk1/huggingface/cache/:/root/.cache/huggingface/hub -v .:/root/chatbot -e http_proxy="${{ env.HTTP_PROXY_CONTAINER_RUN }}" -e https_proxy="${{ env.HTTPS_PROXY_CONTAINER_RUN }}" --name="chatbotinfer" --hostname="chatbotinfer-container" chatbotinfer:latest
+
+    - name: Run Inference Test
+      run: |
+        docker exec "chatbotinfer" bash -c "cd /root/chatbot && source activate && conda activate chatbot-demo; python workflows/chatbot/inference/generate.py --base_model_path \"mosaicml/mpt-7b-chat\" --instructions \"Transform the following sentence into one that shows contrast. The tree is rotten.\" "
+
+    - name: Stop Container
+      if: success() || failure()
+      run: |
+        cid=$(docker ps -q --filter "name=chatbotinfer")
-z "$cid" ]]; then docker stop $cid && docker rm $cid; fi + + - name: Test Summary + run: echo "Inference completed successfully" diff --git a/.github/workflows/chatbot-test.yml b/.github/workflows/chatbot-test.yml index 15f2e57ab6c..afc365dfe72 100644 --- a/.github/workflows/chatbot-test.yml +++ b/.github/workflows/chatbot-test.yml @@ -8,6 +8,7 @@ on: - './requirements.txt' - '.github/workflows/chatbot-test.yml' - '.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml' + - '.github/workflows/chatbot-inference-mpt-7b-chat.yml' - 'intel_extension_for_transformers/**' - 'workflows/chatbot/inference/**' - 'workflows/dlsa/**' @@ -25,3 +26,7 @@ jobs: call-inference-llama-2-7b-chat-hf: uses: ./.github/workflows/chatbot-inference-llama-2-7b-chat-hf.yml + call-inference-mpt-7b-chat: + uses: ./.github/workflows/chatbot-inference-mpt-7b-chat.yml + + diff --git a/workflows/chatbot/inference/generate.py b/workflows/chatbot/inference/generate.py index f340446e5b2..7af13146933 100644 --- a/workflows/chatbot/inference/generate.py +++ b/workflows/chatbot/inference/generate.py @@ -370,12 +370,12 @@ def load_model( tokenizer_name, use_fast=False if (re.search("llama", model_name, re.IGNORECASE) or re.search("neural-chat-7b-v2", model_name, re.IGNORECASE)) else True, - token=hf_access_token, + use_auth_token=hf_access_token, ) if re.search("flan-t5", model_name, re.IGNORECASE): with smart_context_manager(use_deepspeed=use_deepspeed): model = AutoModelForSeq2SeqLM.from_pretrained( - model_name, low_cpu_mem_usage=True, token=hf_access_token + model_name, low_cpu_mem_usage=True, use_auth_token=hf_access_token ) elif (re.search("mpt", model_name, re.IGNORECASE) or re.search("neural-chat-7b-v1", model_name, re.IGNORECASE)): @@ -388,7 +388,7 @@ def load_model( torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, torchscript=cpu_jit, - token=hf_access_token, + use_auth_token=hf_access_token, ) elif ( re.search("gpt", model_name, re.IGNORECASE) @@ -399,7 +399,7 @@ def load_model( ): with smart_context_manager(use_deepspeed=use_deepspeed): model = AutoModelForCausalLM.from_pretrained( - model_name, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, token=hf_access_token + model_name, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, use_auth_token=hf_access_token ) else: raise ValueError( @@ -477,7 +477,7 @@ def load_model( from models.mpt.mpt_trace import jit_trace_mpt_7b, MPTTSModelForCausalLM model = jit_trace_mpt_7b(model) - config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, token=hf_access_token) + config = AutoConfig.from_pretrained(model_name, trust_remote_code=True, use_auth_token=hf_access_token) model = MPTTSModelForCausalLM( model, config, use_cache=use_cache, model_dtype=torch.bfloat16 )