From 11484c3ebad9f868d0179a46de3d1330d9011822 Mon Sep 17 00:00:00 2001 From: Jing Xu Date: Thu, 16 Nov 2023 06:36:21 +0900 Subject: [PATCH] add dockerfile for llm env setup (#2229) --- docker/Dockerfile.compile | 11 +- docker/Dockerfile.prebuilt | 2 +- examples/cpu/inference/python/llm/Dockerfile | 64 +++++++ examples/cpu/inference/python/llm/README.md | 151 +++++++--------- .../python/llm/tools/env_activate.sh | 26 +++ .../inference/python/llm/tools/env_setup.sh | 143 +++++++++++++++ scripts/compile_bundle.sh | 171 ++++++++++++++---- 7 files changed, 443 insertions(+), 125 deletions(-) create mode 100644 examples/cpu/inference/python/llm/Dockerfile create mode 100644 examples/cpu/inference/python/llm/tools/env_activate.sh create mode 100644 examples/cpu/inference/python/llm/tools/env_setup.sh diff --git a/docker/Dockerfile.compile b/docker/Dockerfile.compile index 279c5461d..30e5acc5a 100644 --- a/docker/Dockerfile.compile +++ b/docker/Dockerfile.compile @@ -17,10 +17,17 @@ RUN apt update && \ vim \ ccache \ numactl \ + gcc-12 \ + g++-12 \ make \ libjpeg-dev \ libpng-dev \ - && rm -rf /var/lib/apt/lists/* + && rm -rf /var/lib/apt/lists/* && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 + RUN useradd -m ubuntu RUN echo 'ubuntu ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers USER ubuntu @@ -31,7 +38,7 @@ RUN curl -fsSL -v -o miniconda.sh -O https://repo.anaconda.com/miniconda/Minicon rm miniconda.sh && \ echo "source ~/miniconda3/bin/activate" >> ~/.bashrc -RUN curl -fsSL -v -o compile_bundle.sh -O https://github.com/intel/intel-extension-for-pytorch/raw/v2.1.0%2Bcpu/scripts/compile_bundle.sh && \ +RUN curl -fsSL -v -o compile_bundle.sh -O https://github.com/intel/intel-extension-for-pytorch/raw/v2.1.100%2Bcpu/scripts/compile_bundle.sh && \ . 
~/miniconda3/bin/activate && \ conda create -y -n py310 python=3.10 && \ conda activate py310 && \ diff --git a/docker/Dockerfile.prebuilt b/docker/Dockerfile.prebuilt index 8a6b22a75..cf4e64ee8 100644 --- a/docker/Dockerfile.prebuilt +++ b/docker/Dockerfile.prebuilt @@ -27,7 +27,7 @@ RUN ${PYTHON} -m pip --no-cache-dir install --upgrade \ # Some TF tools expect a "python" binary RUN ln -s $(which ${PYTHON}) /usr/local/bin/python -ARG IPEX_VERSION=2.1.0 +ARG IPEX_VERSION=2.1.100 ARG PYTORCH_VERSION=2.1.0 ARG TORCHAUDIO_VERSION=2.1.0 ARG TORCHVISION_VERSION=0.16.0 diff --git a/examples/cpu/inference/python/llm/Dockerfile b/examples/cpu/inference/python/llm/Dockerfile new file mode 100644 index 000000000..b11ef7680 --- /dev/null +++ b/examples/cpu/inference/python/llm/Dockerfile @@ -0,0 +1,64 @@ +# NOTE: To build this you will need a docker version >= 19.03 and DOCKER_BUILDKIT=1 +# +# If you do not use buildkit you are not going to have a good time +# +# For reference: +# https://docs.docker.com/develop/develop-images/build_enhancements/ + +ARG BASE_IMAGE=ubuntu:22.04 +FROM ${BASE_IMAGE} AS base +RUN apt update && \ + apt full-upgrade -y && \ + DEBIAN_FRONTEND=noninteractive apt install -y \ + sudo \ + numactl \ + wget \ + vim \ + git \ + gcc-12 \ + g++-12 \ + make \ + curl && \ + rm -rf /var/lib/apt/lists/* && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-12 100 && \ + update-alternatives --install /usr/bin/cc cc /usr/bin/gcc 100 && \ + update-alternatives --install /usr/bin/c++ c++ /usr/bin/g++ 100 +RUN useradd -m ubuntu && \ + echo 'ubuntu ALL=(ALL) NOPASSWD: ALL' >> /etc/sudoers +WORKDIR /home/ubuntu + +FROM base AS dev +COPY . /home/ubuntu/llm +RUN chown -R ubuntu:ubuntu /home/ubuntu/llm && \ + rm /home/ubuntu/llm/Dockerfile + +USER ubuntu + +RUN curl -fsSL -v -o miniconda.sh -O https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash miniconda.sh -b -p ~/miniconda3 && \ + rm miniconda.sh && \ + echo "source ~/miniconda3/bin/activate" >> ~/.bashrc + +RUN . ~/miniconda3/bin/activate && \ + conda create -y -n compile_py310 python=3.10 && \ + conda activate compile_py310 && \ + cd llm && \ + bash tools/env_setup.sh 2 && \ + conda deactivate && \ + conda remove -y -n compile_py310 --all && \ + conda create -y -n py310 python=3.10 && \ + conda activate py310 && \ + bash tools/env_setup.sh 1 && \ + echo "conda activate py310" >> ~/.bashrc + +FROM base AS deploy +USER ubuntu +COPY --from=dev /home/ubuntu/miniconda3 /home/ubuntu/miniconda3 +COPY --from=dev /home/ubuntu/llm /home/ubuntu/llm +COPY --from=dev /home/ubuntu/.bashrc /home/ubuntu/.bashrc +RUN sudo chown -R ubuntu:ubuntu ~/miniconda3 ~/llm ~/.bashrc && \ + sudo mv ~/llm/oneCCL/build/_install /opt/oneCCL && \ + sudo chown -R root:root /opt/oneCCL && \ + rm -rf ~/llm/oneCCL && \ + sed -i "s|ONECCL_PATH=.*|ONECCL_PATH=/opt/oneCCL|" ~/llm/tools/env_activate.sh diff --git a/examples/cpu/inference/python/llm/README.md b/examples/cpu/inference/python/llm/README.md index 477448262..9ba7c57cc 100644 --- a/examples/cpu/inference/python/llm/README.md +++ b/examples/cpu/inference/python/llm/README.md @@ -1,103 +1,80 @@ # Text Generation + We provide the inference benchmarking scripts for large language models text generation.
Supported large language model families include GPT-J, LLaMA, GPT-NeoX, OPT, Falcon, and CodeGen.
The scripts include both single instance and distributed (DeepSpeed) use cases.
The scripts cover model generation inference with low precision cases for different models with the best performance and accuracy (bf16 AMP, static quantization and weight only quantization).
-# Setup
+# Supported Model List
+
+| MODEL FAMILY | Verified < MODEL ID > (Huggingface hub)| FP32/BF16 | Weight only quantization INT8 | Weight only quantization INT4| Static quantization INT8 |
+|---|:---:|:---:|:---:|:---:|:---:|
+|LLAMA| "meta-llama/Llama-2-7b-hf", "meta-llama/Llama-2-13b-hf", "meta-llama/Llama-2-70b-hf" | ✅ | ✅ | ✅ | ✅ |
+|GPT-J| "EleutherAI/gpt-j-6b" | ✅ | ✅ | ✅ | ✅ |
+|GPT-NEOX| "EleutherAI/gpt-neox-20b" | ✅ | ✅ | ✅ | ❎ \*\* |
+|FALCON\*|"tiiuae/falcon-40b" | ✅ | ✅ | ✅ | ❎ \*\*|
+|OPT|"facebook/opt-30b", "facebook/opt-1.3b"| ✅ | ✅ | ✅ | ❎ \*\*|
+|CodeGen|"Salesforce/codegen-2B-multi"| ✅ | ✅ | ✅ | ❎ \*\*|
+
+\* For Falcon models from the remote hub, we need to modify the config.json to use the modeling_falcon.py in transformers. Therefore, in the following scripts, we need to pass an extra configuration file like "--config-file=model_config/tiiuae_falcon-40b_config.json". This is optional for FP32/BF16 but needed for quantizations.
+
+\*\* For GPT-NEOX/FALCON/OPT/CodeGen models, the accuracy recipes of static quantization INT8 are not ready, thus they will be skipped in our coverage.
+
+*Note*: The above verified models (including other models in the same model family, like "codellama/CodeLlama-7b-hf" from the LLAMA family) are well supported with all optimizations like indirect access KV cache, fused ROPE, and prepacked TPP Linear (fp32/bf16). For other LLM model families, we are working to cover those optimizations, which will expand the model list above.
+
+# Environment Setup
+
+1. Get the Intel® Extension for PyTorch\* source code
+
```bash
-WORK_DIR=$PWD
-# GCC 12.3 is required, please set it firstly
-# Create environment (conda recommended)
-conda create -n llm python=3.9 -y
-# install deps
-conda install cmake ninja mkl mkl-include -y
-conda install gperftools -c conda-forge -y
-
-# Install PyTorch 2.1 release
-python -m pip install torch==2.1 --index-url https://download.pytorch.org/whl/cpu
-
-# Install IPEX 2.1 release
-python -m pip install intel_extension_for_pytorch
-
-# Used for accuracy test only
-git clone https://github.com/EleutherAI/lm-evaluation-harness
-cd lm-evaluation-harness
-pip install -e .
-
-# Install transformers
-pip install transformers==4.31.0
-# Install others deps
-pip install cpuid accelerate datasets sentencepiece protobuf==3.20.3
-
-# Setup environment variables for performance on Xeon
-export LD_PRELOAD=${CONDA_PREFIX}/lib/libstdc++.so.6
-export KMP_BLOCKTIME=INF
-export KMP_TPAUSE=0
-export KMP_SETTINGS=1
-export KMP_FORJOIN_BARRIER_PATTERN=dist,dist
-export KMP_PLAIN_BARRIER_PATTERN=dist,dist
-export KMP_REDUCTION_BARRIER_PATTERN=dist,dist
-export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so # Intel OpenMP
-# Tcmalloc is a recommended malloc implementation that emphasizes fragmentation avoidance and scalable concurrency support.
-export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so - -# [Optional] install neural-compressor for GPT-J static quantization and running GPTQ (see below) -pip install neural-compressor==2.3.1 - -# [Optional] The following is only for DeepSpeed case -#Install oneccl-bind-pt(also named torch-ccl) -git clone https://github.com/intel/torch-ccl.git -cd torch-ccl && git checkout v2.1.0+cpu -git submodule sync && git submodule update --init --recursive -python setup.py install -cd ../ -#Install DeepSpeed -git clone https://github.com/delock/DeepSpeedSYCLSupport -cd DeepSpeedSYCLSupport -git checkout gma/run-opt-branch -python -m pip install -r requirements/requirements.txt -python setup.py install -cd ../ -#Install OneCCL -git clone https://github.com/oneapi-src/oneCCL.git -cd oneCCL -mkdir build -cd build -cmake .. -make -j install -source _install/env/setvars.sh -cd ../.. +git clone https://github.com/intel/intel-extension-for-pytorch.git +cd intel-extension-for-pytorch +git checkout v2.1.100+cpu +cd examples/cpu/inference/python/llm +``` -# Get the sample prompt.json -# Make sure the downloaded prompt.json file is under the same directory as that of the python scripts mentioned above. -wget https://intel-extension-for-pytorch.s3.amazonaws.com/miscellaneous/llm/prompt.json +2.a. It is highly recommended to build a Docker container from the provided `Dockerfile`. +```bash +# Build an image with the provided Dockerfile +docker build -t ipex-llm:2.1.100 . + +# Run the container with command below +docker run --rm -it --privileged ipex-llm:2.1.100 bash + +# When the command prompt shows inside the docker container, enter llm examples directory +cd llm ``` -# Supported Model List +2.b. Alternatively, you can take advantage of a provided environment configuration script to setup an environment without using a docker container. -| MODEL FAMILY | Verified < MODEL ID > (Huggingface hub)| FP32/BF16 | Weight only quantzation INT8 | Weight only quantization INT4| Static quantization INT8 | -|---|:---:|:---:|:---:|:---:|:---:| -|LLAMA| "meta-llama/Llama-2-7b-hf", "meta-llama/Llama-2-13b-hf", "meta-llama/Llama-2-70b-hf" | ✅ | ✅ | ✅ | ✅ | -|GPT-J| "EleutherAI/gpt-j-6b" | ✅ | ✅ | ✅ | ✅ | -|GPT-NEOX| "EleutherAI/gpt-neox-20b" | ✅ | ✅ | ✅ | ❎ ** | -|FALCON*|"tiiuae/falcon-40b" | ✅ | ✅ | ✅ | ❎ **| -|OPT|"facebook/opt-30b", "facebook/opt-1.3b"| ✅ | ✅ | ✅ | ❎ **| -|CodeGen|"Salesforce/codegen-2B-multi"| ✅ | ✅ | ✅ | ❎ **| +```bash +# GCC 12.3 is required. Installation can be taken care of by the environment configuration script. +# Create a conda environment +conda create -n llm python=3.9 -y +conda activate llm -*For Falcon models from remote hub, we need to modify the config.json to use the modeling_falcon.py in transformers. Therefore, in the following scripts, we need to pass an extra configuration file like "--config-file=model_config/tiiuae_falcon-40b_config.json". This is optional for FP32/BF16 but needed for quantizations. +# Setup the environment with the provided script +bash ./tools/env_setup.sh +``` -** For GPT-NEOX/FALCON/OPT/CodeGen models, the accuracy recipes of static quantization INT8 are not ready thus they will be skipped in our coverage. +3. Once an environment is configured with either method above, set necessary environment variables with an environment variables activation script and download the sample `prompt.json`. 
-*Note*: The above verified models (including other models in the same model family, like "codellama/CodeLlama-7b-hf" from LLAMA family) are well supported with all optimizations like indirect access KV cache, fused ROPE, and prepacked TPP Linear (fp32/bf16). For other LLM model families, we are working in progress to cover those optimizations, which will expand the model list above. +```bash +# Activate environment variables +source ./tools/env_activate.sh + +# Get the sample prompt.json +wget https://intel-extension-for-pytorch.s3.amazonaws.com/miscellaneous/llm/prompt.json + +``` # Run Models Generations -| Benchmark mode | FP32/BF16 | Weight only quantzation INT8 | Weight only quantization INT4 | Static quantization INT8 | +| Benchmark mode | FP32/BF16 | Weight only quantzation INT8 | Weight only quantization INT4 | Static quantization INT8 | |---|:---:|:---:|:---:|:---:| -|Single instance | ✅ | ✅ | ✅ | ✅ | -| Distributed (autotp) | ✅ | ✅ | ❎ | ❎ | +|Single instance | ✅ | ✅ | ✅ | ✅ | +| Distributed (autotp) | ✅ | ✅ | ❎ | ❎ | You can run LLM with a one-click Python script "run.py" for all inference cases. ``` @@ -107,7 +84,7 @@ python run.py --help # for more detailed usages ### Single Instance Performance ```bash # Get prompt file to the path of scripts -mv PATH/TO/prompt.json ./single_instance +cp prompt.json ./single_instance export WORK_DIR=./ # bf16 benchmark @@ -134,7 +111,7 @@ Notes: ### Distributed Performance with DeepSpeed (autoTP) ```bash # Get prompt file to the path of scripts -mv PATH/TO/prompt.json ./distributed +cp prompt.json ./distributed export WORK_DIR=./ unset KMP_AFFINITY @@ -158,13 +135,13 @@ Notes: # Get prompt file to the path of scripts export WORK_DIR=./ cd single_instance -mv PATH/TO/prompt.json ./ +cp PATH/TO/prompt.json ./ # bfloat16 benchmark OMP_NUM_THREADS= numactl -m -C python run_generation.py --benchmark -m --dtype bfloat16 --ipex --deployment-mode # quantization benchmark #To run quantization performance, you need to firstly get the quantized model with the following step (1) and then run the performance benchmark with the following step (2) -## (1) Do quantization to get the quantized model +## (1) Do quantization to get the quantized model ## note: llama/gptj we have both IPEX smooth quant and weight-only-quantization, while for rest models, we recommend weight-only-quantization mkdir saved_results @@ -180,7 +157,7 @@ python run_gpt-neox_quantization.py --ipex-weight-only-quantization --output-dir ## Falcon quantization (example of config-file: utils/model_config/tiiuae_falcon-40b_config.json) python run_falcon_quantization.py --ipex-weight-only-quantization --output-dir "saved_results" --int8-bf16-mixed -m --config-file ## OPT quantization -python run_opt_quantization.py --ipex-weight-only-quantization --output-dir "saved_results" --int8-bf16-mixed -m +python run_opt_quantization.py --ipex-weight-only-quantization --output-dir "saved_results" --int8-bf16-mixed -m ## CodeGen quantization python run_codegen_quantization.py --ipex-weight-only-quantization --output-dir "saved_results" --int8-bf16-mixed -m @@ -289,7 +266,7 @@ deepspeed --bind_cores_to_rank run_generation_with_deepspeed.py --benchmark -m E ## Distributed Accuracy with DeepSpeed (autoTP) ```bash -# Run distributed accuracy with 2 ranks of one node for bfloat16 with ipex and jit +# Run distributed accuracy with 2 ranks of one node for bfloat16 with ipex and jit source ${ONECCL_DIR}/build/_install/env/setvars.sh export 
LD_PRELOAD=${CONDA_PREFIX}/lib/libiomp5.so:${CONDA_PREFIX}/lib/libtcmalloc.so diff --git a/examples/cpu/inference/python/llm/tools/env_activate.sh b/examples/cpu/inference/python/llm/tools/env_activate.sh new file mode 100644 index 000000000..e37c9967e --- /dev/null +++ b/examples/cpu/inference/python/llm/tools/env_activate.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Setup environment variables for performance on Xeon +export KMP_BLOCKTIME=INF +export KMP_TPAUSE=0 +export KMP_SETTINGS=1 +export KMP_FORJOIN_BARRIER_PATTERN=dist,dist +export KMP_PLAIN_BARRIER_PATTERN=dist,dist +export KMP_REDUCTION_BARRIER_PATTERN=dist,dist + +env | grep CONDA_PREFIX > /dev/null +if [ $? -eq 0 ]; then + export LD_PRELOAD=${CONDA_PREFIX}/lib/libstdc++.so.6 + export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libiomp5.so # Intel OpenMP + # Tcmalloc is a recommended malloc implementation that emphasizes fragmentation avoidance and scalable concurrency support. + export LD_PRELOAD=${LD_PRELOAD}:${CONDA_PREFIX}/lib/libtcmalloc.so +else + echo "Conda environment is not available. You need to set environment variable LD_PRELOAD to dynamic libraries of Intel OpenMP and TcMalloc manually." +fi + +ONECCL_PATH=./oneCCL/build/_install +if [ ! -d ${ONECCL_PATH} ]; then + echo "oneCCL is not available." +else + source ${ONECCL_PATH}/env/setvars.sh +fi diff --git a/examples/cpu/inference/python/llm/tools/env_setup.sh b/examples/cpu/inference/python/llm/tools/env_setup.sh new file mode 100644 index 000000000..492c47d69 --- /dev/null +++ b/examples/cpu/inference/python/llm/tools/env_setup.sh @@ -0,0 +1,143 @@ +#!/usr/bin/env bash +set -e + +LM_EVA_COMMIT=cc9778fbe4fa1a709be2abed9deb6180fd40e7e2 +# gma/run-opt-branch +DS_SYCL_COMMIT=57ff508ea592ff752fd323b383c32177d5bce7b5 +ONECCL_COMMIT=bfc879266e870b732bd165e399897419c44ad13d +VER_TORCH=2.1.0+cpu +VER_IPEX=2.1.100 +VER_TORCHCCL=2.1.0+cpu +VER_GCC=12.3.0 +AUX_INSTALL_SCRIPT=aux_install.sh + +# Mode: Select to compile projects into wheel files or install wheel files compiled. +# High bit: 8 7 6 5 4 3 2 1 :Low bit +# | | | | | | | └- Install wheel files +# | | | | | | └--- Compile wheel files +# | | | | | └----- Undefined +# | | | | └------- Undefined +# | | | └--------- Undefined +# | | └----------- Undefined +# | └------------- Undefined +# └--------------- Undefined +MODE=0x03 +if [ $# -gt 0 ]; then + if [[ ! $1 =~ ^[0-9]+$ ]] && [[ ! $1 =~ ^0x[0-9a-fA-F]+$ ]]; then + echo "Warning: Unexpected argument. Using default value." + else + MODE=$1 + fi +fi +if [ ! -f ${AUX_INSTALL_SCRIPT} ] || + [ ! -d lm-evaluation-harness ] || + [ ! -d DeepSpeedSYCLSupport ]; then + (( MODE |= 0x02 )) +fi + +if [ $((${MODE} & 0x02)) -ne 0 ]; then + # Check existance of required Linux commands + for CMD in conda gcc g++ make git; do + command -v ${CMD} > /dev/null || (echo "Error: Command \"${CMD}\" is required."; exit 1;) + done + echo "#!/usr/bin/env bash" > ${AUX_INSTALL_SCRIPT} + + function ver_compare() { + VER_MAJOR_CUR=$(echo $1 | cut -d "." -f 1) + VER_MINOR_CUR=$(echo $1 | cut -d "." -f 2) + VER_PATCH_CUR=$(echo $1 | cut -d "." -f 3) + VER_MAJOR_REQ=$(echo $2 | cut -d "." -f 1) + VER_MINOR_REQ=$(echo $2 | cut -d "." -f 2) + VER_PATCH_REQ=$(echo $2 | cut -d "." 
-f 3) + RET=0 + if [[ ${VER_MAJOR_CUR} -lt ${VER_MAJOR_REQ} ]]; then + RET=1 + else + if [[ ${VER_MAJOR_CUR} -eq ${VER_MAJOR_REQ} ]] && + [[ ${VER_MINOR_CUR} -lt ${VER_MINOR_REQ} ]]; then + RET=2 + else + if [[ ${VER_MAJOR_CUR} -eq ${VER_MAJOR_REQ} ]] && + [[ ${VER_MINOR_CUR} -eq ${VER_MINOR_REQ} ]] && + [[ ${VER_PATCH_CUR} -lt ${VER_PATCH_REQ} ]]; then + RET=3 + fi + fi + fi + echo ${RET} + } + VER_COMP=$(ver_compare $(gcc -dumpfullversion) ${VER_GCC}) + if [ ${VER_COMP} -ne 0 ]; then + echo -e '\a' + echo "Warning: GCC version equal to or newer than ${VER_GCC} is required." + echo " Found GCC version $(gcc -dumpfullversion)" + echo " Installing gcc and g++ 12.3 with conda" + echo "" + conda install -y gcc==12.3 gxx==12.3 cxx-compiler -c conda-forge + conda update -y sysroot_linux-64 + export CC=${CONDA_PREFIX}/bin/gcc + export CXX=${CONDA_PREFIX}/bin/g++ + export PATH=${CONDA_PREFIX}/bin:${PATH} + export LD_LIBRARY_PATH=${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH} + fi + + # Install deps + conda install -y cmake ninja mkl mkl-include + + # Install PyTorch and Intel® Extension for PyTorch* + python -m pip install torch==${VER_TORCH} --index-url https://download.pytorch.org/whl/cpu + python -m pip install intel-extension-for-pytorch==${VER_IPEX} + echo "python -m pip install torch==${VER_TORCH} --index-url https://download.pytorch.org/whl/cpu" >> ${AUX_INSTALL_SCRIPT} + echo "python -m pip install intel-extension-for-pytorch==${VER_IPEX}" >> ${AUX_INSTALL_SCRIPT} + + # Used for accuracy test only + if [ -d lm-evaluation-harness ]; then + rm -rf lm-evaluation-harness + fi + git clone https://github.com/EleutherAI/lm-evaluation-harness + cd lm-evaluation-harness + git checkout ${LM_EVA_COMMIT} + python setup.py bdist_wheel + cd .. + + # The following is only for DeepSpeed case + #Install oneccl-bind-pt(also named torch-ccl) + python -m pip install oneccl-bind-pt==${VER_IPEX} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/ + echo "python -m pip install oneccl-bind-pt==${VER_IPEX} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/cpu/us/" >> ${AUX_INSTALL_SCRIPT} + + # Install DeepSpeed + if [ -d DeepSpeedSYCLSupport ]; then + rm -rf DeepSpeedSYCLSupport + fi + git clone https://github.com/delock/DeepSpeedSYCLSupport + cd DeepSpeedSYCLSupport + git checkout ${DS_SYCL_COMMIT} + python -m pip install -r requirements/requirements.txt + python setup.py bdist_wheel + cd .. + + # Install OneCCL + if [ -d oneCCL ]; then + rm -rf oneCCL + fi + git clone https://github.com/oneapi-src/oneCCL.git + cd oneCCL + git checkout ${ONECCL_COMMIT} + mkdir build + cd build + cmake .. + make -j install + cd ../.. 
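+    # At this point the compile phase (MODE bit 0x02) has produced wheel files under
+    # lm-evaluation-harness/dist/ and DeepSpeedSYCLSupport/dist/, plus the oneCCL install
+    # tree at oneCCL/build/_install. The install phase (MODE bit 0x01) below installs the
+    # wheels, and tools/env_activate.sh (or the Dockerfile deploy stage, which relocates
+    # the tree to /opt/oneCCL) sources the oneCCL setvars.sh from that install tree.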
+fi +if [ $((${MODE} & 0x01)) -ne 0 ]; then + conda install -y mkl + conda install -y gperftools -c conda-forge + bash ${AUX_INSTALL_SCRIPT} + python -m pip install cpuid accelerate datasets sentencepiece protobuf==3.20.3 transformers==4.31.0 neural-compressor==2.3.1 + python -m pip install lm-evaluation-harness/dist/*.whl + python -m pip install DeepSpeedSYCLSupport/dist/*.whl + + rm ${AUX_INSTALL_SCRIPT} + rm -rf lm-evaluation-harness + rm -rf DeepSpeedSYCLSupport +fi diff --git a/scripts/compile_bundle.sh b/scripts/compile_bundle.sh index 87237e979..cdb20f023 100644 --- a/scripts/compile_bundle.sh +++ b/scripts/compile_bundle.sh @@ -1,15 +1,74 @@ #!/bin/bash -set -x -set -e +set -eo pipefail -VER_LLVM="llvmorg-16.0.6" -VER_IPEX="v2.1.0+cpu" +VER_TORCH=2.1.0 +VER_TORCHVISION=0.16.0 +VER_TORCHAUDIO=2.1.0 +VER_LLVM=llvmorg-16.0.6 +VER_IPEX=v2.1.100+cpu +VER_GCC=12.3.0 + +# Mode: Select which components to install. PyTorch and Intel® Extension for PyTorch* are always installed. +# High bit: 8 7 6 5 4 3 2 1 :Low bit +# | | | | | | | └- TorchAudio +# | | | | | | └--- TorchVision +# | | | | | └----- Rebuild LLVM +# | | | | └------- Undefined +# | | | └--------- Undefined +# | | └----------- Undefined +# | └------------- Undefined +# └--------------- Undefined +MODE=0x03 +if [ $# -gt 0 ]; then + if [[ ! $1 =~ ^[0-9]+$ ]] && [[ ! $1 =~ ^0x[0-9a-fA-F]+$ ]]; then + echo "Warning: Unexpected argument. Using default value." + else + MODE=$1 + fi +fi # Check existance of required Linux commands -for CMD in conda git nproc make; do - command -v ${CMD} || (echo "Error: Command \"${CMD}\" not found." ; exit 4) +for CMD in conda git nproc gcc g++ make; do + command -v ${CMD} > /dev/null || (echo "Error: Command \"${CMD}\" not found." ; exit 1) done +function ver_compare() { + VER_MAJOR_CUR=$(echo $1 | cut -d "." -f 1) + VER_MINOR_CUR=$(echo $1 | cut -d "." -f 2) + VER_PATCH_CUR=$(echo $1 | cut -d "." -f 3) + VER_MAJOR_REQ=$(echo $2 | cut -d "." -f 1) + VER_MINOR_REQ=$(echo $2 | cut -d "." -f 2) + VER_PATCH_REQ=$(echo $2 | cut -d "." -f 3) + RET=0 + if [[ ${VER_MAJOR_CUR} -lt ${VER_MAJOR_REQ} ]]; then + RET=1 + else + if [[ ${VER_MAJOR_CUR} -eq ${VER_MAJOR_REQ} ]] && + [[ ${VER_MINOR_CUR} -lt ${VER_MINOR_REQ} ]]; then + RET=2 + else + if [[ ${VER_MAJOR_CUR} -eq ${VER_MAJOR_REQ} ]] && + [[ ${VER_MINOR_CUR} -eq ${VER_MINOR_REQ} ]] && + [[ ${VER_PATCH_CUR} -lt ${VER_PATCH_REQ} ]]; then + RET=3 + fi + fi + fi + echo ${RET} +} +VER_COMP=$(ver_compare $(gcc -dumpfullversion) ${VER_GCC}) +GCC_CONDA=0 +if [ ${VER_COMP} -ne 0 ]; then + echo -e '\a' + echo "Warning: GCC version equal to or newer than ${VER_GCC} is required." + echo " Found GCC version $(gcc -dumpfullversion)" + echo " Installing gcc and g++ 12.3 with conda" + echo "" + GCC_CONDA=1 + conda install -y gcc==12.3 gxx==12.3 cxx-compiler -c conda-forge + conda update -y sysroot_linux-64 +fi + MAX_JOBS_VAR=$(nproc) if [ ! -z "${MAX_JOBS}" ]; then MAX_JOBS_VAR=${MAX_JOBS} @@ -28,14 +87,22 @@ fi # Checkout required branch/commit and update submodules cd llvm-project -if [ ! -z ${VER_LLVM} ]; then +if [ ! -z "${VER_LLVM}" ]; then + git stash > /dev/null + git clean -fd > /dev/null + git checkout main > /dev/null + git pull > /dev/null git checkout ${VER_LLVM} fi git submodule sync git submodule update --init --recursive cd .. cd intel-extension-for-pytorch -if [ ! -z ${VER_IPEX} ]; then +if [ ! 
-z "${VER_IPEX}" ]; then + git stash > /dev/null + git clean -fd > /dev/null + git checkout main > /dev/null + git pull > /dev/null git checkout ${VER_IPEX} fi git submodule sync @@ -43,38 +110,49 @@ git submodule update --init --recursive cd .. # Install dependencies -conda install -y gcc==12.3 gxx==12.3 cxx-compiler -c conda-forge -conda update -y sysroot_linux-64 python -m pip install cmake -python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu +python -m pip uninstall -y torch torchvision torchaudio intel-extension-for-pytorch +python -m pip install torch==${VER_TORCH} --index-url https://download.pytorch.org/whl/cpu +if [ $((${MODE} & 0x02)) -ne 0 ]; then + python -m pip install torchvision==${VER_TORCHVISION} --index-url https://download.pytorch.org/whl/cpu +fi +if [ $((${MODE} & 0x01)) -ne 0 ]; then + python -m pip install torchaudio==${VER_TORCHAUDIO} --index-url https://download.pytorch.org/whl/cpu +fi ABI=$(python -c "import torch; print(int(torch._C._GLIBCXX_USE_CXX11_ABI))") # Compile individual component -export CC=${CONDA_PREFIX}/bin/gcc -export CXX=${CONDA_PREFIX}/bin/g++ -export LD_LIBRARY_PATH=${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH} +if [[ ${GCC_CONDA} -eq 1 ]]; then + export CC=${CONDA_PREFIX}/bin/gcc + export CXX=${CONDA_PREFIX}/bin/g++ + export LD_LIBRARY_PATH=${CONDA_PREFIX}/lib:${LD_LIBRARY_PATH} +fi # LLVM -cd llvm-project -LLVM_ROOT="$(pwd)/release" -if [ -d ${LLVM_ROOT} ]; then - rm -rf ${LLVM_ROOT} +LLVM_ROOT="$(pwd)/llvm-release" +if [ $((${MODE} & 0x04)) -ne 0 ]; then + if [ -d ${LLVM_ROOT} ]; then + rm -rf ${LLVM_ROOT} + fi fi +cd llvm-project if [ -d build ]; then rm -rf build fi -mkdir build -cd build -echo "***************************** cmake *****************************" > ../build.log -cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=${ABI}" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_BENCHMARKS=OFF ../llvm 2>&1 | tee -a ../build.log -echo "***************************** build *****************************" >> ../build.log -cmake --build . -j ${MAX_JOBS_VAR} 2>&1 | tee -a ../build.log -echo "**************************** install ****************************" >> ../build.log -cmake -DCMAKE_INSTALL_PREFIX=${LLVM_ROOT} -P cmake_install.cmake 2>&1 | tee -a ../build.log -#xargs rm -rf < install_manifest.txt -cd .. -rm -rf build -ln -s ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13 +if [ ! -d ${LLVM_ROOT} ]; then + mkdir build + cd build + echo "***************************** cmake *****************************" > ../build.log + cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=${ABI}" -DLLVM_TARGETS_TO_BUILD=X86 -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_INCLUDE_EXAMPLES=OFF -DLLVM_INCLUDE_BENCHMARKS=OFF ../llvm 2>&1 | tee -a ../build.log + echo "***************************** build *****************************" >> ../build.log + cmake --build . -j ${MAX_JOBS_VAR} 2>&1 | tee -a ../build.log + echo "**************************** install ****************************" >> ../build.log + cmake -DCMAKE_INSTALL_PREFIX=${LLVM_ROOT} -P cmake_install.cmake 2>&1 | tee -a ../build.log + #xargs rm -rf < install_manifest.txt + cd .. + rm -rf build + ln -s ${LLVM_ROOT}/bin/llvm-config ${LLVM_ROOT}/bin/llvm-config-13 +fi export PATH=${LLVM_ROOT}/bin:$PATH export LD_LIBRARY_PATH=${LLVM_ROOT}/lib:$LD_LIBRARY_PATH cd .. 
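+# Note: an existing ${LLVM_ROOT} is reused on subsequent runs; set MODE bit 0x04 to delete it and rebuild LLVM from scratch.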
@@ -90,11 +168,34 @@ python setup.py bdist_wheel 2>&1 | tee build.log export CXXFLAGS=${CXXFLAGS_BK} unset DNNL_GRAPH_BUILD_COMPILER_BACKEND unset LLVM_DIR -python -m pip install --force-reinstall dist/*.whl +python -m pip uninstall -y mkl-static mkl-include +python -m pip install dist/*.whl cd .. # Sanity Test -set +x -export LD_PRELOAD=${CONDA_PREFIX}/lib/libstdc++.so -echo "Note: Should you experience \"version \`GLIBCXX_N.N.NN' not found\" error, run command \"export LD_PRELOAD=${CONDA_PREFIX}/lib/libstdc++.so\" and try again." -python -c "import torch; import torchvision; import torchaudio; import intel_extension_for_pytorch as ipex; print(f'torch_cxx11_abi: {torch._C._GLIBCXX_USE_CXX11_ABI}'); print(f'torch_version: {torch.__version__}'); print(f'torchvision_version: {torchvision.__version__}'); print(f'torchaudio_version: {torchaudio.__version__}'); print(f'ipex_version: {ipex.__version__}');" +if [ ! -z ${CONDA_PREFIX} ]; then + LIBSTDCPP_SYS=$(find /usr -regextype sed -regex ".*libstdc++\.so\.[[:digit:]]*\.[[:digit:]]*\.[[:digit:]]*") + LIBSTDCPP_CONDA=$(find ${CONDA_PREFIX}/lib -regextype sed -regex ".*libstdc++\.so\.[[:digit:]]*\.[[:digit:]]*\.[[:digit:]]*") + LIBSTDCPP_VER_SYS=$(echo ${LIBSTDCPP_SYS} | sed "s/.*libstdc++.so.//") + LIBSTDCPP_VER_CONDA=$(echo ${LIBSTDCPP_CONDA} | sed "s/.*libstdc++.so.//") + VER_COMP=$(ver_compare ${LIBSTDCPP_VER_CONDA} ${LIBSTDCPP_VER_SYS}) + LIBSTDCPP_ACTIVE="" + if [[ ${VER_COMP} -gt 0 ]]; then + LIBSTDCPP_ACTIVE=${LIBSTDCPP_SYS} + else + LIBSTDCPP_ACTIVE=${LIBSTDCPP_CONDA} + fi + export LD_PRELOAD=${LIBSTDCPP_ACTIVE} + echo "======================================================" + echo "Note: Set environment variable \"export LD_PRELOAD=${LIBSTDCPP_ACTIVE}\" to avoid the \"version \`GLIBCXX_N.N.NN' not found\" error." + echo "======================================================" +fi +CMD="import torch; print(f'torch_cxx11_abi: {torch._C._GLIBCXX_USE_CXX11_ABI}'); print(f'torch_version: {torch.__version__}');" +if [ $((${MODE} & 0x02)) -ne 0 ]; then + CMD="${CMD} import torchvision; print(f'torchvision_version: {torchvision.__version__}');" +fi +if [ $((${MODE} & 0x01)) -ne 0 ]; then + CMD="${CMD} import torchaudio; print(f'torchaudio_version: {torchaudio.__version__}');" +fi +CMD="${CMD} import intel_extension_for_pytorch as ipex; print(f'ipex_version: {ipex.__version__}');" +python -c "${CMD}"
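For reference, a minimal usage sketch of the reworked `compile_bundle.sh` modes. The values follow the bit map defined at the top of the script (bit 0x01 TorchAudio, bit 0x02 TorchVision, bit 0x04 rebuild LLVM); the invocation context shown here is illustrative only.

```bash
# Default MODE=0x03: install PyTorch, build Intel® Extension for PyTorch*, and also install TorchVision and TorchAudio.
bash compile_bundle.sh

# MODE=0x07 additionally removes any existing llvm-release directory so LLVM is rebuilt from scratch.
bash compile_bundle.sh 0x07

# MODE=0x00 skips TorchVision and TorchAudio; LLVM is still built once if llvm-release does not exist yet.
bash compile_bundle.sh 0x00
```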