Added build.sh to manually build multi-platform images

OpenCSGs · Oct 14, 2024 · 34162cd · 34162cd
1 parent eedcf1c
commit 34162cd
Show file tree

Hide file tree

Showing 8 changed files with 323 additions and 103 deletions.
diff --git a/docker/README.md b/docker/README.md
@@ -0,0 +1,22 @@
+# CSGHUB Server Base Images Building 
+
+## Login Container Registry
+```bash
+OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com"
+OPENCSG_ACR_USERNAME=""
+OPENCSG_ACR_PASSWORD=""
+echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin
+```
+
+## Build Multi-Platform Images
+```bash
+export BUILDX_NO_DEFAULT_ATTESTATIONS=1
+export IMAGE_TAG=1.0
+docker buildx build --platform linux/amd64,linux/arm64 \
+  -t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-${IMAGE_TAG} \
+  -t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-latest \
+  -f Dockerfile.nginx \
+  --push .
+```
+*The above command will create `linux/amd64` and `linux/arm64` images with the tags `base-${IMAGE_TAG}` and `base-latest` at the same time.*
+
diff --git a/docker/finetune/Dockerfile.llamafactory b/docker/finetune/Dockerfile.llamafactory
@@ -1,55 +1,57 @@
-# pull from devel image instead of base
+# Pull from devel image instead of base
 FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
+
 # Set bash as the default shell
-ENV SHELL=/bin/bash
-ENV JUPYTERHUB_SERVICE_PREFIX=/proxy/
-ENV GRADIO_ROOT_PATH=/proxy/7860/
-ENV TZ=Asia/Shanghai
-ENV NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1
-ENV HF_HOME=/workspace/.cache
+ENV SHELL=/bin/bash \
+    JUPYTERHUB_SERVICE_PREFIX=/proxy/ GRADIO_ROOT_PATH=/proxy/7860/ \
+    TZ=Asia/Shanghai HF_HOME=/workspace/.cache \
+    NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1
+# DEBIAN_FRONTEND is only needed while packages are installed, so declare it as a
+# build argument instead of persisting it in the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
 
 # Build with some basic utilities
-RUN apt-get update && apt-get install -y \
-    python3-pip apt-utils \
-    wget curl vim \
-    git git-lfs \
-    supervisor \
-    unzip
-# set timezone
-ARG DEBIAN_FRONTEND=noninteractive
-RUN apt-get install -y tzdata \
-    && ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \
-    && echo $TZ > /etc/timezone \
-    && dpkg-reconfigure -f noninteractive tzdata
-
-# alias python='python3'
-RUN ln -s /usr/bin/python3 /usr/bin/python
-RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    python3-pip apt-utils wget curl vim \
+    git git-lfs supervisor unzip tzdata && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Set timezone
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
+    echo $TZ > /etc/timezone && \
+    dpkg-reconfigure -f noninteractive tzdata
 
 # Install the appropriate torch version 
-#RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121
-RUN pip install --no-cache-dir jupyterlab numpy==1.26.4 \
+RUN ln -sf /usr/bin/python3 /usr/bin/python && \
+    pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
+    pip install --no-cache-dir jupyterlab numpy==1.26.4 \
     torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 \
-    jupyter-server-proxy==4.2.0 
+    jupyter-server-proxy==4.2.0
+
 # Create a working directory
 WORKDIR /etc/csghub
-RUN git clone https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch
-RUN cd LLaMA-Factory && pip install --no-cache-dir -e ".[metrics,deepspeed]"
-# setup supervisord
-RUN mkdir -p /var/log/supervisord
+
+RUN git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch && cd LLaMA-Factory && \
+    pip install --no-cache-dir -e ".[metrics,deepspeed]"
+
+# Setup supervisord
 COPY script/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
 COPY script/jupyter_notebook_config.py /root/.jupyter/jupyter_notebook_config.py
 COPY script/ /etc/csghub/
 COPY script/handlers.py /usr/local/lib/python3.10/dist-packages/jupyter_server_proxy/handlers.py
-RUN chmod +x /etc/csghub/*.sh
-#use dark mode
-RUN mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \
-	    echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \
-	    mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \
-	    echo '{"codeCellConfig":{"lineNumbers":true }}' >   /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings
-#fix gradio proxy issue
-RUN pip uninstall gradio && pip install https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl
+
+RUN mkdir -p /var/log/supervisord \
+        /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension \
+        /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \
+    chmod +x /etc/csghub/*.sh && \
+    echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \
+    echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings
+
+# Fix gradio proxy issue
+RUN pip uninstall -y gradio && \
+    pip install --no-cache-dir https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl
+
 # Create a working directory
 WORKDIR /workspace/
-ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
-EXPOSE 8000
+EXPOSE 8000
+ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
diff --git a/docker/finetune/README.md b/docker/finetune/README.md
@@ -1,30 +1,38 @@
-# CSGHUB finetune images
+# CSGHUB Finetune Images Building
 
-## base image
-https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html
-https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags
+## Base Images
+- https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html
+- https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags
 
-## build images
+## Login Container Registry
 ```bash
-docker build -f Dockerfile.llamafactory .
+OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com"
+OPENCSG_ACR_USERNAME=""
+OPENCSG_ACR_PASSWORD=""
+echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin
 ```
 
-## push images
-```
-docker login opencsg-registry.cn-beijing.cr.aliyuncs.com
-docker push xxx
-```
-## latest images
-```
-#for llama-factory image
-opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2
-```
-## Run image locally
+## Build Multi-Platform Images
+```bash
+export BUILDX_NO_DEFAULT_ATTESTATIONS=1
+export IMAGE_TAG=1.21-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2
+docker buildx build --platform linux/amd64,linux/arm64 \
+  -t ${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG} \
+  -t ${OPENCSG_ACR}/public/llama-factory:latest \
+  -f Dockerfile.llamafactory \
+  --push .
 ```
+*Note: The above command will create `linux/amd64` and `linux/arm64` images with the tags `${IMAGE_TAG}` and `latest` at the same time.*
 
-docker run -d -e ACCESS_TOKEN=xxx -e REPO_ID="OpenCSG/csg-wukong-1B"  -e HF_ENDPOINT=https://hub.opencsg.com/hf  opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2
-
+## Run Finetune Image Locally
+```bash
+docker run -d \
+  -e ACCESS_TOKEN=xxx \
+  -e REPO_ID="OpenCSG/csg-wukong-1B" \
+  -e HF_ENDPOINT=https://opencsg.com/hf \
+  -p 8000:8000 \
+  ${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG}
 ```
-Note: HF_ENDPOINT should be use the real csghub address
+*Note: HF_ENDPOINT should be set to the real CSGHub address.*
 
 
diff --git a/docker/finetune/build.sh b/docker/finetune/build.sh
@@ -0,0 +1,47 @@
+#!/usr/bin/env bash
+
+if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Usage: $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE_TAG>"
+    echo "Tag example: 1.22-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2"
+    exit 1
+fi
+
+OS=$(uname -s)
+echo "Enable docker buildx with QEMU for ${OS}"
+if [ "$OS" = "Darwin" ]; then
+    echo "QEMU enabled default..."
+elif [ "$OS" = "Linux" ]; then
+    echo "Install QEMU support..."
+    docker run --privileged --rm tonistiigi/binfmt --install all
+else
+    echo "Unknown OS: $OS"
+fi
+
+export DOCKER_BUILDKIT=1
+export BUILDX_NO_DEFAULT_ATTESTATIONS=1
+DOCKER_CONTAINERS=$(docker buildx ls | grep docker-container)
+if [[ ! -z "$DOCKER_CONTAINERS" ]]; then
+    BUILDER=$(echo "$DOCKER_CONTAINERS" | awk 'NR==1{gsub(/\*$/, "", $1); print $1}')
+    docker buildx use ${BUILDER}
+else
+    docker buildx create --name container-builder --driver docker-container --use --bootstrap
+fi
+
+OPENCSG_ACR_USERNAME=$1
+OPENCSG_ACR_PASSWORD=$2
+OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"}
+OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"}
+DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE"
+
+echo "Logging in to OpenCSG ACR..."
+echo "$OPENCSG_ACR_PASSWORD" | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin
+
+echo "Building images..."
+export IMAGE_TAG=$3
+docker buildx build --platform linux/amd64,linux/arm64 \
+    -t ${DOCKER_IMAGE_PREFIX}/llama-factory:${IMAGE_TAG} \
+    -t ${DOCKER_IMAGE_PREFIX}/llama-factory:latest \
+    -f Dockerfile.llamafactory \
+    --push .
+
+echo "Done! New image pushed with tag: $NEW_TAG"
diff --git a/docker/inference/README.md b/docker/inference/README.md
@@ -1,34 +1,67 @@
-# CSGHUB inference images
+# CSGHUB Inference Images Building
 
-## build images
+## Login Container Registry
 ```bash
-docker build -f Dockerfile.vllm .
-docker build -f Dockerfile.tgi .
+OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com"
+OPENCSG_ACR_USERNAME=""
+OPENCSG_ACR_PASSWORD=""
+echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin
 ```
 
-## push images
-```
-docker login opencsg-registry.cn-beijing.cr.aliyuncs.com
-docker push xxx
-```
-## latest images
-```
-#for vllm image
-opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7
-#for vllm cpu only
-opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-cpu:2.3
-#for tgi image
-opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi-local:1.6
-```
-## Run image locally
+## Build Multi-Platform Images
+```bash
+export BUILDX_NO_DEFAULT_ATTESTATIONS=1
+
+# For vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7
+export IMAGE_TAG=2.8
+docker buildx build --platform linux/amd64,linux/arm64 \
+  -t ${OPENCSG_ACR}/public/vllm-local:${IMAGE_TAG} \
+  -t ${OPENCSG_ACR}/public/vllm-local:latest \
+  -f Dockerfile.vllm \
+  --push .
+
+# For vllm cpu only: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-cpu:2.3
+export IMAGE_TAG=2.4
+docker buildx build --platform linux/amd64,linux/arm64 \
+  -t ${OPENCSG_ACR}/public/vllm-cpu:${IMAGE_TAG} \
+  -t ${OPENCSG_ACR}/public/vllm-cpu:latest \
+  -f Dockerfile.vllm-cpu \
+  --push .
+
+# For tgi: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi:2.2
+export IMAGE_TAG=2.2
+docker buildx build --platform linux/amd64 \
+  -t ${OPENCSG_ACR}/public/tgi:${IMAGE_TAG} \
+  -t ${OPENCSG_ACR}/public/tgi:latest \
+  -f Dockerfile.tgi \
+  --push .
 ```
-docker run -d -e ACCESS_TOKEN=xxx  -e REPO_ID="xzgan001/csg-wukong-1B" -e HF_ENDPOINT=https://hub-stg.opencsg.com/ --gpus device=1  opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7
+*Note: Each command above builds and pushes its image with the tags `${IMAGE_TAG}` and `latest` at the same time. The vLLM images are built for `linux/amd64` and `linux/arm64`; the TGI image is built for `linux/amd64` only.*
 
-docker run -d -v llm:/data -e ACCESS_TOKEN=xxx  -e REPO_ID="xzgan001/csg-wukong-1B"  -e HF_ENDPOINT=https://hub-stg.opencsg.com/hf --gpus device=7  opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi-local:1.6
+## Run Inference Image Locally
+```bash
+# Run VLLM
+docker run -d \
+  -e ACCESS_TOKEN=xxx \
+  -e REPO_ID="xzgan001/csg-wukong-1B" \
+  -e HF_ENDPOINT=https://opencsg.com/hf \
+  --gpus device=1 \
+  -p 8000:8000 \
+  ${OPENCSG_ACR}/public/vllm-local:2.8
 
+# Run TGI
+docker run -d \
+  -e ACCESS_TOKEN=xxx  \
+  -e REPO_ID="xzgan001/csg-wukong-1B" \
+  -e HF_ENDPOINT=https://opencsg.com/hf \
+  -v llm:/data \
+  --gpus device=7 \
+  -p 8000:8000 \
+  ${OPENCSG_ACR}/public/tgi:2.2
 ```
-Note: HF_ENDPOINT should be use the real csghub address
-## API to call inference
+*Note: HF_ENDPOINT should be set to the real CSGHub address.*
+
+## API to Call Inference
 ```
 curl -H "Content-type: application/json" -X POST -d '{
   "model": "/data/xzgan/csg-wukong-1B",
@@ -46,8 +79,8 @@ curl -H "Content-type: application/json" -X POST -d '{
   "max_tokens": 20
 }' http://localhost:8000/v1/chat/completions
 ```
-VLLM and TGI has the same endpoint and request body
-More reference for tgi: 
-https://huggingface.co/docs/text-generation-inference/en/messages_api
-https://huggingface.github.io/text-generation-inference/
+*Note: VLLM and TGI have the same endpoint and request body.*
 
+More references for TGI:
+- [Text Generation Inference](https://huggingface.github.io/text-generation-inference/)
+- [Text Generation Inference Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
diff --git a/docker/inference/build.sh b/docker/inference/build.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
+    echo "Usage: $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE>"
+    echo -e "IMAGE example: \n  vllm-local:2.9\n  vllm-cpu:2.5\n  tgi:2.3"
+    exit 1
+fi
+
+OS=$(uname -s)
+echo "Enable docker buildx with QEMU for ${OS}"
+if [ "$OS" = "Darwin" ]; then
+    echo "QEMU enabled default..."
+elif [ "$OS" = "Linux" ]; then
+    echo "Install QEMU support..."
+    docker run --privileged --rm tonistiigi/binfmt --install all
+else
+    echo "Unknown OS: $OS"
+fi
+
+export DOCKER_BUILDKIT=1
+export BUILDX_NO_DEFAULT_ATTESTATIONS=1
+DOCKER_CONTAINERS=$(docker buildx ls | grep docker-container)
+if [[ ! -z "$DOCKER_CONTAINERS" ]]; then
+    BUILDER=$(echo "$DOCKER_CONTAINERS" | awk 'NR==1{gsub(/\*$/, "", $1); print $1}')
+    docker buildx use ${BUILDER}
+else
+    docker buildx create --name container-builder --driver docker-container --use --bootstrap
+fi
+
+OPENCSG_ACR_USERNAME=$1
+OPENCSG_ACR_PASSWORD=$2
+OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"}
+OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"}
+DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE"
+
+echo "Logging in to OpenCSG ACR..."
+echo "$OPENCSG_ACR_PASSWORD" | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin
+
+echo "Building images..."
+export IMAGE=$3
+export PLATFORMS="linux/amd64,linux/arm64"
+case "${IMAGE%:*}" in
+  vllm-local)
+    DOCKERFILE="Dockerfile.vllm"
+    ;;
+  vllm-cpu)
+    DOCKERFILE="Dockerfile.vllm-cpu"
+    ;;
+  tgi)
+    PLATFORMS="linux/amd64"
+    DOCKERFILE="Dockerfile.tgi"
+    ;;
+esac
+
+docker buildx build --platform ${PLATFORMS} \
+    -t ${DOCKER_IMAGE_PREFIX}/${IMAGE} \
+    -t ${DOCKER_IMAGE_PREFIX}/${IMAGE%:*}:latest \
+    -f ${DOCKERFILE} \
+    --push .
+
+echo "Done! New image pushed with tag: $NEW_TAG"