Skip to content

Commit

Permalink
Added build.sh to manually build multi-platform images
Browse files Browse the repository at this point in the history
  • Loading branch information
mason committed Oct 14, 2024
1 parent eedcf1c commit 34162cd
Show file tree
Hide file tree
Showing 8 changed files with 323 additions and 103 deletions.
22 changes: 22 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# CSGHUB Server Base Images Building

## Login Container Registry
```bash
OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com"
OPENCSG_ACR_USERNAME=""
OPENCSG_ACR_PASSWORD=""
echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin
```

## Build Multi-Platform Images
```bash
export BUILDX_NO_DEFAULT_ATTESTATIONS=1
export IMAGE_TAG=1.0
docker buildx build --platform linux/amd64,linux/arm64 \
-t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-${IMAGE_TAG} \
-t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-latest \
-f Dockerfile.nginx \
--push .
```
*The above command will create `linux/amd64` and `linux/arm64` images with the tags `base-${IMAGE_TAG}` and `base-latest` at the same time.*

82 changes: 42 additions & 40 deletions docker/finetune/Dockerfile.llamafactory
Original file line number Diff line number Diff line change
@@ -1,55 +1,57 @@
# pull from devel image instead of base
# Pull from devel image instead of base
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

# Set bash as the default shell
ENV SHELL=/bin/bash
ENV JUPYTERHUB_SERVICE_PREFIX=/proxy/
ENV GRADIO_ROOT_PATH=/proxy/7860/
ENV TZ=Asia/Shanghai
ENV NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1
ENV HF_HOME=/workspace/.cache
ENV SHELL=/bin/bash \
JUPYTERHUB_SERVICE_PREFIX=/proxy/ \
GRADIO_ROOT_PATH=/proxy/7860/ \
TZ=Asia/Shanghai \
NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1 \
HF_HOME=/workspace/.cache \
DEBIAN_FRONTEND=noninteractive

# Build with some basic utilities
RUN apt-get update && apt-get install -y \
python3-pip apt-utils \
wget curl vim \
git git-lfs \
supervisor \
unzip
# set timezone
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get install -y tzdata \
&& ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \
&& echo $TZ > /etc/timezone \
&& dpkg-reconfigure -f noninteractive tzdata

# alias python='python3'
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
RUN apt-get update && apt-get install -y --no-install-recommends \
python3-pip apt-utils wget curl vim \
git git-lfs supervisor unzip tzdata && \
apt-get clean && rm -rf /var/lib/apt/lists/*

# Set timezone
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
echo $TZ > /etc/timezone && \
dpkg-reconfigure -f noninteractive tzdata

# Install the appropriate torch version
#RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121
RUN pip install --no-cache-dir jupyterlab numpy==1.26.4 \
RUN ln -sf /usr/bin/python3 /usr/bin/python && \
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
pip install --no-cache-dir jupyterlab numpy==1.26.4 \
torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 \
jupyter-server-proxy==4.2.0
jupyter-server-proxy==4.2.0

# Create a working directory
WORKDIR /etc/csghub
RUN git clone https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch
RUN cd LLaMA-Factory && pip install --no-cache-dir -e ".[metrics,deepspeed]"
# setup supervisord
RUN mkdir -p /var/log/supervisord

RUN git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch && cd LLaMA-Factory && \
pip install --no-cache-dir -e ".[metrics,deepspeed]"

# Setup supervisord
COPY script/supervisord.conf /etc/supervisor/conf.d/supervisord.conf
COPY script/jupyter_notebook_config.py /root/.jupyter/jupyter_notebook_config.py
COPY script/ /etc/csghub/
COPY script/handlers.py /usr/local/lib/python3.10/dist-packages/jupyter_server_proxy/handlers.py
RUN chmod +x /etc/csghub/*.sh
#use dark mode
RUN mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \
echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \
mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \
echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings
#fix gradio proxy issue
RUN pip uninstall gradio && pip install https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl

RUN mkdir -p /var/log/supervisord && \
chmod +x /etc/csghub/*.sh && \
mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \
echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \
mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \
echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings

# Fix gradio proxy issue
RUN pip uninstall -y gradio && \
pip install --no-cache-dir https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl

# Create a working directory
WORKDIR /workspace/
ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
EXPOSE 8000
EXPOSE 8000
ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
48 changes: 28 additions & 20 deletions docker/finetune/README.md
Original file line number Diff line number Diff line change
@@ -1,30 +1,38 @@
# CSGHUB finetune images
# CSGHUB Finetune Images Building

## base image
https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html
https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags
## Base Images
- https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html
- https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags

## build images
## Login Container Registry
```bash
docker build -f Dockerfile.llamafactory .
OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com"
OPENCSG_ACR_USERNAME=""
OPENCSG_ACR_PASSWORD=""
echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin
```

## push images
```
docker login opencsg-registry.cn-beijing.cr.aliyuncs.com
docker push xxx
```
## latest images
```
#for llama-factory image
opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2
```
## Run image locally
## Build Multi-Platform Images
```bash
export BUILDX_NO_DEFAULT_ATTESTATIONS=1
export IMAGE_TAG=1.21-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2
docker buildx build --platform linux/amd64,linux/arm64 \
-t ${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG} \
-t ${OPENCSG_ACR}/public/llama-factory:latest \
-f Dockerfile.llamafactory \
--push .
```
*Note: The above command will create `linux/amd64` and `linux/arm64` images with the tags `${IMAGE_TAG}` and `latest` at the same time.*

docker run -d -e ACCESS_TOKEN=xxx -e REPO_ID="OpenCSG/csg-wukong-1B" -e HF_ENDPOINT=https://hub.opencsg.com/hf opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2
## Run Finetune Image Locally
```bash
docker run -d \
-e ACCESS_TOKEN=xxx \
-e REPO_ID="OpenCSG/csg-wukong-1B" \
-e HF_ENDPOINT=https://opencsg.com/hf \
-p 8000:8000 \
${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG}
```
Note: HF_ENDPOINT should be use the real csghub address
*Note: `HF_ENDPOINT` should be set to the address of your actual CSGHub instance.*


47 changes: 47 additions & 0 deletions docker/finetune/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
#
# Build the LLaMA-Factory finetune image for linux/amd64 and linux/arm64 with
# docker buildx and push it to the OpenCSG container registry.
#
# Usage:
#   build.sh <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE_TAG>
#
# Optional environment overrides:
#   OPENCSG_ACR            registry host (defaults to the OpenCSG Aliyun registry)
#   OPENCSG_ACR_NAMESPACE  registry namespace (defaults to "public")
#
# Must be run from the directory containing Dockerfile.llamafactory, because
# both the Dockerfile path and the build context (".") are relative.
#
# NOTE(review): the password is accepted on the command line, so it may show up
# in process listings and shell history; kept for interface compatibility.

# Print an error message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ -z "${1:-}" || -z "${2:-}" || -z "${3:-}" ]]; then
  echo "Usage: $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE_TAG>"
  echo "Tag example: 1.22-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2"
  exit 1
fi

os_name=$(uname -s)
echo "Enable docker buildx with QEMU for ${os_name}"
case "$os_name" in
  Darwin)
    echo "QEMU enabled default..."
    ;;
  Linux)
    echo "Install QEMU support..."
    # Best effort: a failure here is reported but not fatal, since the host may
    # already have the emulators registered.
    docker run --privileged --rm tonistiigi/binfmt --install all \
      || echo "Warning: could not install QEMU support; cross-platform build may fail" >&2
    ;;
  *)
    echo "Unknown OS: $os_name"
    ;;
esac

export DOCKER_BUILDKIT=1
export BUILDX_NO_DEFAULT_ATTESTATIONS=1

# Reuse the first existing builder that uses the docker-container driver
# (stripping the trailing "*" that marks the current builder); otherwise
# create one.
builder=$(docker buildx ls \
  | awk '/docker-container/ && !seen {gsub(/\*$/, "", $1); print $1; seen = 1}')
if [[ -n "$builder" ]]; then
  docker buildx use "$builder" || die "Failed to select buildx builder: $builder"
else
  docker buildx create --name container-builder --driver docker-container --use --bootstrap \
    || die "Failed to create buildx builder: container-builder"
fi

OPENCSG_ACR_USERNAME=$1
OPENCSG_ACR_PASSWORD=$2
OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"}
OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"}
DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE"

echo "Logging in to OpenCSG ACR..."
# printf avoids echo's option/backslash handling on arbitrary passwords.
printf '%s\n' "$OPENCSG_ACR_PASSWORD" \
  | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin \
  || die "Login to $OPENCSG_ACR failed"

echo "Building images..."
export IMAGE_TAG=$3
docker buildx build --platform linux/amd64,linux/arm64 \
  -t "${DOCKER_IMAGE_PREFIX}/llama-factory:${IMAGE_TAG}" \
  -t "${DOCKER_IMAGE_PREFIX}/llama-factory:latest" \
  -f Dockerfile.llamafactory \
  --push . \
  || die "Image build/push failed"

# Report the tag that was actually pushed (the original printed an unset variable).
echo "Done! New image pushed with tag: ${IMAGE_TAG}"
87 changes: 60 additions & 27 deletions docker/inference/README.md
Original file line number Diff line number Diff line change
@@ -1,34 +1,67 @@
# CSGHUB inference images
# CSGHUB Inference Images Building

## build images
## Login Container Registry
```bash
docker build -f Dockerfile.vllm .
docker build -f Dockerfile.tgi .
OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com"
OPENCSG_ACR_USERNAME=""
OPENCSG_ACR_PASSWORD=""
echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin
```

## push images
```
docker login opencsg-registry.cn-beijing.cr.aliyuncs.com
docker push xxx
```
## latest images
```
#for vllm image
opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7
#for vllm cpu only
opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-cpu:2.3
#for tgi image
opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi-local:1.6
```
## Run image locally
## Build Multi-Platform Images
```bash
export BUILDX_NO_DEFAULT_ATTESTATIONS=1

# For vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7
export IMAGE_TAG=2.8
docker buildx build --platform linux/amd64,linux/arm64 \
-t ${OPENCSG_ACR}/public/vllm-local:${IMAGE_TAG} \
-t ${OPENCSG_ACR}/public/vllm-local:latest \
-f Dockerfile.vllm \
--push .

# For vllm cpu only: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-cpu:2.3
export IMAGE_TAG=2.4
docker buildx build --platform linux/amd64,linux/arm64 \
-t ${OPENCSG_ACR}/public/vllm-cpu:${IMAGE_TAG} \
-t ${OPENCSG_ACR}/public/vllm-cpu:latest \
-f Dockerfile.vllm-cpu \
--push .

# For tgi: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi:2.2
export IMAGE_TAG=2.2
docker buildx build --platform linux/amd64 \
-t ${OPENCSG_ACR}/public/tgi:${IMAGE_TAG} \
-t ${OPENCSG_ACR}/public/tgi:latest \
-f Dockerfile.tgi \
--push .
```
docker run -d -e ACCESS_TOKEN=xxx -e REPO_ID="xzgan001/csg-wukong-1B" -e HF_ENDPOINT=https://hub-stg.opencsg.com/ --gpus device=1 opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7
*Note: The above commands build `linux/amd64` and `linux/arm64` images (the TGI image is built for `linux/amd64` only) and push each of them with both the `${IMAGE_TAG}` and `latest` tags.*

docker run -d -v llm:/data -e ACCESS_TOKEN=xxx -e REPO_ID="xzgan001/csg-wukong-1B" -e HF_ENDPOINT=https://hub-stg.opencsg.com/hf --gpus device=7 opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi-local:1.6
## Run Inference Image Locally
```bash
# Run VLLM
docker run -d \
-e ACCESS_TOKEN=xxx \
-e REPO_ID="xzgan001/csg-wukong-1B" \
-e HF_ENDPOINT=https://opencsg.com/hf \
--gpus device=1 \
-p 8000:8000 \
${OPENCSG_ACR}/public/vllm-local:2.8

# Run TGI
docker run -d \
-e ACCESS_TOKEN=xxx \
-e REPO_ID="xzgan001/csg-wukong-1B" \
-e HF_ENDPOINT=https://opencsg.com/hf \
-v llm:/data \
--gpus device=7 \
-p 8000:8000 \
${OPENCSG_ACR}/public/tgi:2.2
```
Note: HF_ENDPOINT should be use the real csghub address
## API to call inference
*Note: `HF_ENDPOINT` should be set to the address of your actual CSGHub instance.*

## API to Call Inference
```
curl -H "Content-type: application/json" -X POST -d '{
"model": "/data/xzgan/csg-wukong-1B",
Expand All @@ -46,8 +79,8 @@ curl -H "Content-type: application/json" -X POST -d '{
"max_tokens": 20
}' http://localhost:8000/v1/chat/completions
```
VLLM and TGI has the same endpoint and request body
More reference for tgi:
https://huggingface.co/docs/text-generation-inference/en/messages_api
https://huggingface.github.io/text-generation-inference/
*Note: vLLM and TGI have the same endpoint and request body.*

More reference for TGI:
- [Text Generation Inference](https://huggingface.github.io/text-generation-inference/)
- [Text Generation Inference Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api)
61 changes: 61 additions & 0 deletions docker/inference/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env bash
#
# Build one of the CSGHub inference images with docker buildx and push it to
# the OpenCSG container registry, tagged both as given and as "latest".
#
# Usage:
#   build.sh <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE>
#
# IMAGE is "<name>:<tag>", where <name> selects the Dockerfile:
#   vllm-local -> Dockerfile.vllm      (linux/amd64, linux/arm64)
#   vllm-cpu   -> Dockerfile.vllm-cpu  (linux/amd64, linux/arm64)
#   tgi        -> Dockerfile.tgi       (linux/amd64 only)
#
# Optional environment overrides:
#   OPENCSG_ACR            registry host (defaults to the OpenCSG Aliyun registry)
#   OPENCSG_ACR_NAMESPACE  registry namespace (defaults to "public")
#
# Must be run from the directory containing the Dockerfiles, because both the
# Dockerfile path and the build context (".") are relative.
#
# NOTE(review): the password is accepted on the command line, so it may show up
# in process listings and shell history; kept for interface compatibility.

# Print an error message to stderr and abort with a non-zero status.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ -z "${1:-}" || -z "${2:-}" || -z "${3:-}" ]]; then
  echo "Usage: $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE>"
  printf 'IMAGE example: \n vllm-local:2.9\n vllm-cpu:2.5\n tgi:2.3\n'
  exit 1
fi

# Resolve the Dockerfile and target platforms up front so that an unsupported
# image name fails before any docker side effects (QEMU install, login).
export IMAGE=$3
export PLATFORMS="linux/amd64,linux/arm64"
image_name=${IMAGE%:*}
case "$image_name" in
  vllm-local)
    DOCKERFILE="Dockerfile.vllm"
    ;;
  vllm-cpu)
    DOCKERFILE="Dockerfile.vllm-cpu"
    ;;
  tgi)
    PLATFORMS="linux/amd64"
    DOCKERFILE="Dockerfile.tgi"
    ;;
  *)
    # Without this arm DOCKERFILE stayed empty and "-f" consumed "--push".
    die "Unsupported image '${image_name}'; expected one of: vllm-local, vllm-cpu, tgi"
    ;;
esac

os_name=$(uname -s)
echo "Enable docker buildx with QEMU for ${os_name}"
case "$os_name" in
  Darwin)
    echo "QEMU enabled default..."
    ;;
  Linux)
    echo "Install QEMU support..."
    # Best effort: a failure here is reported but not fatal, since the host may
    # already have the emulators registered.
    docker run --privileged --rm tonistiigi/binfmt --install all \
      || echo "Warning: could not install QEMU support; cross-platform build may fail" >&2
    ;;
  *)
    echo "Unknown OS: $os_name"
    ;;
esac

export DOCKER_BUILDKIT=1
export BUILDX_NO_DEFAULT_ATTESTATIONS=1

# Reuse the first existing builder that uses the docker-container driver
# (stripping the trailing "*" that marks the current builder); otherwise
# create one.
builder=$(docker buildx ls \
  | awk '/docker-container/ && !seen {gsub(/\*$/, "", $1); print $1; seen = 1}')
if [[ -n "$builder" ]]; then
  docker buildx use "$builder" || die "Failed to select buildx builder: $builder"
else
  docker buildx create --name container-builder --driver docker-container --use --bootstrap \
    || die "Failed to create buildx builder: container-builder"
fi

OPENCSG_ACR_USERNAME=$1
OPENCSG_ACR_PASSWORD=$2
OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"}
OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"}
DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE"

echo "Logging in to OpenCSG ACR..."
# printf avoids echo's option/backslash handling on arbitrary passwords.
printf '%s\n' "$OPENCSG_ACR_PASSWORD" \
  | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin \
  || die "Login to $OPENCSG_ACR failed"

echo "Building images..."
docker buildx build --platform "${PLATFORMS}" \
  -t "${DOCKER_IMAGE_PREFIX}/${IMAGE}" \
  -t "${DOCKER_IMAGE_PREFIX}/${image_name}:latest" \
  -f "${DOCKERFILE}" \
  --push . \
  || die "Image build/push failed"

# Report the image that was actually pushed (the original printed an unset variable).
echo "Done! New image pushed with tag: ${IMAGE}"
Loading

0 comments on commit 34162cd

Please sign in to comment.