diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 00000000..b880fa2d --- /dev/null +++ b/docker/README.md @@ -0,0 +1,22 @@ +# CSGHUB Server Base Images Building + +## Login Container Registry +```bash +OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com" +OPENCSG_ACR_USERNAME="" +OPENCSG_ACR_PASSWORD="" +echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin +``` + +## Build Multi-Platform Images +```bash +export BUILDX_NO_DEFAULT_ATTESTATIONS=1 +export IMAGE_TAG=1.0 +docker buildx build --platform linux/amd64,linux/arm64 \ + -t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-${IMAGE_TAG} \ + -t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-latest \ + -f Dockerfile.nginx \ + --push . +``` +*The above command will create `linux/amd64` and `linux/arm64` images with the tags `base-${IMAGE_TAG}` and `base-latest` at the same time.* + diff --git a/docker/finetune/Dockerfile.llamafactory b/docker/finetune/Dockerfile.llamafactory index b940fa67..f7579b09 100644 --- a/docker/finetune/Dockerfile.llamafactory +++ b/docker/finetune/Dockerfile.llamafactory @@ -1,55 +1,57 @@ -# pull from devel image instead of base +# Pull from devel image instead of base FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 + # Set bash as the default shell -ENV SHELL=/bin/bash -ENV JUPYTERHUB_SERVICE_PREFIX=/proxy/ -ENV GRADIO_ROOT_PATH=/proxy/7860/ -ENV TZ=Asia/Shanghai -ENV NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1 -ENV HF_HOME=/workspace/.cache +ENV SHELL=/bin/bash \ + JUPYTERHUB_SERVICE_PREFIX=/proxy/ \ + GRADIO_ROOT_PATH=/proxy/7860/ \ + TZ=Asia/Shanghai \ + NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1 \ + HF_HOME=/workspace/.cache \ + DEBIAN_FRONTEND=noninteractive # Build with some basic utilities -RUN apt-get update && apt-get install -y \ - python3-pip apt-utils \ - wget curl vim \ - git git-lfs \ - supervisor \ - unzip -# set timezone -ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get install -y tzdata \ - && ln -snf 
/usr/share/zoneinfo/$TZ /etc/localtime \ - && echo $TZ > /etc/timezone \ - && dpkg-reconfigure -f noninteractive tzdata - -# alias python='python3' -RUN ln -s /usr/bin/python3 /usr/bin/python -RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple +RUN apt-get update && apt-get install -y --no-install-recommends \ + python3-pip apt-utils wget curl vim \ + git git-lfs supervisor unzip tzdata && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Set timezone +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \ + echo $TZ > /etc/timezone && \ + dpkg-reconfigure -f noninteractive tzdata # Install the appropriate torch version -#RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 -RUN pip install --no-cache-dir jupyterlab numpy==1.26.4 \ +RUN ln -sf /usr/bin/python3 /usr/bin/python && \ + pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ + pip install --no-cache-dir jupyterlab numpy==1.26.4 \ torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 \ - jupyter-server-proxy==4.2.0 + jupyter-server-proxy==4.2.0 + # Create a working directory WORKDIR /etc/csghub -RUN git clone https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch -RUN cd LLaMA-Factory && pip install --no-cache-dir -e ".[metrics,deepspeed]" -# setup supervisord -RUN mkdir -p /var/log/supervisord + +RUN git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch && cd LLaMA-Factory && \ + pip install --no-cache-dir -e ".[metrics,deepspeed]" + +# Setup supervisord COPY script/supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY script/jupyter_notebook_config.py /root/.jupyter/jupyter_notebook_config.py COPY script/ /etc/csghub/ COPY script/handlers.py /usr/local/lib/python3.10/dist-packages/jupyter_server_proxy/handlers.py -RUN chmod +x /etc/csghub/*.sh -#use dark mode -RUN mkdir -p 
/root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \ - echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \ - mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \ - echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings -#fix gradio proxy issue -RUN pip uninstall gradio && pip install https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl + +RUN mkdir -p /var/log/supervisord && \ + chmod +x /etc/csghub/*.sh && \ + mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \ + echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \ + mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \ + echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings + +# Fix gradio proxy issue +RUN pip uninstall -y gradio && \ + pip install --no-cache-dir https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl + # Create a working directory WORKDIR /workspace/ -ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] -EXPOSE 8000 \ No newline at end of file +EXPOSE 8000 +ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] \ No newline at end of file diff --git a/docker/finetune/README.md b/docker/finetune/README.md index dbe37ac8..43271972 100644 --- a/docker/finetune/README.md +++ b/docker/finetune/README.md @@ -1,30 +1,38 @@ -# CSGHUB finetune images +# CSGHUB Finetune Images Building -## base image -https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html 
-https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags +## Base Images +- https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html +- https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags -## build images +## Login Container Registry ```bash -docker build -f Dockerfile.llamafactory . +OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com" +OPENCSG_ACR_USERNAME="" +OPENCSG_ACR_PASSWORD="" +echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin ``` -## push images -``` -docker login opencsg-registry.cn-beijing.cr.aliyuncs.com -docker push xxx -``` -## latest images -``` -#for llama-factory image -opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2 -``` -## Run image locally +## Build Multi-Platform Images +```bash +export BUILDX_NO_DEFAULT_ATTESTATIONS=1 +export IMAGE_TAG=1.21-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2 +docker buildx build --platform linux/amd64,linux/arm64 \ + -t ${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG} \ + -t ${OPENCSG_ACR}/public/llama-factory:latest \ + -f Dockerfile.llamafactory \ + --push . 
``` +*Note: The above command will create `linux/amd64` and `linux/arm64` images with the tags `${IMAGE_TAG}` and `latest` at the same time.* -docker run -d -e ACCESS_TOKEN=xxx -e REPO_ID="OpenCSG/csg-wukong-1B" -e HF_ENDPOINT=https://hub.opencsg.com/hf opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2 - +## Run Finetune Image Locally +```bash +docker run -d \ + -e ACCESS_TOKEN=xxx \ + -e REPO_ID="OpenCSG/csg-wukong-1B" \ + -e HF_ENDPOINT=https://opencsg.com/hf \ + -p 8000:8000 \ + ${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG} ``` -Note: HF_ENDPOINT should be use the real csghub address +*Note: HF_ENDPOINT should be set to the real CSGHub address.* diff --git a/docker/finetune/build.sh b/docker/finetune/build.sh new file mode 100644 index 00000000..a8367b43 --- /dev/null +++ b/docker/finetune/build.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Builds the llama-factory finetune image for linux/amd64 and linux/arm64 and pushes it to the registry. +# Arguments: $1 registry username, $2 registry password, $3 image tag (also pushed as "latest"). +# Environment: OPENCSG_ACR and OPENCSG_ACR_NAMESPACE override the default registry host and namespace. +# Reuses the first docker-container buildx builder found, otherwise creates one named container-builder. + +if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "Usage: $0 <username> <password> <tag>" + echo "Tag example: 1.22-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2" + exit 1 +fi + +OS=$(uname -s) +echo "Enable docker buildx with QEMU for ${OS}" +if [ "$OS" = "Darwin" ]; then + echo "QEMU enabled default..." +elif [ "$OS" = "Linux" ]; then + echo "Install QEMU support..." + docker run --privileged --rm tonistiigi/binfmt --install all +else + echo "Unknown OS: $OS" +fi + +export DOCKER_BUILDKIT=1 +export BUILDX_NO_DEFAULT_ATTESTATIONS=1 +DOCKER_CONTAINERS=$(docker buildx ls | grep docker-container) +if [[ !
-z "$DOCKER_CONTAINERS" ]]; then + BUILDER=$(echo "$DOCKER_CONTAINERS" | awk 'NR==1{gsub(/\*$/, "", $1); print $1}') + docker buildx use "${BUILDER}" +else + docker buildx create --name container-builder --driver docker-container --use --bootstrap +fi + +OPENCSG_ACR_USERNAME=$1 +OPENCSG_ACR_PASSWORD=$2 +OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"} +OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"} +DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE" + +echo "Logging in to OpenCSG ACR..." +echo "$OPENCSG_ACR_PASSWORD" | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin + +echo "Building images..." +export IMAGE_TAG=$3 +docker buildx build --platform linux/amd64,linux/arm64 \ + -t "${DOCKER_IMAGE_PREFIX}/llama-factory:${IMAGE_TAG}" \ + -t "${DOCKER_IMAGE_PREFIX}/llama-factory:latest" \ + -f Dockerfile.llamafactory \ + --push . || exit 1 + +echo "Done! New image pushed with tag: ${IMAGE_TAG}" diff --git a/docker/inference/README.md b/docker/inference/README.md index bd65b718..ce6f9dab 100644 --- a/docker/inference/README.md +++ b/docker/inference/README.md @@ -1,34 +1,67 @@ -# CSGHUB inference images +# CSGHUB Inference Images Building -## build images +## Login Container Registry ```bash -docker build -f Dockerfile.vllm . -docker build -f Dockerfile.tgi .
+OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com" +OPENCSG_ACR_USERNAME="" +OPENCSG_ACR_PASSWORD="" +echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin ``` -## push images -``` -docker login opencsg-registry.cn-beijing.cr.aliyuncs.com -docker push xxx -``` -## latest images -``` -#for vllm image -opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7 -#for vllm cpu only -opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-cpu:2.3 -#for tgi image -opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi-local:1.6 -``` -## Run image locally +## Build Multi-Platform Images +```bash +export BUILDX_NO_DEFAULT_ATTESTATIONS=1 + +# For vllm: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7 +export IMAGE_TAG=2.8 +docker buildx build --platform linux/amd64,linux/arm64 \ + -t ${OPENCSG_ACR}/public/vllm-local:${IMAGE_TAG} \ + -t ${OPENCSG_ACR}/public/vllm-local:latest \ + -f Dockerfile.vllm \ + --push . + +# For vllm cpu only: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-cpu:2.3 +export IMAGE_TAG=2.4 +docker buildx build --platform linux/amd64,linux/arm64 \ + -t ${OPENCSG_ACR}/public/vllm-cpu:${IMAGE_TAG} \ + -t ${OPENCSG_ACR}/public/vllm-cpu:latest \ + -f Dockerfile.vllm-cpu \ + --push . + +# For tgi: opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi:2.2 +export IMAGE_TAG=2.2 +docker buildx build --platform linux/amd64 \ + -t ${OPENCSG_ACR}/public/tgi:${IMAGE_TAG} \ + -t ${OPENCSG_ACR}/public/tgi:latest \ + -f Dockerfile.tgi \ + --push . 
``` -docker run -d -e ACCESS_TOKEN=xxx -e REPO_ID="xzgan001/csg-wukong-1B" -e HF_ENDPOINT=https://hub-stg.opencsg.com/ --gpus device=1 opencsg-registry.cn-beijing.cr.aliyuncs.com/public/vllm-local:2.7 +*Note: The above commands create `linux/amd64` and `linux/arm64` images (the TGI image is built for `linux/amd64` only), each tagged with `${IMAGE_TAG}` and `latest`.* -docker run -d -v llm:/data -e ACCESS_TOKEN=xxx -e REPO_ID="xzgan001/csg-wukong-1B" -e HF_ENDPOINT=https://hub-stg.opencsg.com/hf --gpus device=7 opencsg-registry.cn-beijing.cr.aliyuncs.com/public/tgi-local:1.6 +## Run Inference Image Locally +```bash +# Run VLLM +docker run -d \ + -e ACCESS_TOKEN=xxx \ + -e REPO_ID="xzgan001/csg-wukong-1B" \ + -e HF_ENDPOINT=https://opencsg.com/hf \ + --gpus device=1 \ + -p 8000:8000 \ + ${OPENCSG_ACR}/public/vllm-local:2.8 +# Run TGI +docker run -d \ + -e ACCESS_TOKEN=xxx \ + -e REPO_ID="xzgan001/csg-wukong-1B" \ + -e HF_ENDPOINT=https://opencsg.com/hf \ + -v llm:/data \ + --gpus device=7 \ + -p 8000:8000 \ + ${OPENCSG_ACR}/public/tgi:2.2 ``` -Note: HF_ENDPOINT should be use the real csghub address -## API to call inference +*Note: HF_ENDPOINT should be set to the real CSGHub address.* + +## API to Call Inference ``` curl -H "Content-type: application/json" -X POST -d '{ "model": "/data/xzgan/csg-wukong-1B", @@ -46,8 +79,8 @@ curl -H "Content-type: application/json" -X POST -d '{ "max_tokens": 20 }' http://localhost:8000/v1/chat/completions ``` -VLLM and TGI has the same endpoint and request body -More reference for tgi: -https://huggingface.co/docs/text-generation-inference/en/messages_api -https://huggingface.github.io/text-generation-inference/ +*Note: VLLM and TGI have the same endpoint and request body.* +More references for TGI: +- [Text Generation Inference](https://huggingface.github.io/text-generation-inference/) +- [Text Generation Inference Messages API](https://huggingface.co/docs/text-generation-inference/en/messages_api) diff --git a/docker/inference/build.sh b/docker/inference/build.sh new
file mode 100644 index 00000000..a60f48d2 --- /dev/null +++ b/docker/inference/build.sh @@ -0,0 +1,69 @@ +#!/usr/bin/env bash +# Builds one inference image (vllm-local, vllm-cpu or tgi) with docker buildx and pushes it to the registry. +# Arguments: $1 registry username, $2 registry password, $3 image as name:tag (also pushed as name:latest). +# Environment: OPENCSG_ACR and OPENCSG_ACR_NAMESPACE override the default registry host and namespace. +# Reuses the first docker-container buildx builder found, otherwise creates one named container-builder. + +if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "Usage: $0 <username> <password> <image>" + echo -e "IMAGE example: \n vllm-local:2.9\n vllm-cpu:2.5\n tgi:2.3" + exit 1 +fi + +OS=$(uname -s) +echo "Enable docker buildx with QEMU for ${OS}" +if [ "$OS" = "Darwin" ]; then + echo "QEMU enabled default..." +elif [ "$OS" = "Linux" ]; then + echo "Install QEMU support..." + docker run --privileged --rm tonistiigi/binfmt --install all +else + echo "Unknown OS: $OS" +fi + +export DOCKER_BUILDKIT=1 +export BUILDX_NO_DEFAULT_ATTESTATIONS=1 +DOCKER_CONTAINERS=$(docker buildx ls | grep docker-container) +if [[ ! -z "$DOCKER_CONTAINERS" ]]; then + BUILDER=$(echo "$DOCKER_CONTAINERS" | awk 'NR==1{gsub(/\*$/, "", $1); print $1}') + docker buildx use "${BUILDER}" +else + docker buildx create --name container-builder --driver docker-container --use --bootstrap +fi + +OPENCSG_ACR_USERNAME=$1 +OPENCSG_ACR_PASSWORD=$2 +OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"} +OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"} +DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE" + +echo "Logging in to OpenCSG ACR..." +echo "$OPENCSG_ACR_PASSWORD" | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin + +echo "Building images..." +export IMAGE=$3 +export PLATFORMS="linux/amd64,linux/arm64" +case "${IMAGE%:*}" in + vllm-local) + DOCKERFILE="Dockerfile.vllm" + ;; + vllm-cpu) + DOCKERFILE="Dockerfile.vllm-cpu" + ;; + tgi) + PLATFORMS="linux/amd64" + DOCKERFILE="Dockerfile.tgi" + ;; + *) + echo "Unsupported image: ${IMAGE} (expected vllm-local, vllm-cpu or tgi)" >&2 + exit 1 + ;; +esac + +docker buildx build --platform "${PLATFORMS}" \ + -t "${DOCKER_IMAGE_PREFIX}/${IMAGE}" \ + -t "${DOCKER_IMAGE_PREFIX}/${IMAGE%:*}:latest" \ + -f "${DOCKERFILE}" \ + --push . || exit 1 + +echo "Done!
New image pushed with tag: ${IMAGE}" diff --git a/docker/spaces/README.md b/docker/spaces/README.md index 9abe8cc0..ac4e79dd 100644 --- a/docker/spaces/README.md +++ b/docker/spaces/README.md @@ -1,22 +1,22 @@ -# CSGHUB nginx images +# CSGHUB Nginx Images Building -## build images +## Login Container Registry ```bash -docker build -f Dockerfile.nginx . +OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com" +OPENCSG_ACR_USERNAME="" +OPENCSG_ACR_PASSWORD="" +echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin ``` -## push images -``` -docker login registry.cn-beijing.aliyuncs.com -docker push xxx -``` -## environment -``` -ACCESS_TOKEN=xxx -REPO_ID=xxx +## Build Multi-Platform Images +```bash +export BUILDX_NO_DEFAULT_ATTESTATIONS=1 +export IMAGE_TAG=1.2 +docker buildx build --platform linux/amd64,linux/arm64 \ + -t ${OPENCSG_ACR}/opencsg_space/csg-nginx:${IMAGE_TAG} \ + -t ${OPENCSG_ACR}/opencsg_space/csg-nginx:latest \ + -f Dockerfile.nginx \ + --push . ``` -## latest images -registry.cn-beijing.aliyuncs.com/opencsg_space/csg-nginx:1.2 -opencsg-registry.cn-beijing.cr.aliyuncs.com/opencsg_space/csg-nginx:1.2 - +*The above command will create `linux/amd64` and `linux/arm64` images with the tags `${IMAGE_TAG}` and `latest` at the same time.* diff --git a/docker/spaces/build.sh b/docker/spaces/build.sh new file mode 100644 index 00000000..92b3143c --- /dev/null +++ b/docker/spaces/build.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Builds the csg-nginx image for linux/amd64 and linux/arm64 and pushes it to the registry. +# Arguments: $1 registry username, $2 registry password, $3 image tag (also pushed as "latest"). +# Environment: OPENCSG_ACR and OPENCSG_ACR_NAMESPACE override the default registry host and namespace. +# Reuses the first docker-container buildx builder found, otherwise creates one named container-builder. + +if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then + echo "Usage: $0 <username> <password> <tag>" + echo "Tag example: 1.3" + exit 1 +fi + +OS=$(uname -s) +echo "Enable docker buildx with QEMU for ${OS}" +if [ "$OS" = "Darwin" ]; then + echo "QEMU enabled default..." +elif [ "$OS" = "Linux" ]; then + echo "Install QEMU support..."
+ docker run --privileged --rm tonistiigi/binfmt --install all +else + echo "Unknown OS: $OS" +fi + +export DOCKER_BUILDKIT=1 +export BUILDX_NO_DEFAULT_ATTESTATIONS=1 +DOCKER_CONTAINERS=$(docker buildx ls | grep docker-container) +if [[ ! -z "$DOCKER_CONTAINERS" ]]; then + BUILDER=$(echo "$DOCKER_CONTAINERS" | awk 'NR==1{gsub(/\*$/, "", $1); print $1}') # first docker-container builder, trailing "*" stripped + docker buildx use "${BUILDER}" +else + docker buildx create --name container-builder --driver docker-container --use --bootstrap +fi + +OPENCSG_ACR_USERNAME=$1 +OPENCSG_ACR_PASSWORD=$2 +OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"} +OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"opencsg_space"} +DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE" + +echo "Logging in to OpenCSG ACR..." +echo "$OPENCSG_ACR_PASSWORD" | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin + +echo "Building images..." +export IMAGE_TAG=$3 # tag from the third argument; the image is also pushed as "latest" +docker buildx build --platform linux/amd64,linux/arm64 \ + -t "${DOCKER_IMAGE_PREFIX}/csg-nginx:${IMAGE_TAG}" \ + -t "${DOCKER_IMAGE_PREFIX}/csg-nginx:latest" \ + -f Dockerfile.nginx \ + --push . || exit 1 + +echo "Done! New image pushed with tag: ${IMAGE_TAG}"