-
Notifications
You must be signed in to change notification settings - Fork 128
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added build.sh to manually build multi-platform images
- Loading branch information
mason
committed
Oct 14, 2024
1 parent
eedcf1c
commit 34162cd
Showing
8 changed files
with
323 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# CSGHUB Server Base Images Building | ||
|
||
## Login Container Registry | ||
```bash | ||
OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com" | ||
OPENCSG_ACR_USERNAME="" | ||
OPENCSG_ACR_PASSWORD="" | ||
echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin | ||
``` | ||
|
||
## Build Multi-Platform Images | ||
```bash | ||
export BUILDX_NO_DEFAULT_ATTESTATIONS=1 | ||
export IMAGE_TAG=1.0 | ||
docker buildx build --platform linux/amd64,linux/arm64 \ | ||
-t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-${IMAGE_TAG} \ | ||
-t ${OPENCSG_ACR}/opencsg_public/csghub_server:base-latest \ | ||
-f Dockerfile.nginx \ | ||
--push . | ||
``` | ||
*The above command will create `linux/amd64` and `linux/arm64` images with the tags `base-${IMAGE_TAG}` and `base-latest` at the same time.* | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,55 +1,57 @@ | ||
# pull from devel image instead of base | ||
# Pull from devel image instead of base | ||
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 | ||
|
||
# Set bash as the default shell | ||
ENV SHELL=/bin/bash | ||
ENV JUPYTERHUB_SERVICE_PREFIX=/proxy/ | ||
ENV GRADIO_ROOT_PATH=/proxy/7860/ | ||
ENV TZ=Asia/Shanghai | ||
ENV NCCL_IB_DISABLE=1 NCCL_P2P_DISABLE=1 | ||
ENV HF_HOME=/workspace/.cache | ||
# Runtime environment shared by every process in the container:
#   SHELL                      - bash as the default shell
#   JUPYTERHUB_SERVICE_PREFIX  - URL prefix under which Jupyter is served
#   GRADIO_ROOT_PATH           - URL prefix under which Gradio (port 7860) is proxied
#   TZ                         - timezone, also used by the tzdata setup step
#   NCCL_IB_DISABLE / NCCL_P2P_DISABLE - NCCL transport switches
#   HF_HOME                    - Hugging Face cache location inside the workspace
ENV SHELL=/bin/bash \
    JUPYTERHUB_SERVICE_PREFIX=/proxy/ \
    GRADIO_ROOT_PATH=/proxy/7860/ \
    TZ=Asia/Shanghai \
    NCCL_IB_DISABLE=1 \
    NCCL_P2P_DISABLE=1 \
    HF_HOME=/workspace/.cache

# Build-time only: keeps apt/dpkg from prompting during the RUN steps below.
# Declared as ARG rather than ENV so it is not baked into the runtime
# environment, where it would silently change apt behaviour for users.
ARG DEBIAN_FRONTEND=noninteractive
|
||
# Build with some basic utilities | ||
RUN apt-get update && apt-get install -y \ | ||
python3-pip apt-utils \ | ||
wget curl vim \ | ||
git git-lfs \ | ||
supervisor \ | ||
unzip | ||
# set timezone | ||
ARG DEBIAN_FRONTEND=noninteractive | ||
RUN apt-get install -y tzdata \ | ||
&& ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \ | ||
&& echo $TZ > /etc/timezone \ | ||
&& dpkg-reconfigure -f noninteractive tzdata | ||
|
||
# alias python='python3' | ||
RUN ln -s /usr/bin/python3 /usr/bin/python | ||
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple | ||
RUN apt-get update && apt-get install -y --no-install-recommends \ | ||
python3-pip apt-utils wget curl vim \ | ||
git git-lfs supervisor unzip tzdata && \ | ||
apt-get clean && rm -rf /var/lib/apt/lists/* | ||
|
||
# Set timezone | ||
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \ | ||
echo $TZ > /etc/timezone && \ | ||
dpkg-reconfigure -f noninteractive tzdata | ||
|
||
# Install the appropriate torch version | ||
#RUN pip install torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121 | ||
RUN pip install --no-cache-dir jupyterlab numpy==1.26.4 \ | ||
RUN ln -sf /usr/bin/python3 /usr/bin/python && \ | ||
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \ | ||
pip install --no-cache-dir jupyterlab numpy==1.26.4 \ | ||
torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 \ | ||
jupyter-server-proxy==4.2.0 | ||
jupyter-server-proxy==4.2.0 | ||
|
||
# Create a working directory | ||
WORKDIR /etc/csghub | ||
RUN git clone https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch | ||
RUN cd LLaMA-Factory && pip install --no-cache-dir -e ".[metrics,deepspeed]" | ||
# setup supervisord | ||
RUN mkdir -p /var/log/supervisord | ||
|
||
RUN git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git --branch v0.8.3 --single-branch && cd LLaMA-Factory && \ | ||
pip install --no-cache-dir -e ".[metrics,deepspeed]" | ||
|
||
# Setup supervisord | ||
COPY script/supervisord.conf /etc/supervisor/conf.d/supervisord.conf | ||
COPY script/jupyter_notebook_config.py /root/.jupyter/jupyter_notebook_config.py | ||
COPY script/ /etc/csghub/ | ||
COPY script/handlers.py /usr/local/lib/python3.10/dist-packages/jupyter_server_proxy/handlers.py | ||
RUN chmod +x /etc/csghub/*.sh | ||
#use dark mode | ||
RUN mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \ | ||
echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \ | ||
mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \ | ||
echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings | ||
#fix gradio proxy issue | ||
RUN pip uninstall gradio && pip install https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl | ||
|
||
RUN mkdir -p /var/log/supervisord && \ | ||
chmod +x /etc/csghub/*.sh && \ | ||
mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension && \ | ||
echo '{"theme":"JupyterLab Dark"}' > /root/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings && \ | ||
mkdir -p /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension && \ | ||
echo '{"codeCellConfig":{"lineNumbers":true }}' > /root/.jupyter/lab/user-settings/@jupyterlab/notebook-extension/tracker.jupyterlab-settings | ||
|
||
# Fix gradio proxy issue | ||
RUN pip uninstall -y gradio && \ | ||
pip install --no-cache-dir https://git-devops.opencsg.com/opensource/gradio/-/raw/3a207a08755b4820541915e9ea63e6abc1b4b424/gradio-4.41.0-py3-none-any.whl | ||
|
||
# Create a working directory | ||
WORKDIR /workspace/ | ||
ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] | ||
EXPOSE 8000 | ||
EXPOSE 8000 | ||
ENTRYPOINT ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,30 +1,38 @@ | ||
# CSGHUB finetune images | ||
# CSGHUB Finetune Images Building | ||
|
||
## base image | ||
https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html | ||
https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags | ||
## Base Images | ||
- https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html | ||
- https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags | ||
|
||
## build images | ||
## Login Container Registry | ||
```bash | ||
docker build -f Dockerfile.llamafactory . | ||
OPENCSG_ACR="opencsg-registry.cn-beijing.cr.aliyuncs.com" | ||
OPENCSG_ACR_USERNAME="" | ||
OPENCSG_ACR_PASSWORD="" | ||
echo "$OPENCSG_ACR_PASSWORD" | docker login $OPENCSG_ACR -u $OPENCSG_ACR_USERNAME --password-stdin | ||
``` | ||
|
||
## push images | ||
``` | ||
docker login opencsg-registry.cn-beijing.cr.aliyuncs.com | ||
docker push xxx | ||
``` | ||
## latest images | ||
``` | ||
#for llama-factory image | ||
opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2 | ||
``` | ||
## Run image locally | ||
## Build Multi-Platform Images | ||
```bash | ||
export BUILDX_NO_DEFAULT_ATTESTATIONS=1 | ||
export IMAGE_TAG=1.21-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2 | ||
docker buildx build --platform linux/amd64,linux/arm64 \ | ||
-t ${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG} \ | ||
-t ${OPENCSG_ACR}/public/llama-factory:latest \ | ||
-f Dockerfile.llamafactory \ | ||
--push . | ||
``` | ||
*Note: The above command will create `linux/amd64` and `linux/arm64` images with the tags `${IMAGE_TAG}` and `latest` at the same time.* | ||
|
||
docker run -d -e ACCESS_TOKEN=xxx -e REPO_ID="OpenCSG/csg-wukong-1B" -e HF_ENDPOINT=https://hub.opencsg.com/hf opencsg-registry.cn-beijing.cr.aliyuncs.com/public/llama-factory:1.20-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2 | ||
## Run Finetune Image Locally | ||
```bash | ||
docker run -d \ | ||
-e ACCESS_TOKEN=xxx \ | ||
-e REPO_ID="OpenCSG/csg-wukong-1B" \ | ||
-e HF_ENDPOINT=https://opencsg.com/hf \ | ||
-p 8000:8000 \ | ||
${OPENCSG_ACR}/public/llama-factory:${IMAGE_TAG} | ||
``` | ||
Note: HF_ENDPOINT should be use the real csghub address | ||
*Note: HF_ENDPOINT should be set to the real CSGHub address.* | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#!/usr/bin/env bash
#
# Build the llama-factory image for linux/amd64 and linux/arm64 and push it
# to the OpenCSG container registry under both <IMAGE_TAG> and "latest".
#
# Usage:
#   $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE_TAG>
#
# Optional environment overrides:
#   OPENCSG_ACR            registry host
#                          (default: opencsg-registry.cn-beijing.cr.aliyuncs.com)
#   OPENCSG_ACR_NAMESPACE  registry namespace (default: public)
#
# NOTE(review): the password is taken as a positional argument, so it can end
# up in shell history and process listings; consider an env var or stdin.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
  echo "Usage: $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE_TAG>"
  echo "Tag example: 1.22-cuda12.1-devel-ubuntu22.04-py310-torch2.1.2"
  exit 1
fi

# Cross-platform builds need QEMU binfmt handlers. On Linux they are
# registered explicitly; on macOS the script assumes they are already there.
OS=$(uname -s)
echo "Enable docker buildx with QEMU for ${OS}"
if [ "$OS" = "Darwin" ]; then
  echo "QEMU enabled default..."
elif [ "$OS" = "Linux" ]; then
  echo "Install QEMU support..."
  docker run --privileged --rm tonistiigi/binfmt --install all
else
  echo "Unknown OS: $OS"
fi

export DOCKER_BUILDKIT=1
export BUILDX_NO_DEFAULT_ATTESTATIONS=1

# Reuse the first existing builder that uses the docker-container driver,
# otherwise create one. The awk step strips the trailing "*" that marks the
# currently selected builder in `docker buildx ls` output.
DOCKER_CONTAINERS=$(docker buildx ls | grep docker-container)
if [[ -n "$DOCKER_CONTAINERS" ]]; then
  BUILDER=$(echo "$DOCKER_CONTAINERS" | awk 'NR==1{gsub(/\*$/, "", $1); print $1}')
  docker buildx use "${BUILDER}"
else
  docker buildx create --name container-builder --driver docker-container --use --bootstrap
fi

OPENCSG_ACR_USERNAME=$1
OPENCSG_ACR_PASSWORD=$2
OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"}
OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"}
DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE"

echo "Logging in to OpenCSG ACR..."
# Stop here on a failed login: the --push below cannot succeed without it.
if ! echo "$OPENCSG_ACR_PASSWORD" | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin; then
  echo "Login to ${OPENCSG_ACR} failed" >&2
  exit 1
fi

echo "Building images..."
export IMAGE_TAG=$3
# Propagate a build/push failure instead of reporting success afterwards.
if ! docker buildx build --platform linux/amd64,linux/arm64 \
  -t "${DOCKER_IMAGE_PREFIX}/llama-factory:${IMAGE_TAG}" \
  -t "${DOCKER_IMAGE_PREFIX}/llama-factory:latest" \
  -f Dockerfile.llamafactory \
  --push .; then
  echo "Image build or push failed" >&2
  exit 1
fi

# The original printed $NEW_TAG, which is never set; the pushed tag is IMAGE_TAG.
echo "Done! New image pushed with tag: $IMAGE_TAG"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
#!/usr/bin/env bash
#
# Build one of the inference images and push it to the OpenCSG container
# registry under both the requested tag and "latest".
#
# Usage:
#   $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE>
#
# <IMAGE> is "<name>:<tag>", where <name> selects the Dockerfile:
#   vllm-local -> Dockerfile.vllm      (linux/amd64 + linux/arm64)
#   vllm-cpu   -> Dockerfile.vllm-cpu  (linux/amd64 + linux/arm64)
#   tgi        -> Dockerfile.tgi       (linux/amd64 only)
#
# Optional environment overrides:
#   OPENCSG_ACR            registry host
#                          (default: opencsg-registry.cn-beijing.cr.aliyuncs.com)
#   OPENCSG_ACR_NAMESPACE  registry namespace (default: public)
#
# NOTE(review): the password is taken as a positional argument, so it can end
# up in shell history and process listings; consider an env var or stdin.

if [ -z "$1" ] || [ -z "$2" ] || [ -z "$3" ]; then
  echo "Usage: $0 <OPENCSG_ACR_USERNAME> <OPENCSG_ACR_PASSWORD> <IMAGE>"
  echo -e "IMAGE example: \n  vllm-local:2.9\n  vllm-cpu:2.5\n  tgi:2.3"
  exit 1
fi

# Resolve the Dockerfile and target platforms first, so an unsupported image
# name fails before any docker command runs. Without the default branch
# DOCKERFILE stayed empty and `-f` consumed the next argument.
export IMAGE=$3
export PLATFORMS="linux/amd64,linux/arm64"
case "${IMAGE%:*}" in
  vllm-local)
    DOCKERFILE="Dockerfile.vllm"
    ;;
  vllm-cpu)
    DOCKERFILE="Dockerfile.vllm-cpu"
    ;;
  tgi)
    PLATFORMS="linux/amd64"
    DOCKERFILE="Dockerfile.tgi"
    ;;
  *)
    echo "Unsupported image: ${IMAGE} (expected vllm-local, vllm-cpu or tgi)" >&2
    exit 1
    ;;
esac

# Cross-platform builds need QEMU binfmt handlers. On Linux they are
# registered explicitly; on macOS the script assumes they are already there.
OS=$(uname -s)
echo "Enable docker buildx with QEMU for ${OS}"
if [ "$OS" = "Darwin" ]; then
  echo "QEMU enabled default..."
elif [ "$OS" = "Linux" ]; then
  echo "Install QEMU support..."
  docker run --privileged --rm tonistiigi/binfmt --install all
else
  echo "Unknown OS: $OS"
fi

export DOCKER_BUILDKIT=1
export BUILDX_NO_DEFAULT_ATTESTATIONS=1

# Reuse the first existing builder that uses the docker-container driver,
# otherwise create one. The awk step strips the trailing "*" that marks the
# currently selected builder in `docker buildx ls` output.
DOCKER_CONTAINERS=$(docker buildx ls | grep docker-container)
if [[ -n "$DOCKER_CONTAINERS" ]]; then
  BUILDER=$(echo "$DOCKER_CONTAINERS" | awk 'NR==1{gsub(/\*$/, "", $1); print $1}')
  docker buildx use "${BUILDER}"
else
  docker buildx create --name container-builder --driver docker-container --use --bootstrap
fi

OPENCSG_ACR_USERNAME=$1
OPENCSG_ACR_PASSWORD=$2
OPENCSG_ACR=${OPENCSG_ACR:-"opencsg-registry.cn-beijing.cr.aliyuncs.com"}
OPENCSG_ACR_NAMESPACE=${OPENCSG_ACR_NAMESPACE:-"public"}
DOCKER_IMAGE_PREFIX="$OPENCSG_ACR/$OPENCSG_ACR_NAMESPACE"

echo "Logging in to OpenCSG ACR..."
# Stop here on a failed login: the --push below cannot succeed without it.
if ! echo "$OPENCSG_ACR_PASSWORD" | docker login "$OPENCSG_ACR" -u "$OPENCSG_ACR_USERNAME" --password-stdin; then
  echo "Login to ${OPENCSG_ACR} failed" >&2
  exit 1
fi

echo "Building images..."
# Propagate a build/push failure instead of reporting success afterwards.
if ! docker buildx build --platform "${PLATFORMS}" \
  -t "${DOCKER_IMAGE_PREFIX}/${IMAGE}" \
  -t "${DOCKER_IMAGE_PREFIX}/${IMAGE%:*}:latest" \
  -f "${DOCKERFILE}" \
  --push .; then
  echo "Image build or push failed" >&2
  exit 1
fi

# The original printed $NEW_TAG, which is never set; report the pushed image.
echo "Done! New image pushed with tag: $IMAGE"
Oops, something went wrong.