Skip to content

Commit

Permalink
fix+feat: docker compose (#264)
Browse files Browse the repository at this point in the history
* Add docker-compose.yml and corresponding .env

* Remove the now unnecessary entrypoint

* Remove the now unnecessary entrypoint

* Add example .env for mixtral-instruct-awq

* improved rework

* quick entrypoint fix

* quick Dockerfile fix

* another Dockerfile fix

* only build-essential's essentials

* add build context (commented out)

* Fix Dockerfile

The exec form of ENTRYPOINT doesn't perform shell variable substitution, so $HOME is not expanded automatically

* Fix Dockerfile

Make entrypoint executable

* Update .env

Changed SSL path to work for non-root user

* Update docker-compose.yml

Changed SSL path to work for non-root user

* Update entrypoint.sh

Changed SSL path to work for non-root user

* Fix OpenAI endpoint in entrypoint.sh

Make it work when ENDPOINT is undefined

* make entrypoint script executable

---------

Co-authored-by: Stefan Schwarz <s.schwarz@mps-solutions.de>
Co-authored-by: AlpinDale <alpindale@gmail.com>
  • Loading branch information
3 people committed Feb 29, 2024
1 parent 1661578 commit 810ca83
Show file tree
Hide file tree
Showing 4 changed files with 93 additions and 66 deletions.
22 changes: 22 additions & 0 deletions docker/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Template of optional settings consumed by docker-compose.yml and entrypoint.sh.
# Every entry is commented out; uncomment a line to override the built-in behavior.
#HUGGING_FACE_HUB_TOKEN=<secret>

# HF_CACHE is mounted at /tmp in the container (a named volume is used when unset);
# UID/GID select the user the container runs as.
#HF_CACHE=~/.cache/huggingface
#UID=1000
#GID=0
#TZ=UTC

# PORT is the published host port; the server inside the container listens on 5000.
#ENDPOINT=openai
#PORT=5000
#API_KEY=sk-example # only applied when ENDPOINT is openai (the default)
#SSL_KEYFILE=~/ssl/server.key # only applied when ENDPOINT is openai (the default)
#SSL_CERTFILE=~/ssl/server.crt # only applied when ENDPOINT is openai (the default)
#MODEL_NAME=mistralai/Mistral-7B-Instruct-v0.2
#REVISION=main
#DATATYPE=half # FP16. Recommended for AWQ quantization.
#KVCACHE=fp8_e5m2 # Reduces the GPU memory footprint and boosts performance, but may cause a slight accuracy drop.
#CONTEXT_LENGTH=32768 # If unspecified, will be automatically derived from the model.
#NUM_GPUS=1
#GPU_MEMORY_UTILIZATION=0.8 # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode.
#QUANTIZATION=awq
# NOTE: entrypoint.sh adds --enforce-eager for ANY non-empty value (even "false"); leave it unset to disable.
#ENFORCE_EAGER=true # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode.
# Appended verbatim to the server command line by entrypoint.sh.
#CMD_ADDITIONAL_ARGUMENTS="--seed 0"
42 changes: 23 additions & 19 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,35 +1,39 @@
# NOTE(review): this listing appears to interleave removed and added diff lines
# without +/- markers (e.g. two WORKDIR and two ENTRYPOINT instructions) — confirm
# against the repository before treating it as the final Dockerfile.
# Base image: NVIDIA CUDA 12.1.1 "devel" variant on Ubuntu 22.04.
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04

WORKDIR /workspace/aphrodite-engine
# HOME doubles as the application directory; the WORKDIR below is derived from it.
ENV HOME=/app/aphrodite-engine

# Upgrade OS Packages
WORKDIR $HOME

# Upgrade OS Packages + Prepare Python Environment
RUN set -eux; \
apt-get update \
export DEBIAN_FRONTEND=noninteractive \
&& apt-get update \
&& apt-get upgrade -y \
&& rm -rf /var/lib/apt/lists/*

# Preparing Conda Environment
RUN apt-get update \
&& apt-get install -y git build-essential \
&& apt-get install python3 python3-pip -y \
&& apt-get install -y bzip2 g++ git make python3-pip tzdata \
&& rm -fr /var/lib/apt/lists/*

COPY entrypoint.sh /workspace/aphrodite-engine

ENV PATH /opt/conda/envs/aphrodite-engine/bin:$PATH

# alias python3 to python
# Alias python3 to python
RUN ln -s /usr/bin/python3 /usr/bin/python

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python3 -m pip install --upgrade pip
RUN python3 -m pip install --no-cache-dir --upgrade pip

RUN git clone https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine
RUN mv /tmp/aphrodite-engine/* /workspace/aphrodite-engine/
RUN rm -rf /tmp/aphrodite-engine
# Single-layer variant: clone to /tmp, move the checkout into the working directory,
# delete the clone, and mark the entrypoint script executable.
# The `*` glob does not match dotfiles, so hidden entries (e.g. .git) are not moved
# and are deleted together with the temporary clone.
RUN git clone https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine \
&& mv /tmp/aphrodite-engine/* . \
&& rm -fr /tmp/aphrodite-engine \
&& chmod +x docker/entrypoint.sh

# Export the CUDA_HOME variable correctly
ENV CUDA_HOME=/usr/local/cuda

# HF_HOME points at /tmp, which is declared as a VOLUME at the end of this listing.
ENV HF_HOME=/tmp

# Editable install of the project from the working directory.
RUN python3 -m pip install --no-cache-dir -e .

ENTRYPOINT [ "/app/aphrodite-engine/entrypoint.sh" ]
# The exec (JSON-array) form of ENTRYPOINT does no shell variable substitution,
# so the absolute path is spelled out instead of using $HOME.
ENTRYPOINT ["/app/aphrodite-engine/docker/entrypoint.sh"]

# Documents the port the server listens on; EXPOSE does not publish it by itself.
EXPOSE 5000

# Run as UID 1000 with group 0 instead of the image's default user.
USER 1000:0

# /tmp is also HF_HOME (set above), so data written under HF_HOME lands in this volume.
VOLUME ["/tmp"]
46 changes: 23 additions & 23 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
# NOTE(review): this listing appears to interleave removed and added diff lines
# without +/- markers, and the original YAML indentation is not preserved —
# confirm against the repository before using it as a compose file.
version: "3.7"

services:
aphrodite-engine:
build:
context: .
restart: on-failure:5
environment:
- NUM_GPUS=${NUM_GPUS}
- MODEL_NAME=${MODEL_NAME}
- REVISION=${REVISION}
- HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN}
- QUANTIZATION=${QUANTIZATION}
- KVCACHE=${KVCACHE}
- API_KEY=${API_KEY}
- CONTEXT_LENGTH=${CONTEXT_LENGTH}
- GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION}
- ENFORCE_EAGER=${ENFORCE_EAGER}
volumes:
- ${HOME}/.cache:/root/.cache/
shm_size: 10g
ports:
- "7860:7860"
- "2242:2242"
#build:
# context: .
container_name: aphrodite-engine
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: all
capabilities: [gpu]
- capabilities: [gpu]
count: all
driver: nvidia
# Container environment is loaded from the .env file next to this compose file.
env_file: .env
hostname: aphrodite-engine
image: alpindale/aphrodite-engine
ipc: host
# Host port defaults to 5000 when PORT is unset; the container side is fixed at 5000.
ports:
- "${PORT:-5000}:5000"
restart: on-failure:5
# Defaults to UID 1000 / GID 0 when UID and GID are unset.
user: "${UID:-1000}:${GID:-0}"
# HF_CACHE (or the named volume hf-cache when unset) is mounted at /tmp.
# When SSL_CERTFILE / SSL_KEYFILE are unset, /dev/null is mounted read-only in place
# of the certificate and key files.
volumes:
- ${HF_CACHE:-hf-cache}:/tmp
- ${SSL_CERTFILE:-/dev/null}:/app/aphrodite-engine/server.crt:ro
- ${SSL_KEYFILE:-/dev/null}:/app/aphrodite-engine/server.key:ro

# Named volume used as the /tmp mount when HF_CACHE is not set.
volumes:
hf-cache:
49 changes: 25 additions & 24 deletions docker/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
#!/bin/bash
#!/bin/bash -e
# NOTE(review): this listing appears to interleave removed and added diff lines
# without +/- markers (two shebangs, two CMD definitions, two `exec $CMD`) —
# confirm against the repository before treating it as the final script.

set -xe

cd /app/aphrodite-engine
echo 'Starting Aphrodite Engine API server...'
CMD="python3 -m aphrodite.endpoints.openai.api_server \
--host 0.0.0.0 \
--port 7860 \
--model $MODEL_NAME \
--tensor-parallel-size $NUM_GPUS \
--dtype $DATATYPE \
--max-model-len $CONTEXT_LENGTH \
--gmu $GPU_MEMORY_UTILIZATION"

# Optional flags are appended only when the corresponding variable is non-empty.
if [ -n "$QUANTIZATION" ]; then
CMD="$CMD --quantization $QUANTIZATION --dtype half"
fi
if [ -n "$API_KEY" ]; then
CMD="$CMD --api-keys $API_KEY"
fi
if [ -n "$ENFORCE_EAGER" ]; then
CMD="$CMD --enforce-eager"
fi
if [ -n "$KVCACHE" ]; then
CMD="$CMD --kv-cache-dtype $KVCACHE"
# Build the server command line as one string. Each ${VAR:+...} expansion contributes
# its option only when VAR is set and non-empty; ENDPOINT falls back to "openai";
# ${HF_HOME:?} aborts the script when HF_HOME is unset or empty.
# The embedded newlines disappear when $CMD is word-split at exec time.
CMD="python3 -m aphrodite.endpoints.${ENDPOINT:-openai}.api_server
--host 0.0.0.0
--port 5000
--download-dir ${HF_HOME:?}/hub
${MODEL_NAME:+--model $MODEL_NAME}
${REVISION:+--revision $REVISION}
${DATATYPE:+--dtype $DATATYPE}
${KVCACHE:+--kv-cache-dtype $KVCACHE}
${CONTEXT_LENGTH:+--max-model-len $CONTEXT_LENGTH}
${NUM_GPUS:+--tensor-parallel-size $NUM_GPUS}
${GPU_MEMORY_UTILIZATION:+--gpu-memory-utilization $GPU_MEMORY_UTILIZATION}
${QUANTIZATION:+--quantization $QUANTIZATION}
${ENFORCE_EAGER:+--enforce-eager}
${CMD_ADDITIONAL_ARGUMENTS}"

# Only the 'openai' endpoint currently supports api-keys and ssl
# The SSL options use the fixed relative names server.key / server.crt rather than the
# values of SSL_KEYFILE / SSL_CERTFILE; the variables only decide whether the flags are added.
if [ "${ENDPOINT:-openai}" = "openai" ]; then
CMD+=" ${API_KEY:+--api-keys "$API_KEY"} ${SSL_KEYFILE:+--ssl-keyfile server.key} ${SSL_CERTFILE:+--ssl-certfile server.crt}"
fi

exec $CMD
# set umask to ensure group read / write at runtime
umask 002

# Trace from here on, so the fully expanded command is printed before it runs.
set -x

# $CMD is expanded unquoted, so the shell word-splits it into separate arguments
# (values containing whitespace are split too); exec replaces this shell with the server.
exec $CMD

0 comments on commit 810ca83

Please sign in to comment.