-
-
Notifications
You must be signed in to change notification settings - Fork 104
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add docker-compose.yml and corresponding .env * Remove the now unnecessary entrypoint * Remove the now unnecessary entrypoint * Add example .env for mixtral-instruct-awq * improved rework * quick entrypoint fix * quick Dockerfile fix * another Dockerfile fix * only build-essential's essentials * add build context (commented out) * Fix Dockerfile Entrypoint exec form doesn't do variable substitution automatically ($HOME) * Fix Dockerfile Make entrypoint executable * Update .env Changed SSL path to work for non-root user * Update docker-compose.yml Changed SSL path to work for non-root user * Update entrypoint.sh Changed SSL path to work for non-root user * Fix OpenAI endpoint in entrypoint.sh Make it work when ENDPOINT is undefined * make entrypoint script executable --------- Co-authored-by: Stefan Schwarz <s.schwarz@mps-solutions.de> Co-authored-by: AlpinDale <alpindale@gmail.com>
- Loading branch information
1 parent
1661578
commit 810ca83
Showing
4 changed files
with
93 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#HUGGING_FACE_HUB_TOKEN=<secret> | ||
|
||
#HF_CACHE=~/.cache/huggingface | ||
#UID=1000 | ||
#GID=0 | ||
#TZ=UTC | ||
|
||
#ENDPOINT=openai | ||
#PORT=5000 | ||
#API_KEY=sk-example # Only used by the openai endpoint (the default) | ||
#SSL_KEYFILE=~/ssl/server.key # Only used by the openai endpoint (the default) | ||
#SSL_CERTFILE=~/ssl/server.crt # Only used by the openai endpoint (the default) | ||
#MODEL_NAME=mistralai/Mistral-7B-Instruct-v0.2 | ||
#REVISION=main | ||
#DATATYPE=half # FP16. Recommended for AWQ quantization. | ||
#KVCACHE=fp8_e5m2 # Reduces the GPU memory footprint and boosts performance, but may cause a slight accuracy drop. | ||
#CONTEXT_LENGTH=32768 # If unspecified, will be automatically derived from the model. | ||
#NUM_GPUS=1 | ||
#GPU_MEMORY_UTILIZATION=0.8 # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode. | ||
#QUANTIZATION=awq | ||
#ENFORCE_EAGER=true # If you are running out of memory, consider decreasing 'gpu_memory_utilization' or enforcing eager mode. | ||
#CMD_ADDITIONAL_ARGUMENTS="--seed 0" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,39 @@ | ||
FROM nvidia/cuda:12.1.1-devel-ubuntu22.04 | ||
|
||
WORKDIR /workspace/aphrodite-engine | ||
ENV HOME=/app/aphrodite-engine | ||
|
||
# Upgrade OS Packages | ||
WORKDIR $HOME | ||
|
||
# Upgrade OS Packages + Prepare Python Environment | ||
RUN set -eux; \ | ||
apt-get update \ | ||
export DEBIAN_FRONTEND=noninteractive \ | ||
&& apt-get update \ | ||
&& apt-get upgrade -y \ | ||
&& rm -rf /var/lib/apt/lists/* | ||
|
||
# Preparing Conda Environment | ||
RUN apt-get update \ | ||
&& apt-get install -y git build-essential \ | ||
&& apt-get install python3 python3-pip -y \ | ||
&& apt-get install -y bzip2 g++ git make python3-pip tzdata \ | ||
&& rm -fr /var/lib/apt/lists/* | ||
|
||
COPY entrypoint.sh /workspace/aphrodite-engine | ||
|
||
ENV PATH /opt/conda/envs/aphrodite-engine/bin:$PATH | ||
|
||
# alias python3 to python | ||
# Alias python3 to python | ||
RUN ln -s /usr/bin/python3 /usr/bin/python | ||
|
||
RUN python3 -m pip install --upgrade pip | ||
RUN python3 -m pip install --no-cache-dir --upgrade pip | ||
|
||
RUN git clone https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine | ||
RUN mv /tmp/aphrodite-engine/* /workspace/aphrodite-engine/ | ||
RUN rm -rf /tmp/aphrodite-engine | ||
RUN git clone https://github.com/PygmalionAI/aphrodite-engine.git /tmp/aphrodite-engine \ | ||
&& mv /tmp/aphrodite-engine/* . \ | ||
&& rm -fr /tmp/aphrodite-engine \ | ||
&& chmod +x docker/entrypoint.sh | ||
|
||
# Export the CUDA_HOME variable correctly | ||
ENV CUDA_HOME=/usr/local/cuda | ||
|
||
ENV HF_HOME=/tmp | ||
|
||
RUN python3 -m pip install --no-cache-dir -e . | ||
|
||
ENTRYPOINT [ "/app/aphrodite-engine/entrypoint.sh" ] | ||
# The exec form of ENTRYPOINT does not expand variables such as $HOME, so the path is spelled out in full | ||
ENTRYPOINT ["/app/aphrodite-engine/docker/entrypoint.sh"] | ||
|
||
EXPOSE 5000 | ||
|
||
USER 1000:0 | ||
|
||
VOLUME ["/tmp"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,29 @@ | ||
version: "3.7" | ||
|
||
services: | ||
aphrodite-engine: | ||
build: | ||
context: . | ||
restart: on-failure:5 | ||
environment: | ||
- NUM_GPUS=${NUM_GPUS} | ||
- MODEL_NAME=${MODEL_NAME} | ||
- REVISION=${REVISION} | ||
- HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN} | ||
- QUANTIZATION=${QUANTIZATION} | ||
- KVCACHE=${KVCACHE} | ||
- API_KEY=${API_KEY} | ||
- CONTEXT_LENGTH=${CONTEXT_LENGTH} | ||
- GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION} | ||
- ENFORCE_EAGER=${ENFORCE_EAGER} | ||
volumes: | ||
- ${HOME}/.cache:/root/.cache/ | ||
shm_size: 10g | ||
ports: | ||
- "7860:7860" | ||
- "2242:2242" | ||
#build: | ||
# context: . | ||
container_name: aphrodite-engine | ||
deploy: | ||
resources: | ||
reservations: | ||
devices: | ||
- driver: nvidia | ||
count: all | ||
capabilities: [gpu] | ||
- capabilities: [gpu] | ||
count: all | ||
driver: nvidia | ||
env_file: .env | ||
hostname: aphrodite-engine | ||
image: alpindale/aphrodite-engine | ||
ipc: host | ||
ports: | ||
- "${PORT:-5000}:5000" | ||
restart: on-failure:5 | ||
user: "${UID:-1000}:${GID:-0}" | ||
volumes: | ||
- ${HF_CACHE:-hf-cache}:/tmp | ||
- ${SSL_CERTFILE:-/dev/null}:/app/aphrodite-engine/server.crt:ro | ||
- ${SSL_KEYFILE:-/dev/null}:/app/aphrodite-engine/server.key:ro | ||
|
||
volumes: | ||
hf-cache: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,29 +1,30 @@ | ||
#!/bin/bash | ||
#!/bin/bash -e | ||
|
||
set -xe | ||
|
||
cd /app/aphrodite-engine | ||
echo 'Starting Aphrodite Engine API server...' | ||
CMD="python3 -m aphrodite.endpoints.openai.api_server \ | ||
--host 0.0.0.0 \ | ||
--port 7860 \ | ||
--model $MODEL_NAME \ | ||
--tensor-parallel-size $NUM_GPUS \ | ||
--dtype $DATATYPE \ | ||
--max-model-len $CONTEXT_LENGTH \ | ||
--gmu $GPU_MEMORY_UTILIZATION" | ||
|
||
if [ -n "$QUANTIZATION" ]; then | ||
CMD="$CMD --quantization $QUANTIZATION --dtype half" | ||
fi | ||
if [ -n "$API_KEY" ]; then | ||
CMD="$CMD --api-keys $API_KEY" | ||
fi | ||
if [ -n "$ENFORCE_EAGER" ]; then | ||
CMD="$CMD --enforce-eager" | ||
fi | ||
if [ -n "$KVCACHE" ]; then | ||
CMD="$CMD --kv-cache-dtype $KVCACHE" | ||
CMD="python3 -m aphrodite.endpoints.${ENDPOINT:-openai}.api_server | ||
--host 0.0.0.0 | ||
--port 5000 | ||
--download-dir ${HF_HOME:?}/hub | ||
${MODEL_NAME:+--model $MODEL_NAME} | ||
${REVISION:+--revision $REVISION} | ||
${DATATYPE:+--dtype $DATATYPE} | ||
${KVCACHE:+--kv-cache-dtype $KVCACHE} | ||
${CONTEXT_LENGTH:+--max-model-len $CONTEXT_LENGTH} | ||
${NUM_GPUS:+--tensor-parallel-size $NUM_GPUS} | ||
${GPU_MEMORY_UTILIZATION:+--gpu-memory-utilization $GPU_MEMORY_UTILIZATION} | ||
${QUANTIZATION:+--quantization $QUANTIZATION} | ||
${ENFORCE_EAGER:+--enforce-eager} | ||
${CMD_ADDITIONAL_ARGUMENTS}" | ||
|
||
# Only the 'openai' endpoint currently supports api-keys and ssl | ||
if [ "${ENDPOINT:-openai}" = "openai" ]; then | ||
CMD+=" ${API_KEY:+--api-keys "$API_KEY"} ${SSL_KEYFILE:+--ssl-keyfile server.key} ${SSL_CERTFILE:+--ssl-certfile server.crt}" | ||
fi | ||
|
||
exec $CMD | ||
# set umask to ensure group read / write at runtime | ||
umask 002 | ||
|
||
set -x | ||
|
||
exec $CMD |