-
Notifications
You must be signed in to change notification settings - Fork 152
/
Copy pathdocker-compose.gpu.yml
100 lines (93 loc) · 3.76 KB
/
docker-compose.gpu.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# GPU-enabled Compose stack: a uvicorn-served web app, a Celery worker, a Redis
# broker/result backend and an Ollama server.
# Host variables are substituted with the ${VAR-default} form, so each default
# applies only when the variable is unset on the host.
services:
  # HTTP API served by uvicorn on port 8000.
  web:
    build:
      context: ./app  # Build context is the ./app directory
      dockerfile: Dockerfile.gpu  # GPU Dockerfile, resolved relative to the build context
    container_name: fastapi_app
    # $$APP_ENV is escaped so that Compose passes a literal $APP_ENV through and
    # bash expands it inside the container, using the value set under
    # `environment` below. Anything other than 'production' enables --reload.
    command: >
      bash -c "
      if [ \"$$APP_ENV\" = 'production' ]; then
        uvicorn main:app --host 0.0.0.0 --port 8000;
      else
        uvicorn main:app --host 0.0.0.0 --port 8000 --reload;
      fi"
    ports:
      - "8000:8000"
    environment:
      - CELERY_BROKER_URL=${CELERY_BROKER_URL-redis://redis:6379/0}
      - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND-redis://redis:6379/0}
      - LLM_PULL_API_URL=${LLM_PULL_API_URL-http://web:8000/llm_pull}
      # NOTE(review): "GENEREATE" looks like a typo, but the name is kept as-is
      # because the application presumably reads this exact variable - confirm.
      - LLM_GENEREATE_API_URL=${LLM_GENEREATE_API_URL-http://web:8000/llm_generate}
      - OLLAMA_HOST=${OLLAMA_HOST-http://ollama:11434}
      - APP_ENV=${APP_ENV-development}  # Default to development mode
      - STORAGE_PROFILE_PATH=${STORAGE_PROFILE_PATH-/storage_profiles}  # Path of the storage profiles mount
      # NOTE(review): these defaults point at localhost, which inside a
      # container is the container itself - confirm which process consumes them.
      - LIST_FILES_URL=${LIST_FILES_URL-http://localhost:8000/storage/list}
      - LOAD_FILE_URL=${LOAD_FILE_URL-http://localhost:8000/storage/load}
      - DELETE_FILE_URL=${DELETE_FILE_URL-http://localhost:8000/storage/delete}
      # NOTE(review): the double quotes inside the default may end up as part of
      # the value - verify how the application treats them.
      - LLAMA_VISION_PROMPT=${LLAMA_VISION_PROMPT-"You are OCR. Convert image to markdown."}
    depends_on:
      - redis
      - ollama
    volumes:
      - ./storage_profiles:/storage_profiles  # Storage profile definitions
      - ./storage:/storage  # Storage directory used for file uploads
      - ./app:/app  # Mount the app directory to enable auto-reloading
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]  # Request GPU support

  # Background Celery worker; built from the same image definition as `web`.
  celery_worker:
    build:
      context: ./app  # Build context is the ./app directory
      dockerfile: Dockerfile.gpu  # GPU Dockerfile, resolved relative to the build context
    container_name: celery_worker
    command: celery -A main.celery worker --loglevel=info --pool=solo
    environment:
      - OLLAMA_HOST=${OLLAMA_HOST-http://ollama:11434}
      - CELERY_BROKER_URL=${CELERY_BROKER_URL-redis://redis:6379/0}
      - CELERY_RESULT_BACKEND=${CELERY_RESULT_BACKEND-redis://redis:6379/0}
      - STORAGE_PROFILE_PATH=${STORAGE_PROFILE_PATH-/storage_profiles}  # Path of the storage profiles mount
      # NOTE(review): localhost here is the worker container, not `web` -
      # confirm whether the worker ever calls these URLs.
      - LIST_FILES_URL=${LIST_FILES_URL-http://localhost:8000/storage/list}
      - LOAD_FILE_URL=${LOAD_FILE_URL-http://localhost:8000/storage/load}
      - DELETE_FILE_URL=${DELETE_FILE_URL-http://localhost:8000/storage/delete}
      # NOTE(review): the double quotes inside the default may end up as part of
      # the value - verify how the application treats them.
      - LLAMA_VISION_PROMPT=${LLAMA_VISION_PROMPT-"You are OCR. Convert image to markdown."}
    depends_on:
      - redis
      - ollama  # The worker is configured with OLLAMA_HOST, so start Ollama first (as `web` does)
    volumes:
      - ./storage_profiles:/storage_profiles  # Storage profile definitions
      - ./storage:/storage  # Storage directory used for file uploads
      - ./app:/app  # Application source, shared with the `web` service
    deploy:
      resources:
        reservations:
          devices:
            - capabilities: [gpu]  # Request GPU support

  # Redis instance referenced by the default Celery broker/result-backend URLs.
  redis:
    image: redis:7.2.4-alpine
    container_name: redis
    ports:
      - "6379:6379"

  # Ollama server reachable from the other services at http://ollama:11434.
  ollama:
    image: ollama/ollama  # Use the official Ollama image
    container_name: ollama
    pull_policy: always
    tty: true
    restart: always
    ports:
      - "11434:11434"  # Publish Ollama's API port on the same host port
    healthcheck:
      # Probe through the bundled CLI instead of curl, which the ollama/ollama
      # image is not known to ship; `ollama list` fails when the API is
      # unreachable.
      test: ["CMD", "ollama", "list"]
      interval: 30s
      timeout: 10s
      retries: 3
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities: [gpu]
    environment:
      - NVIDIA_VISIBLE_DEVICES=all
      - NVIDIA_DRIVER_CAPABILITIES=compute,utility