generated from kbase/kbase-template
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdocker-compose.yaml
201 lines (189 loc) · 6.39 KB
/
docker-compose.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
# NOTE(review): recent Docker Compose releases treat the top-level `version`
# key as informational only; it is kept here for older tooling — confirm
# before removing.
version: '3'

# This docker-compose is for developer convenience, not for running in production.
# Every credential in this file is a hard-coded development value; do not
# reuse any of them outside a local environment.
services:
  # YARN ResourceManager (selected via YARN_MODE=resourcemanager).
  yarn-resourcemanager:
    image: ghcr.io/kbase/cdm-prototype-yarn:pr-8
    container_name: yarn-resourcemanager
    # Images from the ghcr.io/kbase registry are exclusively available for Linux/AMD64 platforms.
    platform: linux/amd64
    ports:
      - "8088:8088"  # web ui
    environment:
      - YARN_MODE=resourcemanager
      - MINIO_URL=http://minio:9002
      - MINIO_ACCESS_KEY=yarnuser
      - MINIO_SECRET_KEY=yarnpass
    networks:
      - cdm-jupyterhub-network

  # YARN NodeManager (YARN_MODE=nodemanager), pointed at the
  # yarn-resourcemanager service by hostname.
  yarn-nodemanager:
    image: ghcr.io/kbase/cdm-prototype-yarn:pr-8
    container_name: yarn-nodemanager
    platform: linux/amd64
    ports:
      - "8042:8042"  # web ui
    environment:
      - YARN_MODE=nodemanager
      - YARN_RESOURCEMANAGER_HOSTNAME=yarn-resourcemanager
      - MINIO_URL=http://minio:9002
      - MINIO_ACCESS_KEY=yarnuser
      - MINIO_SECRET_KEY=yarnpass
    networks:
      - cdm-jupyterhub-network

  # Spark standalone master; its web UI port is set to 8090 and published.
  spark-master:
    # The latest image from cdm-jupyterhub that includes spark standalone mode
    image: ghcr.io/kbase/cdm-jupyterhub:pr-74
    container_name: spark-master
    platform: linux/amd64
    ports:
      - "8090:8090"
    environment:
      - SPARK_MODE=master
      - SPARK_MASTER_WEBUI_PORT=8090
      - MAX_EXECUTORS=4
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
      - POSTGRES_URL=postgres:5432
    volumes:
      - ./cdr/cdm/jupyter/cdm_shared_workspace:/cdm_shared_workspace
    networks:
      - cdm-jupyterhub-network

  # Spark worker 1 (2 cores / 1G), registering with spark-master on 7077.
  spark-worker-1:
    # The latest image from cdm-jupyterhub that includes spark standalone mode
    image: ghcr.io/kbase/cdm-jupyterhub:pr-74
    container_name: spark-worker-1
    platform: linux/amd64
    ports:
      - "8081:8081"
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=2
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_WEBUI_PORT=8081
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
      - POSTGRES_URL=postgres:5432
    volumes:
      - ./cdr/cdm/jupyter/cdm_shared_workspace:/cdm_shared_workspace
    networks:
      - cdm-jupyterhub-network

  # Spark worker 2; identical to worker 1 except for its name and web UI
  # port (8082).
  spark-worker-2:
    # The latest image from cdm-jupyterhub that includes spark standalone mode
    image: ghcr.io/kbase/cdm-jupyterhub:pr-74
    container_name: spark-worker-2
    platform: linux/amd64
    ports:
      - "8082:8082"
    environment:
      - SPARK_MODE=worker
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_WORKER_CORES=2
      - SPARK_WORKER_MEMORY=1G
      - SPARK_WORKER_WEBUI_PORT=8082
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
      - POSTGRES_URL=postgres:5432
    volumes:
      - ./cdr/cdm/jupyter/cdm_shared_workspace:/cdm_shared_workspace
    networks:
      - cdm-jupyterhub-network

  # MinIO object store. The `command` below binds the API to port 9002 and
  # the console to port 9003; both are published to the host.
  minio:
    image: minio/minio
    container_name: spark-minio
    ports:
      - "9002:9002"
      # MinIO Console is available at http://localhost:9003
      - "9003:9003"
    environment:
      MINIO_ROOT_USER: minio
      MINIO_ROOT_PASSWORD: minio123
    # Healthy once a TCP connection to the API port succeeds; the
    # minio-create-bucket service waits on this.
    healthcheck:
      # reference: https://github.com/rodrigobdz/docker-compose-healthchecks?tab=readme-ov-file#minio-release2023-11-01t18-37-25z-and-older
      test: timeout 5s bash -c ':> /dev/tcp/127.0.0.1/9002' || exit 1
      interval: 1s
      timeout: 10s
      retries: 5
    # Note there is no bucket by default
    command: server --address 0.0.0.0:9002 --console-address 0.0.0.0:9003 /data
    networks:
      - cdm-jupyterhub-network

  # Runs the mounted entrypoint script with the MinIO client image once
  # `minio` reports healthy. Presumably the script creates the buckets and
  # applies the mounted policy files — verify against
  # scripts/minio_create_bucket_entrypoint.sh.
  minio-create-bucket:
    image: minio/mc
    depends_on:
      minio:
        condition: service_healthy
    entrypoint: /scripts/minio_create_bucket_entrypoint.sh
    volumes:
      - ./config/cdm-read-only-policy.json:/config/cdm-read-only-policy.json
      - ./config/cdm-read-write-policy.json:/config/cdm-read-write-policy.json
      - ./config/yarn-write-policy.json:/config/yarn-write-policy.json
      - ./scripts/minio_create_bucket_entrypoint.sh:/scripts/minio_create_bucket_entrypoint.sh
    networks:
      - cdm-jupyterhub-network

  # JupyterHub, built from the Dockerfile in this directory and started
  # after minio-create-bucket.
  cdm_jupyterhub:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: cdm-jupyterhub
    platform: linux/amd64
    ports:
      - "4043:4043"
    depends_on:
      - minio-create-bucket
    environment:
      - NOTEBOOK_PORT=4043
      - JUPYTER_MODE=jupyterhub
      - YARN_RESOURCE_MANAGER_URL=http://yarn-resourcemanager:8032
      - SPARK_MASTER_URL=spark://spark-master:7077
      - SPARK_DRIVER_HOST=cdm-jupyterhub
      - MINIO_URL=http://minio:9002
      - MINIO_ACCESS_KEY=minio-readonly
      - MINIO_SECRET_KEY=minio123
      - MINIO_RW_ACCESS_KEY=minio-readwrite
      - MINIO_RW_SECRET_KEY=minio123
      - S3_YARN_BUCKET=yarn
      - MAX_EXECUTORS=4
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
      - POSTGRES_URL=postgres:5432
      - JUPYTERHUB_ADMIN_PASSWORD=testpassword123
      - NETWORK_NAME=cdm-jupyterhub-network
      # NOTE(review): presumably the image name Compose gives this service's
      # build when the project is named `cdm-jupyterhub` — confirm.
      - JUPYTERHUB_USER_IMAGE=cdm-jupyterhub-cdm_jupyterhub:latest
      - JUPYTERHUB_MOUNT_BASE_DIR=/cdr/cdm/jupyter
      - ENVIRONMENT=dev
    volumes:
      - ./cdr/cdm/jupyter/cdm_shared_workspace:/cdm_shared_workspace
      - ./cdr/cdm/jupyter/jupyterhub_secrets:/jupyterhub_secrets
      - ./cdr/cdm/jupyter/jupyterhub/users_home:/jupyterhub/users_home
      # Host Docker socket; presumably lets the hub start per-user
      # containers — TODO confirm.
      - /var/run/docker.sock:/var/run/docker.sock
    networks:
      - cdm-jupyterhub-network

  # PostgreSQL 16.3. User, password and database match the POSTGRES_*
  # values passed to the Spark and JupyterHub services above.
  postgres:
    image: postgres:16.3
    restart: always
    container_name: postgres
    # To avoid incorrect user permissions, manually create the volume directory before running Docker.
    # export UID=$(id -u)
    # export GID=$(id -g)
    # mkdir -p cdr/cdm/jupyter/cdm-postgres
    # reference: https://forums.docker.com/t/systemd-coredump-taking-ownership-of-tmp-db-directory-and-contents-in-rails-app/93609
    user: "${UID}:${GID}"
    ports:
      - "5432:5432"
    environment:
      - POSTGRES_USER=hive
      - POSTGRES_PASSWORD=hivepassword
      - POSTGRES_DB=hive
    volumes:
      # For local development only. In Rancher development, PostgreSQL data
      # shouldn't be stored in a shared mount.
      - ./cdr/cdm/jupyter/cdm-postgres:/var/lib/postgresql/data
    networks:
      - cdm-jupyterhub-network

networks:
  # Created by `docker network create cdm-jupyterhub-network`
  cdm-jupyterhub-network:
    external: true