Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(helm,docker-compose): add registry service #435

Merged
merged 10 commits into from
Mar 19, 2024
6 changes: 6 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,9 @@ SOCAT_IMAGE=alpine/socat
SOCAT_VERSION=1.7.4.4-r0
SOCAT_HOST=socat
SOCAT_PORT=2375

# registry
# Image name and version tag for the registry service
# (presumably consumed by the docker-compose files — verify there).
REGISTRY_IMAGE=registry
REGISTRY_VERSION=2.8.3
# Host name and port for the registry service; 5000 is also the value
# returned by the Helm chart's "core.registry.port" helper.
REGISTRY_HOST=registry
REGISTRY_PORT=5000
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ HELM_RELEASE_NAME := core

.PHONY: all
all: ## Launch all services with their up-to-date release version
@docker inspect --type=image instill/ray:${RAY_RELEASE_TAG} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5
@make build-release BUILD=${BUILD}
@if [ ! -f "$$(echo ${SYSTEM_CONFIG_PATH}/user_uid)" ]; then \
mkdir -p ${SYSTEM_CONFIG_PATH} && \
docker run --rm --name uuidgen ${INSTILL_CORE_IMAGE_NAME}:${INSTILL_CORE_VERSION} uuidgen > ${SYSTEM_CONFIG_PATH}/user_uid; \
fi
ifeq (${NVIDIA_GPU_AVAILABLE}, true)
@docker inspect --type=image instill/ray:${RAY_SERVER_VERSION} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5
@cat docker-compose-nvidia.yml | yq '.services.ray_server.deploy.resources.reservations.devices[0].device_ids |= (strenv(NVIDIA_VISIBLE_DEVICES) | split(",")) | ..style="double"' | \
EDITION=$${EDITION:=local-ce} DEFAULT_USER_UID=$$(cat ${SYSTEM_CONFIG_PATH}/user_uid) RAY_RELEASE_TAG=${RAY_RELEASE_TAG} docker compose ${COMPOSE_FILES} -f - up -d --quiet-pull
else
Expand All @@ -62,13 +62,13 @@ endif

.PHONY: latest
latest: ## Launch all dependent services with their latest codebase
@docker inspect --type=image instill/ray:${RAY_LATEST_TAG} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5
@make build-latest PROFILE=${PROFILE} BUILD=${BUILD}
@if [ ! -f "$$(echo ${SYSTEM_CONFIG_PATH}/user_uid)" ]; then \
mkdir -p ${SYSTEM_CONFIG_PATH} && \
docker run --rm --name uuidgen ${INSTILL_CORE_IMAGE_NAME}:latest uuidgen > ${SYSTEM_CONFIG_PATH}/user_uid; \
fi
ifeq (${NVIDIA_GPU_AVAILABLE}, true)
@docker inspect --type=image instill/ray:${RAY_LATEST_TAG} >/dev/null 2>&1 || printf "\033[1;33mINFO:\033[0m This may take a while due to the enormous size of the Ray server image, but the image pulling process should be just a one-time effort.\n" && sleep 5
@cat docker-compose-nvidia.yml | yq '.services.ray_server.deploy.resources.reservations.devices[0].device_ids |= (strenv(NVIDIA_VISIBLE_DEVICES) | split(",")) | ..style="double"' | \
COMPOSE_PROFILES=${PROFILE} EDITION=$${EDITION:=local-ce:latest} DEFAULT_USER_UID=$$(cat ${SYSTEM_CONFIG_PATH}/user_uid) RAY_LATEST_TAG=${RAY_LATEST_TAG} docker compose ${COMPOSE_FILES} -f docker-compose-latest.yml -f - up -d --quiet-pull
else
Expand Down
19 changes: 19 additions & 0 deletions charts/core/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,21 @@ openfga
{{- printf "%s-openfga" (include "core.fullname" .) -}}
{{- end -}}

{{/*
registry: resource name ("<fullname>-registry") plus the fixed port and
metrics-port values used wherever the chart refers to the registry
*/}}
{{- define "core.registry" -}}
{{- include "core.fullname" . | printf "%s-registry" -}}
{{- end -}}

{{- define "core.registry.port" -}}
{{- print "5000" -}}
{{- end -}}

{{- define "core.registry.metricsPort" -}}
{{- print "5001" -}}
{{- end -}}

{{/*
internal TLS secret names
*/}}
Expand Down Expand Up @@ -446,6 +461,10 @@ Persistent Volume Claims
{{- printf "%s-ray-conda-data-volume" (include "core.fullname" .) -}}
{{- end -}}

{{/*
registry data volume: name of the PersistentVolumeClaim, "<fullname>-registry-data-volume"
*/}}
{{- define "core.registryDataVolume" -}}
{{- include "core.fullname" . | printf "%s-registry-data-volume" -}}
{{- end -}}

{{- define "core.databaseDataVolume" -}}
{{- printf "%s-database-data-volume" (include "core.fullname" .) -}}
{{- end -}}
Expand Down
3 changes: 3 additions & 0 deletions charts/core/templates/model-backend/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,6 @@ data:
openfga:
host: {{ template "core.openfga" . }}
port: 8080
registry:
host: {{ template "core.registry" . }}
port: {{ template "core.registry.port" . }}
31 changes: 31 additions & 0 deletions charts/core/templates/pvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,37 @@ spec:
{{- end }}
{{- end }}
{{- end }}
{{- /*
Registry data PersistentVolumeClaim. Rendered only when no existing claim
is configured under persistence.persistentVolumeClaim.registry.
A storageClass of "-" renders an explicit empty storageClassName; an unset
storageClass omits the field entirely.
*/ -}}
{{- $registry := .Values.persistence.persistentVolumeClaim.registry -}}
{{- if not $registry.existingClaim }}
---
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: {{ include "core.registryDataVolume" . }}
  annotations:
  {{- range $annotationKey, $annotationValue := $registry.annotations }}
    {{ $annotationKey }}: {{ $annotationValue | quote }}
  {{- end }}
  {{- if eq .Values.persistence.resourcePolicy "keep" }}
    helm.sh/resource-policy: keep
  {{- end }}
  labels:
    {{- include "core.labels" . | nindent 4 }}
    app.kubernetes.io/component: registry
spec:
  accessModes:
    - {{ $registry.accessMode }}
  resources:
    requests:
      storage: {{ $registry.size }}
  {{- with $registry.storageClass }}
  {{- if eq "-" . }}
  storageClassName: ""
  {{- else }}
  storageClassName: {{ . }}
  {{- end }}
  {{- end }}
{{- end }}
{{- $database := .Values.persistence.persistentVolumeClaim.database -}}
{{- if and (not $database.existingClaim) .Values.database.enabled }}
---
Expand Down
58 changes: 58 additions & 0 deletions charts/core/templates/ray-service/ray-service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,24 @@ spec:
items:
- key: cp_conda_env.sh
path: cp_conda_env.sh
- name: podman-configmap
configMap:
name: podman
defaultMode: 0666
items:
- key: registries.conf
path: registries.conf
- key: policy.json
path: policy.json
- key: storage.conf
path: storage.conf
containers:
- name: ray-head
image: {{ .Values.rayService.image.repository }}:{{ .Values.rayService.image.tag }}
securityContext:
# for mounting /dev/fuse
# TODO: maybe implement a fuse-device-plugin-daemonset
privileged: true
imagePullPolicy: Always
{{- if .Values.rayService.headGroupSpec.resources }}
resources:
Expand All @@ -70,11 +85,15 @@ spec:
value: http://core-grafana:80
- name: RAY_PROMETHEUS_HOST
value: http://core-prometheus:9090
- name: RAY_worker_register_timeout_seconds
value: "360"
volumeMounts:
- mountPath: /ray-conda-pack
name: ray-conda
- mountPath: /home/ray/script
name: cp-conda-env-configmap
- mountPath: /etc/containers/
name: podman-configmap
ports:
- containerPort: 6379
name: gcs-server
Expand Down Expand Up @@ -134,6 +153,15 @@ spec:
items:
- key: start_ray_serve.sh
path: start_ray_serve.sh
- name: podman-configmap
configMap:
name: podman
defaultMode: 0666
items:
- key: registries.conf
path: registries.conf
- key: policy.json
path: policy.json
containers:
- name: ray-worker
image: {{ $.Values.rayService.image.repository }}:{{ $.Values.rayService.image.tag }}
Expand Down Expand Up @@ -162,6 +190,8 @@ spec:
name: start-ray-serve-configmap
- mountPath: /model-repository
name: model-repository
- mountPath: /etc/containers/
name: podman-configmap
{{- end }}
---
apiVersion: v1
Expand Down Expand Up @@ -212,3 +242,31 @@ data:
serve start --http-host=0.0.0.0 --grpc-port 9000 --grpc-servicer-functions ray_pb2_grpc.add_RayServiceServicer_to_server

echo "INFO: Start ray serve"
---
{{- /*
"podman" ConfigMap: container-tooling configuration files that the Ray pods
mount at /etc/containers/. The chart's registry address is listed first in
the unqualified search list and is marked insecure.
*/}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: podman
{{- $registryAddr := printf "%s:%s" (include "core.registry" .) (include "core.registry.port" .) }}
data:
  registries.conf: |
    unqualified-search-registries = ["{{ $registryAddr }}", "docker.io", "quay.io"]

    [[registry]]
    location = "{{ $registryAddr }}"
    insecure = true
  policy.json: |
    {
      "default": [
        {
          "type": "insecureAcceptAnything"
        }
      ],
      "transports": {
        "docker-daemon": {
          "": [{ "type": "insecureAcceptAnything" }]
        }
      }
    }
  storage.conf: |
    [storage]
    driver = "overlay"
28 changes: 28 additions & 0 deletions charts/core/templates/registry/configmap.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
{{- /*
Registry ConfigMap: renders the registry's config.yaml from
.Values.registry.config (version, log, storage, http, redis) and appends a
fixed health section that checks the storage driver and probes the Redis
address over TCP.
Only template comments are used here so the rendered text, and therefore any
checksum computed from it, is unaffected.
*/ -}}
{{- $cfg := .Values.registry.config -}}
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "core.registry" . }}
  labels:
    {{- include "core.labels" . | nindent 4 }}
    app.kubernetes.io/component: registry
data:
  config.yaml: |+
    version: {{ $cfg.version }}
    log:
      {{- toYaml $cfg.log | nindent 6 }}
    storage:
      {{- toYaml $cfg.storage | nindent 6 }}
    http:
      {{- toYaml $cfg.http | nindent 6 }}
    redis:
      {{- toYaml $cfg.redis | nindent 6 }}
    health:
      storagedriver:
        enabled: true
        interval: 10s
        threshold: 3
      tcp:
        - addr: {{ include "core.redis.addr" . }}
          timeout: 3s
          interval: 30s
          threshold: 3
70 changes: 70 additions & 0 deletions charts/core/templates/registry/cronjob.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{{- /*
Registry garbage-collector CronJob. Rendered only when
.Values.registry.garbageCollect.enabled is set. On the configured schedule it
runs `registry garbage-collect` against the registry configuration mounted
from the registry ConfigMap.
*/ -}}
{{- if .Values.registry.garbageCollect.enabled }}
apiVersion: batch/v1
kind: CronJob
metadata:
  name: {{ template "core.registry" . }}-garbage-collector
  labels:
    {{- include "core.labels" . | nindent 4 }}
    app.kubernetes.io/component: registry
spec:
  # Never start a new run while the previous one is still active.
  concurrencyPolicy: Forbid
  schedule: {{ .Values.registry.garbageCollect.schedule | quote }}
  jobTemplate:
    metadata:
      labels:
        {{- include "core.matchLabels" . | nindent 8 }}
        app.kubernetes.io/component: registry
      annotations:
        # Hash of the rendered registry ConfigMap.
        checksum/config: {{ include (print $.Template.BasePath "/registry/configmap.yaml") . | sha256sum }}
        {{- with .Values.registry.podAnnotations }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      template:
        spec:
          {{- if .Values.registry.serviceAccountName }}
          serviceAccountName: {{ .Values.registry.serviceAccountName }}
          {{- end }}
          {{- with .Values.imagePullSecrets }}
          imagePullSecrets:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          securityContext:
            runAsUser: 65534
            runAsGroup: 65534
            fsGroup: 65534
          containers:
            - name: garbage-collect
              image: {{ .Values.registry.image.repository }}:{{ .Values.registry.image.tag }}
              imagePullPolicy: {{ .Values.registry.image.pullPolicy }}
              command:
                - /bin/registry
                - garbage-collect
                - --delete-untagged={{ .Values.registry.garbageCollect.deleteUntagged }}
                # Must be the same path the config volume is mounted at below.
                - {{ .Values.registry.configPath | quote }}
              securityContext:
                # fsGroup is a pod-level field only; it is set on the pod
                # securityContext above and must not be repeated here.
                runAsUser: 65534
                runAsGroup: 65534
              volumeMounts:
                - name: config
                  mountPath: {{ .Values.registry.configPath }}
                  subPath: config.yaml
          restartPolicy: OnFailure
          {{- with .Values.registry.nodeSelector }}
          nodeSelector:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.registry.affinity }}
          affinity:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          {{- with .Values.registry.tolerations }}
          tolerations:
            {{- toYaml . | nindent 12 }}
          {{- end }}
          volumes:
            - name: config
              configMap:
                name: {{ template "core.registry" . }}
{{- end }}
Loading
Loading