diff --git a/charts/castai-hosted-model/Chart.yaml b/charts/castai-hosted-model/Chart.yaml
index 9d3cacc7..d086c010 100644
--- a/charts/castai-hosted-model/Chart.yaml
+++ b/charts/castai-hosted-model/Chart.yaml
@@ -2,7 +2,7 @@ apiVersion: v2
 name: castai-hosted-model
 description: CAST AI hosted model deployment chart.
 type: application
-version: 0.0.7
+version: 0.0.8
 appVersion: "v0.0.1"
 dependencies:
   - name: ollama
diff --git a/charts/castai-hosted-model/README.md b/charts/castai-hosted-model/README.md
index 8d6964c2..eaa8fd7a 100644
--- a/charts/castai-hosted-model/README.md
+++ b/charts/castai-hosted-model/README.md
@@ -13,5 +13,12 @@ CAST AI hosted model deployment chart.
 
 | Key | Type | Default | Description |
 |-----|------|---------|-------------|
-| ollama.enabled | bool | `true` |  |
-| vllm.enabled | bool | `false` |  |
\ No newline at end of file
+| ollama.enabled | bool | `true` | Specifies if Ollama model should be deployed |
+| placementJob.enabled | bool | `false` | Specifies if a node placement job should be deployed |
+| placementJob.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy |
+| placementJob.image.repository | string | `"busybox"` | The image to use for the job |
+| placementJob.image.tag | string | `"1.37.0"` | The image tag |
+| placementJob.nodeTemplateName | string | `""` | Name of the node template the job is pinned to via node selector and toleration (required when the job is enabled) |
+| placementJob.requiredGPUTotalMemoryMiB | string | `nil` | Total GPU memory MiB (GPU count * GPU memory MiB) of the node that should be provisioned for this job |
+| placementJob.resources | object | `{}` | Resources for the job |
+| vllm.enabled | bool | `false` | Specifies if vLLM model should be deployed |
\ No newline at end of file
diff --git a/charts/castai-hosted-model/templates/placement-job.yaml b/charts/castai-hosted-model/templates/placement-job.yaml
new file mode 100644
index 00000000..4b2d7d11
--- /dev/null
+++ b/charts/castai-hosted-model/templates/placement-job.yaml
@@ -0,0 +1,67 @@
+{{- /*
+Node placement Job: runs one pod that echoes a message and exits.
+The pod is pinned (nodeSelector + toleration) to the node template named by
+placementJob.nodeTemplateName and requires a node whose
+nvidia.com/gpu.total-memory label is greater than
+placementJob.requiredGPUTotalMemoryMiB. Both values are mandatory when the
+job is enabled; rendering fails otherwise.
+*/ -}}
+{{- if .Values.placementJob.enabled }}
+{{- $nodeTemplateName := required "placementJob.nodeTemplateName is required" .Values.placementJob.nodeTemplateName }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: {{ .Release.Name }}-placement-job
+  labels:
+    app.kubernetes.io/name: {{ .Chart.Name }}
+    app.kubernetes.io/instance: {{ .Release.Name }}
+    app.kubernetes.io/component: placement-job
+spec:
+  # A failed pod is not retried.
+  backoffLimit: 0
+  template:
+    metadata:
+      labels:
+        app.kubernetes.io/name: {{ .Chart.Name }}
+        app.kubernetes.io/instance: {{ .Release.Name }}
+        app.kubernetes.io/component: placement-job
+    spec:
+      restartPolicy: Never
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                  # Gt is strict: the label value must exceed the configured number.
+                  - key: nvidia.com/gpu.total-memory
+                    operator: Gt
+                    values:
+                      - "{{ required "placementJob.requiredGPUTotalMemoryMiB is required" .Values.placementJob.requiredGPUTotalMemoryMiB }}"
+          preferredDuringSchedulingIgnoredDuringExecution:
+            # Soft preference for nodes that carry the spot label.
+            - weight: 1
+              preference:
+                matchExpressions:
+                  - key: scheduling.cast.ai/spot
+                    operator: Exists
+      containers:
+        - name: placement-job
+          image: "{{ .Values.placementJob.image.repository }}:{{ .Values.placementJob.image.tag }}"
+          imagePullPolicy: {{ .Values.placementJob.image.pullPolicy | quote }}
+          command: ["/bin/sh", "-c", "echo Node placement job finished."]
+          resources:
+            {{- toYaml .Values.placementJob.resources | nindent 12 }}
+      nodeSelector:
+        scheduling.cast.ai/node-template: "{{ $nodeTemplateName }}"
+      tolerations:
+        # Tolerate the node-template, spot and GPU taints.
+        - key: scheduling.cast.ai/node-template
+          value: "{{ $nodeTemplateName }}"
+          operator: Equal
+          effect: NoSchedule
+        - key: scheduling.cast.ai/spot
+          operator: Exists
+        - key: nvidia.com/gpu
+          effect: NoSchedule
+          operator: Exists
+{{- end }}
diff --git a/charts/castai-hosted-model/values.yaml b/charts/castai-hosted-model/values.yaml
index 5ea442c4..b9aafcfd 100644
--- a/charts/castai-hosted-model/values.yaml
+++ b/charts/castai-hosted-model/values.yaml
@@ -1,4 +1,26 @@
 ollama:
+  # -- Specifies if Ollama model should be deployed
   enabled: true
 vllm:
+  # -- Specifies if vLLM model should be deployed
   enabled: false
+placementJob:
+  # -- Specifies if a node placement job should be deployed
+  enabled: false
+
+  # -- Name of the node template the job is pinned to via node selector and toleration (required when the job is enabled)
+  nodeTemplateName: ""
+
+  # -- Total GPU memory MiB (GPU count * GPU memory MiB) of the node that should be provisioned for this job
+  requiredGPUTotalMemoryMiB: null
+
+  image:
+    # -- The image to use for the job
+    repository: busybox
+    # -- The image tag
+    tag: "1.37.0"
+    # -- Image pull policy
+    pullPolicy: IfNotPresent
+
+  # -- Resources for the job
+  resources: {}