Skip to content

Commit

Permalink
Updated script
Browse files Browse the repository at this point in the history
  • Loading branch information
Tom Augspurger committed Oct 31, 2023
1 parent a394a20 commit 677f14f
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 22 deletions.
41 changes: 41 additions & 0 deletions scripts/nvidia-device-plugin-ds-staging.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# DaemonSet that runs the NVIDIA Kubernetes device plugin container in the
# `staging` namespace, on nodes labelled `accelerator: nvidia`.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: staging
spec:
  # The selector must match the pod template labels below.
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      # Use a nodeSelector to only schedule on GPU nodes
      nodeSelector:
        accelerator: nvidia
      tolerations:
        # Tolerate the CriticalAddonsOnly taint regardless of its value/effect.
        - key: CriticalAddonsOnly
          operator: Exists
        # Tolerate the `hub.jupyter.org_dedicated=user:NoSchedule` taint.
        # NOTE(review): presumably set on dedicated JupyterHub user nodes so
        # the plugin can still run there -- confirm against the node pools.
        - key: "hub.jupyter.org_dedicated"
          operator: "Equal"
          value: "user"
          effect: "NoSchedule"
      priorityClassName: "system-node-critical"
      containers:
        - image: mcr.microsoft.com/oss/nvidia/k8s-device-plugin:v0.14.1
          name: nvidia-device-plugin-ctr
          # Run with no privilege escalation and all Linux capabilities dropped.
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        # Host directory shared with the container at the same path.
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
41 changes: 19 additions & 22 deletions scripts/nvidia-device-plugin-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,28 @@ spec:
labels:
name: nvidia-device-plugin-ds
spec:
priorityClassName: "system-cluster-critical"
# Use a nodeSelector to only schedule on GPU nodes
nodeSelector:
accelerator: nvidia
tolerations:
# Allow this pod to be rescheduled while the node is in "critical add-ons only" mode.
# This, along with the annotation above, marks this pod as a critical add-on.
- key: CriticalAddonsOnly
operator: Exists
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
- key: "sku"
operator: "Equal"
value: "gpu"
effect: "NoSchedule"
- key: CriticalAddonsOnly
operator: Exists
- key: "hub.jupyter.org_dedicated"
operator: "Equal"
value: "user"
effect: "NoSchedule"
priorityClassName: "system-node-critical"
containers:
- image: mcr.microsoft.com/oss/nvidia/k8s-device-plugin:v0.14.1
name: nvidia-device-plugin-ctr
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
- image: mcr.microsoft.com/oss/nvidia/k8s-device-plugin:v0.14.1
name: nvidia-device-plugin-ctr
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins

0 comments on commit 677f14f

Please sign in to comment.