driver-amdgpu: deploy the AMD GPU device plugin via Helm

Summary of this patch:
  * class/defaults.yml: the namespace parameter becomes a mapping with
    name, annotations and labels; chart sources/versions, tolerations,
    nodeSelector, node-feature-discovery settings and helmValues are added.
  * class/driver-amdgpu.yml: declares both charts as Kapitan helm
    dependencies and adds helm compile targets; the feature-discovery
    target is chosen through the =_config lookup keyed on
    discovery.enabled ('True' / 'False').
  * component/app.jsonnet, component/main.jsonnet: read namespace.name and
    emit a Namespace object as 00_namespace.
  * tests/golden: rendered output for the default parameters.

NOTE(review): in the golden DaemonSet below, metadata.namespace is
kube-system and no nodeSelector is rendered, although helm_params.namespace
and helmValues.amdgpu.node_selector are both set. Confirm against the
chart's values whether additional settings are needed for them to apply.

diff --git a/class/defaults.yml b/class/defaults.yml
index 5177b2c..de60fb9 100644
--- a/class/defaults.yml
+++ b/class/defaults.yml
@@ -1,4 +1,54 @@
 parameters:
   driver_amdgpu:
     =_metadata: {}
-    namespace: syn-driver-amdgpu
+
+    namespace:
+      annotations: {}
+      labels:
+        pod-security.kubernetes.io/warn: privileged
+        pod-security.kubernetes.io/enforce: privileged
+      name: syn-driver-amdgpu
+
+    charts:
+      amd_gpu:
+        source: https://rocm.github.io/k8s-device-plugin/
+        version: 0.12.0
+      feature_discovery:
+        source: https://kubernetes-sigs.github.io/node-feature-discovery/charts
+        version: 0.15.1
+
+    tolerations:
+      - key: CriticalAddonsOnly
+        operator: Exists
+
+    nodeSelector:
+      kubernetes.io/arch: amd64
+
+    discovery:
+      enabled: false
+      config:
+        core:
+          sources:
+            - custom
+        sources:
+          custom:
+            - name: amd-gpu
+              labels:
+                feature.node.kubernetes.io/gpu: amd
+              matchFeatures:
+                - feature: pci.device
+                  matchExpressions:
+                    vendor: {op: In, value: ["1002"]}
+                    class: {op: In, value: ["0300"]}
+                    ## AMD Cezanne / Vega Integrated GPU
+                    # device: {op: In, value: ["1638"]}
+
+    helmValues:
+      amdgpu:
+        tolerations: ${driver_amdgpu:tolerations}
+        node_selector: ${driver_amdgpu:nodeSelector}
+
+      discovery:
+        fullnameOverride: feature-discovery
+        worker:
+          config: ${driver_amdgpu:discovery:config}
diff --git a/class/driver-amdgpu.yml b/class/driver-amdgpu.yml
index 954ba3d..9e5adac 100644
--- a/class/driver-amdgpu.yml
+++ b/class/driver-amdgpu.yml
@@ -1,5 +1,33 @@
 parameters:
+  =_config:
+    discovery:
+      'True':
+        input_paths:
+          - ${_base_directory}/helmcharts/node-feature-discovery/${driver_amdgpu:charts:feature_discovery:version}
+        input_type: helm
+        output_path: driver-amdgpu//10_helmchart
+        helm_values: ${driver_amdgpu:helmValues:discovery}
+        helm_params:
+          name: feature-discovery
+          namespace: ${driver_amdgpu:namespace:name}
+      'False':
+        input_paths: []
+        input_type: jsonnet
+        output_path: ''
+
   kapitan:
+    dependencies:
+      - type: helm
+        source: ${driver_amdgpu:charts:feature_discovery:source}
+        chart_name: node-feature-discovery
+        version: ${driver_amdgpu:charts:feature_discovery:version}
+        output_path: ${_base_directory}/helmcharts/node-feature-discovery/${driver_amdgpu:charts:feature_discovery:version}/
+      - type: helm
+        source: ${driver_amdgpu:charts:amd_gpu:source}
+        chart_name: amd-gpu
+        version: ${driver_amdgpu:charts:amd_gpu:version}
+        output_path: ${_base_directory}/helmcharts/amd-gpu/${driver_amdgpu:charts:amd_gpu:version}/
+
     compile:
       - input_paths:
           - ${_base_directory}/component/app.jsonnet
@@ -9,3 +37,13 @@ parameters:
           - ${_base_directory}/component/main.jsonnet
         input_type: jsonnet
         output_path: driver-amdgpu/
+      # Helmchart
+      - ${_config:discovery:${driver_amdgpu:discovery:enabled}}
+      - input_paths:
+          - ${_base_directory}/helmcharts/amd-gpu/${driver_amdgpu:charts:amd_gpu:version}
+        input_type: helm
+        output_path: driver-amdgpu//10_helmchart
+        helm_values: ${driver_amdgpu:helmValues:amdgpu}
+        helm_params:
+          name: amd-gpu
+          namespace: ${driver_amdgpu:namespace:name}
diff --git a/component/app.jsonnet b/component/app.jsonnet
index 11ba3f7..d2a665d 100644
--- a/component/app.jsonnet
+++ b/component/app.jsonnet
@@ -3,7 +3,7 @@ local inv = kap.inventory();
 local params = inv.parameters.driver_amdgpu;
 local argocd = import 'lib/argocd.libjsonnet';
 
-local app = argocd.App('driver-amdgpu', params.namespace);
+local app = argocd.App('driver-amdgpu', params.namespace.name);
 
 {
   'driver-amdgpu': app,
diff --git a/component/main.jsonnet b/component/main.jsonnet
index da0c2b1..cb12b16 100644
--- a/component/main.jsonnet
+++ b/component/main.jsonnet
@@ -5,6 +5,14 @@ local inv = kap.inventory();
 // The hiera parameters for the component
 local params = inv.parameters.driver_amdgpu;
 
+local namespace = kube.Namespace(params.namespace.name) {
+  metadata+: {
+    annotations+: params.namespace.annotations,
+    labels+: params.namespace.labels,
+  },
+};
+
 // Define outputs below
 {
+  '00_namespace': namespace,
 }
diff --git a/tests/golden/defaults/driver-amdgpu/driver-amdgpu/00_namespace.yaml b/tests/golden/defaults/driver-amdgpu/driver-amdgpu/00_namespace.yaml
new file mode 100644
index 0000000..c196c19
--- /dev/null
+++ b/tests/golden/defaults/driver-amdgpu/driver-amdgpu/00_namespace.yaml
@@ -0,0 +1,9 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  annotations: {}
+  labels:
+    name: syn-driver-amdgpu
+    pod-security.kubernetes.io/enforce: privileged
+    pod-security.kubernetes.io/warn: privileged
+  name: syn-driver-amdgpu
diff --git a/tests/golden/defaults/driver-amdgpu/driver-amdgpu/10_helmchart/amd-gpu/templates/deviceplugin-daemonset.yaml b/tests/golden/defaults/driver-amdgpu/driver-amdgpu/10_helmchart/amd-gpu/templates/deviceplugin-daemonset.yaml
new file mode 100644
index 0000000..15abc48
--- /dev/null
+++ b/tests/golden/defaults/driver-amdgpu/driver-amdgpu/10_helmchart/amd-gpu/templates/deviceplugin-daemonset.yaml
@@ -0,0 +1,39 @@
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: amd-gpu-device-plugin-daemonset
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      name: amd-gpu-dp-ds
+  template:
+    metadata:
+      labels:
+        name: amd-gpu-dp-ds
+    spec:
+      containers:
+        - image: docker.io/rocm/k8s-device-plugin:1.25.2.7
+          name: amd-gpu-dp-cntr
+          resources: {}
+          securityContext:
+            allowPrivilegeEscalation: false
+            capabilities:
+              drop:
+                - ALL
+          volumeMounts:
+            - mountPath: /var/lib/kubelet/device-plugins
+              name: dp
+            - mountPath: /sys
+              name: sys
+      priorityClassName: system-node-critical
+      tolerations:
+        - key: CriticalAddonsOnly
+          operator: Exists
+      volumes:
+        - hostPath:
+            path: /var/lib/kubelet/device-plugins
+          name: dp
+        - hostPath:
+            path: /sys
+          name: sys