From f3ca387e5403cba3c1d58623dacd73a1f02854f2 Mon Sep 17 00:00:00 2001 From: Ashish Singh Dev Date: Mon, 12 Jun 2023 09:15:47 +0530 Subject: [PATCH] fix gce-pd-csi driver (#10208) * fix gce-pd-csi driver * fixed, 1. reading replicas value from defaults.yml, and 2. corrected gcp-pd-csi driver version in README.md --- README.md | 2 +- roles/download/defaults/main.yml | 2 +- .../csi_driver/gcp_pd/tasks/main.yml | 2 + .../templates/gcp-pd-csi-controller.yml.j2 | 100 +++++++++++- .../gcp_pd/templates/gcp-pd-csi-node.yml.j2 | 3 +- .../templates/gcp-pd-csi-sc-regional.yml.j2 | 9 ++ .../templates/gcp-pd-csi-sc-zonal.yml.j2 | 8 + .../gcp_pd/templates/gcp-pd-csi-setup.yml.j2 | 147 ++++++++++++++---- 8 files changed, 237 insertions(+), 36 deletions(-) create mode 100644 roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2 create mode 100644 roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2 diff --git a/README.md b/README.md index 6c110f20bed..cea84d04b61 100644 --- a/README.md +++ b/README.md @@ -191,7 +191,7 @@ Note: Upstart/SysV init based OS types are not supported. - [aws-ebs-csi-plugin](https://github.com/kubernetes-sigs/aws-ebs-csi-driver) v0.5.0 - [azure-csi-plugin](https://github.com/kubernetes-sigs/azuredisk-csi-driver) v1.10.0 - [cinder-csi-plugin](https://github.com/kubernetes/cloud-provider-openstack/blob/master/docs/cinder-csi-plugin/using-cinder-csi-plugin.md) v1.22.0 - - [gcp-pd-csi-plugin](https://github.com/kubernetes-sigs/gcp-compute-persistent-disk-csi-driver) v1.4.0 + - [gcp-pd-csi-plugin](https://github.com/kubernetes-sigs/gcp-compute-persistent-disk-csi-driver) v1.9.2 - [local-path-provisioner](https://github.com/rancher/local-path-provisioner) v0.0.23 - [local-volume-provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner) v2.5.0 diff --git a/roles/download/defaults/main.yml b/roles/download/defaults/main.yml index f07110a87b7..fb75c2036e8 100644 --- a/roles/download/defaults/main.yml +++ b/roles/download/defaults/main.yml @@ -1107,7 +1107,7 @@ aws_ebs_csi_plugin_version: "v0.5.0" aws_ebs_csi_plugin_image_repo: "{{ docker_image_repo }}/amazon/aws-ebs-csi-driver" aws_ebs_csi_plugin_image_tag: "{{ aws_ebs_csi_plugin_version }}" -gcp_pd_csi_plugin_version: "v1.4.0" +gcp_pd_csi_plugin_version: "v1.9.2" gcp_pd_csi_plugin_image_repo: "{{ kube_image_repo }}/cloud-provider-gcp/gcp-compute-persistent-disk-csi-driver" gcp_pd_csi_plugin_image_tag: "{{ gcp_pd_csi_plugin_version }}" diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml b/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml index 59a99f74ed7..be511caa457 100644 --- a/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml +++ b/roles/kubernetes-apps/csi_driver/gcp_pd/tasks/main.yml @@ -28,6 +28,8 @@ - {name: gcp-pd-csi-setup, file: gcp-pd-csi-setup.yml} - {name: gcp-pd-csi-controller, file: gcp-pd-csi-controller.yml} - {name: gcp-pd-csi-node, file: gcp-pd-csi-node.yml} + - {name: gcp-pd-csi-sc-regional, file: gcp-pd-csi-sc-regional.yml} + - {name: gcp-pd-csi-sc-zonal, file: gcp-pd-csi-sc-zonal.yml} register: gcp_pd_csi_manifests when: inventory_hostname == groups['kube_control_plane'][0] diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2 index 4762093dc1e..61157d8fc60 100644 --- a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2 +++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-controller.yml.j2 @@ -1,10 +1,9 @@ -kind: StatefulSet +kind: Deployment apiVersion: apps/v1 metadata: name: csi-gce-pd-controller namespace: kube-system spec: - serviceName: "csi-gce-pd" replicas: {{ gcp_pd_csi_controller_replicas }} selector: matchLabels: @@ -19,6 +18,8 @@ spec: # this requirement when issue is resolved and before any exposure of # metrics ports hostNetwork: true + nodeSelector: + kubernetes.io/os: linux serviceAccountName: csi-gce-pd-controller-sa priorityClassName: csi-gce-pd-controller containers: @@ -28,9 +29,32 @@ spec: - "--v=5" - "--csi-address=/csi/csi.sock" - "--feature-gates=Topology=true" + - "--http-endpoint=:22011" + - "--leader-election-namespace=$(PDCSI_NAMESPACE)" + - "--timeout=250s" + - "--extra-create-metadata" + # - "--run-controller-service=false" # disable the controller service of the CSI driver + # - "--run-node-service=false" # disable the node service of the CSI driver + - "--leader-election" - "--default-fstype=ext4" - # - "--run-controller-service=false" # disable the controller service of the CSI driver - # - "--run-node-service=false" # disable the node service of the CSI driver + - "--controller-publish-readonly=true" + env: + - name: PDCSI_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + ports: + - containerPort: 22011 + name: http-endpoint + protocol: TCP + livenessProbe: + failureThreshold: 1 + httpGet: + path: /healthz/leader-election + port: http-endpoint + initialDelaySeconds: 10 + timeoutSeconds: 10 + periodSeconds: 20 volumeMounts: - name: socket-dir mountPath: /csi @@ -39,6 +63,27 @@ spec: args: - "--v=5" - "--csi-address=/csi/csi.sock" + - "--http-endpoint=:22012" + - "--leader-election" + - "--leader-election-namespace=$(PDCSI_NAMESPACE)" + - "--timeout=250s" + env: + - name: PDCSI_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + ports: + - containerPort: 22012 + name: http-endpoint + protocol: TCP + livenessProbe: + failureThreshold: 1 + httpGet: + path: /healthz/leader-election + port: http-endpoint + initialDelaySeconds: 10 + timeoutSeconds: 10 + periodSeconds: 20 volumeMounts: - name: socket-dir mountPath: /csi @@ -47,6 +92,44 @@ spec: args: - "--v=5" - "--csi-address=/csi/csi.sock" + - "--http-endpoint=:22013" + - "--leader-election" + - "--leader-election-namespace=$(PDCSI_NAMESPACE)" + - "--handle-volume-inuse-error=false" + env: + - name: PDCSI_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + ports: + - containerPort: 22013 + name: http-endpoint + protocol: TCP + livenessProbe: + failureThreshold: 1 + httpGet: + path: /healthz/leader-election + port: http-endpoint + initialDelaySeconds: 10 + timeoutSeconds: 10 + periodSeconds: 20 + volumeMounts: + - name: socket-dir + mountPath: /csi + - name: csi-snapshotter + image: {{ csi_snapshotter_image_repo }}:{{ csi_snapshotter_image_tag }} + args: + - "--v=5" + - "--csi-address=/csi/csi.sock" + - "--metrics-address=:22014" + - "--leader-election" + - "--leader-election-namespace=$(PDCSI_NAMESPACE)" + - "--timeout=300s" + env: + - name: PDCSI_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace volumeMounts: - name: socket-dir mountPath: /csi @@ -72,4 +155,11 @@ spec: - name: cloud-sa-volume secret: secretName: cloud-sa - volumeClaimTemplates: [] +--- +apiVersion: storage.k8s.io/v1 +kind: CSIDriver +metadata: + name: pd.csi.storage.gke.io +spec: + attachRequired: true + podInfoOnMount: false \ No newline at end of file diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2 index 204ff972e75..9aad6206936 100644 --- a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2 +++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-node.yml.j2 @@ -49,6 +49,7 @@ spec: args: - "--v=5" - "--endpoint=unix:/csi/csi.sock" + - "--run-controller-service=false" volumeMounts: - name: kubelet-dir mountPath: /var/lib/kubelet @@ -108,4 +109,4 @@ spec: # See "special case". This will tolerate everything. Node component should # be scheduled on all nodes. tolerations: - - operator: Exists + - operator: Exists \ No newline at end of file diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2 new file mode 100644 index 00000000000..57a8675e451 --- /dev/null +++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-regional.yml.j2 @@ -0,0 +1,9 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: csi-gce-pd-regional +provisioner: pd.csi.storage.gke.io +parameters: + type: pd-balanced + replication-type: regional-pd +volumeBindingMode: WaitForFirstConsumer \ No newline at end of file diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2 new file mode 100644 index 00000000000..e9bedaf83c5 --- /dev/null +++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-sc-zonal.yml.j2 @@ -0,0 +1,8 @@ +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: csi-gce-pd-zonal +provisioner: pd.csi.storage.gke.io +parameters: + type: pd-balanced +volumeBindingMode: WaitForFirstConsumer \ No newline at end of file diff --git a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2 b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2 index 4c693b3fded..610baf33b71 100644 --- a/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2 +++ b/roles/kubernetes-apps/csi_driver/gcp_pd/templates/gcp-pd-csi-setup.yml.j2 @@ -38,8 +38,21 @@ rules: - apiGroups: [""] resources: ["nodes"] verbs: ["get", "list", "watch"] - + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshots"] + verbs: ["get", "list"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: ["get", "list"] + # Access to volumeattachments is only needed when the CSI driver + # has the PUBLISH_UNPUBLISH_VOLUME controller capability. + # In that case, external-provisioner will watch volumeattachments + # to determine when it is safe to delete a volume. + - apiGroups: ["storage.k8s.io"] + resources: ["volumeattachments"] + verbs: ["get", "list", "watch"] --- + kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: @@ -130,6 +143,10 @@ rules: - apiGroups: [""] resources: ["events"] verbs: ["list", "watch", "create", "update", "patch"] + # If handle-volume-inuse-error=true, the pod specific rbac is needed + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "watch"] --- kind: ClusterRoleBinding @@ -144,34 +161,30 @@ roleRef: kind: ClusterRole name: csi-gce-pd-resizer-role apiGroup: rbac.authorization.k8s.io - --- -apiVersion: policy/v1beta1 -kind: PodSecurityPolicy -metadata: - name: csi-gce-pd-node-psp -spec: - seLinux: - rule: RunAsAny - supplementalGroups: - rule: RunAsAny - runAsUser: - rule: RunAsAny - fsGroup: - rule: RunAsAny - privileged: true - volumes: - - '*' - hostNetwork: true - allowedHostPaths: - - pathPrefix: "/var/lib/kubelet/plugins_registry/" - - pathPrefix: "/var/lib/kubelet" - - pathPrefix: "/var/lib/kubelet/plugins/pd.csi.storage.gke.io/" - - pathPrefix: "/dev" - - pathPrefix: "/etc/udev" - - pathPrefix: "/lib/udev" - - pathPrefix: "/run/udev" - - pathPrefix: "/sys" +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-gce-pd-controller-deploy +rules: + - apiGroups: ["policy"] + resources: ["podsecuritypolicies"] + verbs: ["use"] + resourceNames: + - csi-gce-pd-controller-psp +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: csi-gce-pd-controller-deploy +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: csi-gce-pd-controller-deploy +subjects: + - kind: ServiceAccount + name: csi-gce-pd-controller-sa + namespace: kube-system --- kind: ClusterRole @@ -198,3 +211,81 @@ subjects: - kind: ServiceAccount name: csi-gce-pd-node-sa namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: csi-gce-pd-controller +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: csi-gce-pd-node-deploy +subjects: +- kind: ServiceAccount + name: csi-gce-pd-controller-sa + namespace: kube-system + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: csi-gce-pd-snapshotter-role +rules: + - apiGroups: [""] + resources: ["events"] + verbs: ["list", "watch", "create", "update", "patch"] + # Secrets resource omitted since GCE PD snapshots does not require them + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotclasses"] + verbs: ["get", "list", "watch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents"] + verbs: ["create", "get", "list", "watch", "update", "delete", "patch"] + - apiGroups: ["snapshot.storage.k8s.io"] + resources: ["volumesnapshotcontents/status"] + verbs: ["update", "patch"] +--- + +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-gce-pd-controller-snapshotter-binding +subjects: + - kind: ServiceAccount + name: csi-gce-pd-controller-sa + namespace: kube-system +roleRef: + kind: ClusterRole + name: csi-gce-pd-snapshotter-role + apiGroup: rbac.authorization.k8s.io +--- + +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-gce-pd-leaderelection-role + namespace: kube-system + labels: + k8s-app: gcp-compute-persistent-disk-csi-driver +rules: +- apiGroups: ["coordination.k8s.io"] + resources: ["leases"] + verbs: ["get", "watch", "list", "delete", "update", "create"] + +--- + +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: csi-gce-pd-controller-leaderelection-binding + namespace: kube-system + labels: + k8s-app: gcp-compute-persistent-disk-csi-driver +subjects: +- kind: ServiceAccount + name: csi-gce-pd-controller-sa + namespace: kube-system +roleRef: + kind: Role + name: csi-gce-pd-leaderelection-role + apiGroup: rbac.authorization.k8s.io \ No newline at end of file