From 51720650b6b57be3e9eb80e6b4430d21fa74031d Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Thu, 9 Jul 2020 23:24:04 -0400 Subject: [PATCH 01/13] decommission - Refactoring/rephrasing/renaming --- .../db.orange.com_cassandraclusters_crd.yaml | 3082 +++++++++-------- .../db/v1alpha1/cassandracluster_types.go | 25 +- .../cassandracluster/cassandra_status.go | 82 +- .../cassandracluster/cassandra_status_test.go | 14 +- .../cassandracluster_controller.go | 3 +- .../cassandracluster/node_operations_test.go | 34 +- .../cassandracluster/pod_operation.go | 40 +- pkg/controller/cassandracluster/reconcile.go | 28 +- .../cassandracluster/reconcile_test.go | 36 +- .../cassandracluster/statefulset.go | 27 +- 10 files changed, 1698 insertions(+), 1673 deletions(-) diff --git a/deploy/crds/db.orange.com_cassandraclusters_crd.yaml b/deploy/crds/db.orange.com_cassandraclusters_crd.yaml index f2abdd546..dbcce0d1b 100644 --- a/deploy/crds/db.orange.com_cassandraclusters_crd.yaml +++ b/deploy/crds/db.orange.com_cassandraclusters_crd.yaml @@ -1,4 +1,4 @@ -apiVersion: apiextensions.k8s.io/v1beta1 +apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: name: cassandraclusters.db.orange.com @@ -10,1623 +10,1667 @@ spec: plural: cassandraclusters singular: cassandracluster scope: Namespaced - validation: - openAPIV3Schema: - description: CassandraCluster is the Schema for the cassandraclusters API - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' - type: string - kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' - type: string - metadata: - type: object - spec: - properties: - autoPilot: - description: AutoPilot defines if the Operator can fly alone or if we - need human action to trigger Actions on specific Cassandra nodes If - autoPilot=true, the operator will set labels pod-operation-status=To-Do - on Pods which allows him to automatically triggers Action If autoPilot=false, - the operator will set labels pod-operation-status=Manual on Pods which - won't automatically triggers Action - type: boolean - autoUpdateSeedList: - description: AutoUpdateSeedList defines if the Operator automatically - update the SeedList according to new cluster CRD topology by default - a boolean is false - type: boolean - bootstrapImage: - description: 'Image used for bootstrapping cluster (use the form : base:version)' - type: string - cassandraImage: - description: Image + version to use for Cassandra - type: string - configMapName: - description: Name of the ConfigMap for Cassandra configuration (cassandra.yaml) - If this is empty, operator will uses default cassandra.yaml from the - baseImage If this is not empty, operator will uses the cassandra.yaml - from the Configmap instead - type: string - dataCapacity: - description: Define the Capacity for Persistent Volume Claims in the - local storage - pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ - type: string - dataStorageClass: - description: Define StorageClass for Persistent Volume Claims in the - local storage. - type: string - debug: - description: Debug is used to surcharge Cassandra pod command to not - directly start cassandra but starts an infinite wait to allow user - to connect a bash into the pod to make some diagnoses. - type: boolean - deletePVC: - description: DeletePVC defines if the PVC must be deleted when the cluster - is deleted it is false by default - type: boolean - gcStdout: - description: 'GCStdout set the parameter CASSANDRA_GC_STDOUT which configure - the JVM -Xloggc: true by default' - type: boolean - hardAntiAffinity: - description: HardAntiAffinity defines if the PodAntiAffinity of the - statefulset has to be hard (it's soft by default) - type: boolean - imageJolokiaSecret: - description: JMX Secret if Set is used to set JMX_USER and JMX_PASSWORD - properties: - name: - description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid?' - type: string - type: object - imagePullSecret: - description: Name of the secret to uses to authenticate on Docker registries - If this is empty, operator do nothing If this is not empty, propagate - the imagePullSecrets to the statefulsets - properties: - name: - description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid?' - type: string - type: object - imagepullpolicy: - description: ImagePullPolicy define the pull policy for C* docker image - type: string - initContainerCmd: - description: Command to execute in the initContainer in the targeted - image - type: string - initContainerImage: - description: 'Image used in the initContainer (use the form : base:version)' - type: string - livenessFailureThreshold: - description: 'LivenessFailureThreshold defines failure threshold for - the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - livenessHealthCheckPeriod: - description: 'LivenessHealthCheckPeriod defines health check period - for the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - livenessHealthCheckTimeout: - description: 'LivenessHealthCheckTimeout defines health check timeout - for the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - livenessInitialDelaySeconds: - description: 'LivenessInitialDelaySeconds defines initial delay for - the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - livenessSuccessThreshold: - description: 'LivenessSuccessThreshold defines success threshold for - the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - maxPodUnavailable: - format: int32 - type: integer - noCheckStsAreEqual: - type: boolean - nodesPerRacks: - description: 'Number of nodes to deploy for a Cassandra deployment in - each Racks. Default: 1. If NodesPerRacks = 2 and there is 3 racks, - the cluster will have 6 Cassandra Nodes' - format: int32 - type: integer - pod: - description: PodPolicy defines the policy for pods owned by CassKop - operator. - properties: - annotations: - additionalProperties: + versions: + - name: v1alpha1 + schema: + openAPIV3Schema: + description: CassandraCluster is the Schema for the cassandraclusters API + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + properties: + autoPilot: + description: AutoPilot defines if the Operator can fly alone or if + we need human action to trigger Actions on specific Cassandra nodes + If autoPilot=true, the operator will set labels pod-operation-status=To-Do + on Pods which allows him to automatically triggers Action If autoPilot=false, + the operator will set labels pod-operation-status=Manual on Pods + which won't automatically triggers Action + type: boolean + autoUpdateSeedList: + description: AutoUpdateSeedList defines if the Operator automatically + update the SeedList according to new cluster CRD topology by default + a boolean is false + type: boolean + bootstrapImage: + description: 'Image used for bootstrapping cluster (use the form : + base:version)' + type: string + cassandraImage: + description: Image + version to use for Cassandra + type: string + configMapName: + description: Name of the ConfigMap for Cassandra configuration (cassandra.yaml) + If this is empty, operator will uses default cassandra.yaml from + the baseImage If this is not empty, operator will uses the cassandra.yaml + from the Configmap instead + type: string + dataCapacity: + description: Define the Capacity for Persistent Volume Claims in the + local storage + pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ + type: string + dataStorageClass: + description: Define StorageClass for Persistent Volume Claims in the + local storage. + type: string + debug: + description: Debug is used to surcharge Cassandra pod command to not + directly start cassandra but starts an infinite wait to allow user + to connect a bash into the pod to make some diagnoses. + type: boolean + deletePVC: + description: DeletePVC defines if the PVC must be deleted when the + cluster is deleted it is false by default + type: boolean + gcStdout: + description: 'GCStdout set the parameter CASSANDRA_GC_STDOUT which + configure the JVM -Xloggc: true by default' + type: boolean + hardAntiAffinity: + description: HardAntiAffinity defines if the PodAntiAffinity of the + statefulset has to be hard (it's soft by default) + type: boolean + imageJolokiaSecret: + description: JMX Secret if Set is used to set JMX_USER and JMX_PASSWORD + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid?' type: string - description: Annotations specifies the annotations to attach to - headless service the CassKop operator creates - type: object - tolerations: - description: Tolerations specifies the tolerations to attach to - the pods the CassKop operator creates - items: - description: The pod this Toleration is attached to tolerates - any taint that matches the triple using the - matching operator . + type: object + imagePullSecret: + description: Name of the secret to uses to authenticate on Docker + registries If this is empty, operator do nothing If this is not + empty, propagate the imagePullSecrets to the statefulsets + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, uid?' + type: string + type: object + imagepullpolicy: + description: ImagePullPolicy define the pull policy for C* docker + image + type: string + initContainerCmd: + description: Command to execute in the initContainer in the targeted + image + type: string + initContainerImage: + description: 'Image used in the initContainer (use the form : base:version)' + type: string + livenessFailureThreshold: + description: 'LivenessFailureThreshold defines failure threshold for + the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + livenessHealthCheckPeriod: + description: 'LivenessHealthCheckPeriod defines health check period + for the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + livenessHealthCheckTimeout: + description: 'LivenessHealthCheckTimeout defines health check timeout + for the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + livenessInitialDelaySeconds: + description: 'LivenessInitialDelaySeconds defines initial delay for + the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + livenessSuccessThreshold: + description: 'LivenessSuccessThreshold defines success threshold for + the liveness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + maxPodUnavailable: + format: int32 + type: integer + noCheckStsAreEqual: + type: boolean + nodesPerRacks: + description: 'Number of nodes to deploy for a Cassandra deployment + in each Racks. Default: 1. If NodesPerRacks = 2 and there is 3 racks, + the cluster will have 6 Cassandra Nodes' + format: int32 + type: integer + pod: + description: PodPolicy defines the policy for pods owned by CassKop + operator. + properties: + annotations: + additionalProperties: + type: string + description: Annotations specifies the annotations to attach to + headless service the CassKop operator creates + type: object + tolerations: + description: Tolerations specifies the tolerations to attach to + the pods the CassKop operator creates + items: + description: The pod this Toleration is attached to tolerates + any taint that matches the triple using + the matching operator . + properties: + effect: + description: Effect indicates the taint effect to match. + Empty means match all taint effects. When specified, allowed + values are NoSchedule, PreferNoSchedule and NoExecute. + type: string + key: + description: Key is the taint key that the toleration applies + to. Empty means match all taint keys. If the key is empty, + operator must be Exists; this combination means to match + all values and all keys. + type: string + operator: + description: Operator represents a key's relationship to + the value. Valid operators are Exists and Equal. Defaults + to Equal. Exists is equivalent to wildcard for value, + so that a pod can tolerate all taints of a particular + category. + type: string + tolerationSeconds: + description: TolerationSeconds represents the period of + time the toleration (which must be of effect NoExecute, + otherwise this field is ignored) tolerates the taint. + By default, it is not set, which means tolerate the taint + forever (do not evict). Zero and negative values will + be treated as 0 (evict immediately) by the system. + format: int64 + type: integer + value: + description: Value is the taint value the toleration matches + to. If the operator is Exists, the value should be empty, + otherwise just a regular string. + type: string + type: object + type: array + type: object + readOnlyRootFilesystem: + description: Make the pod as Readonly + type: boolean + readinessFailureThreshold: + description: 'ReadinessFailureThreshold defines failure threshold + for the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + readinessHealthCheckPeriod: + description: 'ReadinessHealthCheckPeriod defines health check period + for the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + readinessHealthCheckTimeout: + description: 'ReadinessHealthCheckTimeout defines health check timeout + for the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + readinessInitialDelaySeconds: + description: 'ReadinessInitialDelaySeconds defines initial delay for + the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + readinessSuccessThreshold: + description: 'ReadinessSuccessThreshold defines success threshold + for the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' + format: int32 + type: integer + resources: + description: Pod defines the policy for pods owned by cassandra operator. + This field cannot be updated once the CR is created. Pod *PodPolicy `json:"pod,omitempty"` + properties: + limits: + description: CPUAndMem defines how many cpu and ram the container + will request/limit properties: - effect: - description: Effect indicates the taint effect to match. Empty - means match all taint effects. When specified, allowed values - are NoSchedule, PreferNoSchedule and NoExecute. + cpu: + pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ type: string - key: - description: Key is the taint key that the toleration applies - to. Empty means match all taint keys. If the key is empty, - operator must be Exists; this combination means to match - all values and all keys. + memory: + pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ type: string - operator: - description: Operator represents a key's relationship to the - value. Valid operators are Exists and Equal. Defaults to - Equal. Exists is equivalent to wildcard for value, so that - a pod can tolerate all taints of a particular category. + required: + - cpu + - memory + type: object + requests: + description: CPUAndMem defines how many cpu and ram the container + will request/limit + properties: + cpu: + pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ type: string - tolerationSeconds: - description: TolerationSeconds represents the period of time - the toleration (which must be of effect NoExecute, otherwise - this field is ignored) tolerates the taint. By default, - it is not set, which means tolerate the taint forever (do - not evict). Zero and negative values will be treated as - 0 (evict immediately) by the system. - format: int64 - type: integer - value: - description: Value is the taint value the toleration matches - to. If the operator is Exists, the value should be empty, - otherwise just a regular string. + memory: + pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ type: string + required: + - cpu + - memory type: object - type: array - type: object - readOnlyRootFilesystem: - description: Make the pod as Readonly - type: boolean - readinessFailureThreshold: - description: 'ReadinessFailureThreshold defines failure threshold for - the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - readinessHealthCheckPeriod: - description: 'ReadinessHealthCheckPeriod defines health check period - for the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - readinessHealthCheckTimeout: - description: 'ReadinessHealthCheckTimeout defines health check timeout - for the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - readinessInitialDelaySeconds: - description: 'ReadinessInitialDelaySeconds defines initial delay for - the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - readinessSuccessThreshold: - description: 'ReadinessSuccessThreshold defines success threshold for - the readiness probe of the main cassandra container : https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#configure-probes' - format: int32 - type: integer - resources: - description: Pod defines the policy for pods owned by cassandra operator. - This field cannot be updated once the CR is created. Pod *PodPolicy `json:"pod,omitempty"` - properties: - limits: - description: CPUAndMem defines how many cpu and ram the container - will request/limit - properties: - cpu: - pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ - type: string - memory: - pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ + type: object + restartCountBeforePodDeletion: + description: RestartCountBeforePodDeletion defines the number of restart + allowed for a cassandra container allowed before deleting the pod to + force its restart from scratch. if set to 0 or omit, no action will + be performed based on restart count. + format: int32 + type: integer + runAsUser: + description: RunAsUser define the id of the user to run in the Cassandra + image + format: int64 + minimum: 1 + type: integer + service: + description: PodPolicy defines the policy for headless service owned + by CassKop operator. + properties: + annotations: + additionalProperties: type: string - required: - - cpu - - memory - type: object - requests: - description: CPUAndMem defines how many cpu and ram the container - will request/limit + description: Annotations specifies the annotations to attach to + headless service the CassKop operator creates + type: object + type: object + shareProcessNamespace: + description: 'When process namespace sharing is enabled, processes + in a container are visible to all other containers in that pod. + https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/ + Optional: Default to false.' + type: boolean + sidecarConfigs: + description: SidecarsConfig defines additional sidecar configurations + items: + description: A single application container that you want to run + within a pod. properties: - cpu: - pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ - type: string - memory: - pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ - type: string - required: - - cpu - - memory - type: object - type: object - restartCountBeforePodDeletion: - description: RestartCountBeforePodDeletion defines the number of restart - allowed for a cassandra container allowed before deleting the pod to - force its restart from scratch. if set to 0 or omit, no action will - be performed based on restart count. - format: int32 - type: integer - runAsUser: - description: RunAsUser define the id of the user to run in the Cassandra - image - format: int64 - minimum: 1 - type: integer - service: - description: PodPolicy defines the policy for headless service owned - by CassKop operator. - properties: - annotations: - additionalProperties: - type: string - description: Annotations specifies the annotations to attach to - headless service the CassKop operator creates - type: object - type: object - shareProcessNamespace: - description: 'When process namespace sharing is enabled, processes in - a container are visible to all other containers in that pod. https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/ - Optional: Default to false.' - type: boolean - sidecarConfigs: - description: SidecarsConfig defines additional sidecar configurations - items: - description: A single application container that you want to run within - a pod. - properties: - args: - description: 'Arguments to the entrypoint. The docker image''s - CMD is used if this is not provided. Variable references $(VAR_NAME) - are expanded using the container''s environment. If a variable - cannot be resolved, the reference in the input string will be - unchanged. The $(VAR_NAME) syntax can be escaped with a double - $$, ie: $$(VAR_NAME). Escaped references will never be expanded, - regardless of whether the variable exists or not. Cannot be - updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' - items: + args: + description: 'Arguments to the entrypoint. The docker image''s + CMD is used if this is not provided. Variable references $(VAR_NAME) + are expanded using the container''s environment. If a variable + cannot be resolved, the reference in the input string will + be unchanged. The $(VAR_NAME) syntax can be escaped with a + double $$, ie: $$(VAR_NAME). Escaped references will never + be expanded, regardless of whether the variable exists or + not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + command: + description: 'Entrypoint array. Not executed within a shell. + The docker image''s ENTRYPOINT is used if this is not provided. + Variable references $(VAR_NAME) are expanded using the container''s + environment. If a variable cannot be resolved, the reference + in the input string will be unchanged. The $(VAR_NAME) syntax + can be escaped with a double $$, ie: $$(VAR_NAME). Escaped + references will never be expanded, regardless of whether the + variable exists or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' + items: + type: string + type: array + env: + description: List of environment variables to set in the container. + Cannot be updated. + items: + description: EnvVar represents an environment variable present + in a Container. + properties: + name: + description: Name of the environment variable. Must be + a C_IDENTIFIER. + type: string + value: + description: 'Variable references $(VAR_NAME) are expanded + using the previous defined environment variables in + the container and any service environment variables. + If a variable cannot be resolved, the reference in the + input string will be unchanged. The $(VAR_NAME) syntax + can be escaped with a double $$, ie: $$(VAR_NAME). Escaped + references will never be expanded, regardless of whether + the variable exists or not. Defaults to "".' + type: string + valueFrom: + description: Source for the environment variable's value. + Cannot be used if value is not empty. + properties: + configMapKeyRef: + description: Selects a key of a ConfigMap. + properties: + key: + description: The key to select. + type: string + name: + description: 'Name of the referent. More info: + https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the ConfigMap or + its key must be defined + type: boolean + required: + - key + type: object + fieldRef: + description: 'Selects a field of the pod: supports + metadata.name, metadata.namespace, metadata.labels, + metadata.annotations, spec.nodeName, spec.serviceAccountName, + status.hostIP, status.podIP, status.podIPs.' + properties: + apiVersion: + description: Version of the schema the FieldPath + is written in terms of, defaults to "v1". + type: string + fieldPath: + description: Path of the field to select in the + specified API version. + type: string + required: + - fieldPath + type: object + resourceFieldRef: + description: 'Selects a resource of the container: + only resources limits and requests (limits.cpu, + limits.memory, limits.ephemeral-storage, requests.cpu, + requests.memory and requests.ephemeral-storage) + are currently supported.' + properties: + containerName: + description: 'Container name: required for volumes, + optional for env vars' + type: string + divisor: + anyOf: + - type: integer + - type: string + description: Specifies the output format of the + exposed resources, defaults to "1" + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + resource: + description: 'Required: resource to select' + type: string + required: + - resource + type: object + secretKeyRef: + description: Selects a key of a secret in the pod's + namespace + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + description: 'Name of the referent. More info: + https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the Secret or its + key must be defined + type: boolean + required: + - key + type: object + type: object + required: + - name + type: object + type: array + envFrom: + description: List of sources to populate environment variables + in the container. The keys defined within a source must be + a C_IDENTIFIER. All invalid keys will be reported as an event + when the container is starting. When a key exists in multiple + sources, the value associated with the last source will take + precedence. Values defined by an Env with a duplicate key + will take precedence. Cannot be updated. + items: + description: EnvFromSource represents the source of a set + of ConfigMaps + properties: + configMapRef: + description: The ConfigMap to select from + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the ConfigMap must be + defined + type: boolean + type: object + prefix: + description: An optional identifier to prepend to each + key in the ConfigMap. Must be a C_IDENTIFIER. + type: string + secretRef: + description: The Secret to select from + properties: + name: + description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + TODO: Add other useful fields. apiVersion, kind, + uid?' + type: string + optional: + description: Specify whether the Secret must be defined + type: boolean + type: object + type: object + type: array + image: + description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images + This field is optional to allow higher level config management + to default or override container images in workload controllers + like Deployments and StatefulSets.' type: string - type: array - command: - description: 'Entrypoint array. Not executed within a shell. The - docker image''s ENTRYPOINT is used if this is not provided. - Variable references $(VAR_NAME) are expanded using the container''s - environment. If a variable cannot be resolved, the reference - in the input string will be unchanged. The $(VAR_NAME) syntax - can be escaped with a double $$, ie: $$(VAR_NAME). Escaped references - will never be expanded, regardless of whether the variable exists - or not. Cannot be updated. More info: https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#running-a-command-in-a-shell' - items: + imagePullPolicy: + description: 'Image pull policy. One of Always, Never, IfNotPresent. + Defaults to Always if :latest tag is specified, or IfNotPresent + otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' type: string - type: array - env: - description: List of environment variables to set in the container. - Cannot be updated. - items: - description: EnvVar represents an environment variable present - in a Container. + lifecycle: + description: Actions that the management system should take + in response to container lifecycle events. Cannot be updated. properties: - name: - description: Name of the environment variable. Must be a - C_IDENTIFIER. - type: string - value: - description: 'Variable references $(VAR_NAME) are expanded - using the previous defined environment variables in the - container and any service environment variables. If a - variable cannot be resolved, the reference in the input - string will be unchanged. The $(VAR_NAME) syntax can be - escaped with a double $$, ie: $$(VAR_NAME). Escaped references - will never be expanded, regardless of whether the variable - exists or not. Defaults to "".' - type: string - valueFrom: - description: Source for the environment variable's value. - Cannot be used if value is not empty. + postStart: + description: 'PostStart is called immediately after a container + is created. If the handler fails, the container is terminated + and restarted according to its restart policy. Other management + of the container blocks until the hook completes. More + info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' properties: - configMapKeyRef: - description: Selects a key of a ConfigMap. + exec: + description: One and only one of the following should + be specified. Exec specifies the action to take. properties: - key: - description: The key to select. + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's + filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions + ('|', etc) won't work. To use a shell, you need + to explicitly call out to that shell. Exit status + of 0 is treated as live/healthy and non-zero is + unhealthy. + items: + type: string + type: array + type: object + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in + httpHeaders instead. + type: string + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. type: string - name: - description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, - uid?' + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the + host. Defaults to HTTP. type: string - optional: - description: Specify whether the ConfigMap or its - key must be defined - type: boolean required: - - key + - port type: object - fieldRef: - description: 'Selects a field of the pod: supports metadata.name, - metadata.namespace, metadata.labels, metadata.annotations, - spec.nodeName, spec.serviceAccountName, status.hostIP, - status.podIP, status.podIPs.' + tcpSocket: + description: 'TCPSocket specifies an action involving + a TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' properties: - apiVersion: - description: Version of the schema the FieldPath - is written in terms of, defaults to "v1". - type: string - fieldPath: - description: Path of the field to select in the - specified API version. + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true required: - - fieldPath + - port + type: object + type: object + preStop: + description: 'PreStop is called immediately before a container + is terminated due to an API request or management event + such as liveness/startup probe failure, preemption, resource + contention, etc. The handler is not called if the container + crashes or exits. The reason for termination is passed + to the handler. The Pod''s termination grace period countdown + begins before the PreStop hooked is executed. Regardless + of the outcome of the handler, the container will eventually + terminate within the Pod''s termination grace period. + Other management of the container blocks until the hook + completes or until the termination grace period is reached. + More info: https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' + properties: + exec: + description: One and only one of the following should + be specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for + the command is root ('/') in the container's + filesystem. The command is simply exec'd, it is + not run inside a shell, so traditional shell instructions + ('|', etc) won't work. To use a shell, you need + to explicitly call out to that shell. Exit status + of 0 is treated as live/healthy and non-zero is + unhealthy. + items: + type: string + type: array type: object - resourceFieldRef: - description: 'Selects a resource of the container: only - resources limits and requests (limits.cpu, limits.memory, - limits.ephemeral-storage, requests.cpu, requests.memory - and requests.ephemeral-storage) are currently supported.' + httpGet: + description: HTTPGet specifies the http request to perform. properties: - containerName: - description: 'Container name: required for volumes, - optional for env vars' + host: + description: Host name to connect to, defaults to + the pod IP. You probably want to set "Host" in + httpHeaders instead. type: string - divisor: - description: Specifies the output format of the - exposed resources, defaults to "1" + httpHeaders: + description: Custom headers to set in the request. + HTTP allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. type: string - resource: - description: 'Required: resource to select' + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the + host. Defaults to HTTP. type: string required: - - resource + - port type: object - secretKeyRef: - description: Selects a key of a secret in the pod's - namespace + tcpSocket: + description: 'TCPSocket specifies an action involving + a TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' properties: - key: - description: The key of the secret to select from. Must - be a valid secret key. + host: + description: 'Optional: Host name to connect to, + defaults to the pod IP.' type: string - name: - description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, - uid?' - type: string - optional: - description: Specify whether the Secret or its key - must be defined - type: boolean + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access + on the container. Number must be in the range + 1 to 65535. Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true required: - - key + - port type: object type: object - required: - - name type: object - type: array - envFrom: - description: List of sources to populate environment variables - in the container. The keys defined within a source must be a - C_IDENTIFIER. All invalid keys will be reported as an event - when the container is starting. When a key exists in multiple - sources, the value associated with the last source will take - precedence. Values defined by an Env with a duplicate key will - take precedence. Cannot be updated. - items: - description: EnvFromSource represents the source of a set of - ConfigMaps + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: - configMapRef: - description: The ConfigMap to select from + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. properties: - name: - description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid?' + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. type: string - optional: - description: Specify whether the ConfigMap must be defined - type: boolean + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port type: object - prefix: - description: An optional identifier to prepend to each key - in the ConfigMap. Must be a C_IDENTIFIER. - type: string - secretRef: - description: The Secret to select from + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' properties: - name: - description: 'Name of the referent. More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - TODO: Add other useful fields. apiVersion, kind, uid?' + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' type: string - optional: - description: Specify whether the Secret must be defined - type: boolean + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer type: object - type: array - image: - description: 'Docker image name. More info: https://kubernetes.io/docs/concepts/containers/images - This field is optional to allow higher level config management - to default or override container images in workload controllers - like Deployments and StatefulSets.' - type: string - imagePullPolicy: - description: 'Image pull policy. One of Always, Never, IfNotPresent. - Defaults to Always if :latest tag is specified, or IfNotPresent - otherwise. Cannot be updated. More info: https://kubernetes.io/docs/concepts/containers/images#updating-images' - type: string - lifecycle: - description: Actions that the management system should take in - response to container lifecycle events. Cannot be updated. - properties: - postStart: - description: 'PostStart is called immediately after a container - is created. If the handler fails, the container is terminated - and restarted according to its restart policy. Other management - of the container blocks until the hook completes. More info: - https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' - properties: - exec: - description: One and only one of the following should - be specified. Exec specifies the action to take. - properties: - command: - description: Command is the command line to execute - inside the container, the working directory for - the command is root ('/') in the container's filesystem. - The command is simply exec'd, it is not run inside - a shell, so traditional shell instructions ('|', - etc) won't work. To use a shell, you need to explicitly - call out to that shell. Exit status of 0 is treated - as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - type: object - httpGet: - description: HTTPGet specifies the http request to perform. - properties: - host: - description: Host name to connect to, defaults to - the pod IP. You probably want to set "Host" in httpHeaders - instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom header - to be used in HTTP probes - properties: - name: - description: The header field name - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: Name or number of the port to access - on the container. Number must be in the range 1 - to 65535. Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - tcpSocket: - description: 'TCPSocket specifies an action involving - a TCP port. TCP hooks not yet supported TODO: implement - a realistic TCP lifecycle hook' - properties: - host: - description: 'Optional: Host name to connect to, defaults - to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: Number or name of the port to access - on the container. Number must be in the range 1 - to 65535. Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - preStop: - description: 'PreStop is called immediately before a container - is terminated due to an API request or management event - such as liveness/startup probe failure, preemption, resource - contention, etc. The handler is not called if the container - crashes or exits. The reason for termination is passed to - the handler. The Pod''s termination grace period countdown - begins before the PreStop hooked is executed. Regardless - of the outcome of the handler, the container will eventually - terminate within the Pod''s termination grace period. Other - management of the container blocks until the hook completes - or until the termination grace period is reached. More info: - https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/#container-hooks' - properties: - exec: - description: One and only one of the following should - be specified. Exec specifies the action to take. - properties: - command: - description: Command is the command line to execute - inside the container, the working directory for - the command is root ('/') in the container's filesystem. - The command is simply exec'd, it is not run inside - a shell, so traditional shell instructions ('|', - etc) won't work. To use a shell, you need to explicitly - call out to that shell. Exit status of 0 is treated - as live/healthy and non-zero is unhealthy. - items: - type: string - type: array - type: object - httpGet: - description: HTTPGet specifies the http request to perform. - properties: - host: - description: Host name to connect to, defaults to - the pod IP. You probably want to set "Host" in httpHeaders - instead. - type: string - httpHeaders: - description: Custom headers to set in the request. - HTTP allows repeated headers. - items: - description: HTTPHeader describes a custom header - to be used in HTTP probes - properties: - name: - description: The header field name - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - path: - description: Path to access on the HTTP server. - type: string - port: - anyOf: - - type: integer - - type: string - description: Name or number of the port to access - on the container. Number must be in the range 1 - to 65535. Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - tcpSocket: - description: 'TCPSocket specifies an action involving - a TCP port. TCP hooks not yet supported TODO: implement - a realistic TCP lifecycle hook' - properties: - host: - description: 'Optional: Host name to connect to, defaults - to the pod IP.' - type: string - port: - anyOf: - - type: integer - - type: string - description: Number or name of the port to access - on the container. Number must be in the range 1 - to 65535. Name must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - type: object - type: object - livenessProbe: - description: 'Periodic probe of container liveness. Container - will be restarted if the probe fails. Cannot be updated. More - info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - properties: - exec: - description: One and only one of the following should be specified. - Exec specifies the action to take. - properties: - command: - description: Command is the command line to execute inside - the container, the working directory for the command is - root ('/') in the container's filesystem. The command - is simply exec'd, it is not run inside a shell, so traditional - shell instructions ('|', etc) won't work. To use a shell, - you need to explicitly call out to that shell. Exit - status of 0 is treated as live/healthy and non-zero - is unhealthy. - items: - type: string - type: array - type: object - failureThreshold: - description: Minimum consecutive failures for the probe to - be considered failed after having succeeded. Defaults to - 3. Minimum value is 1. - format: int32 - type: integer - httpGet: - description: HTTPGet specifies the http request to perform. + name: + description: Name of the container specified as a DNS_LABEL. + Each container in a pod must have a unique name (DNS_LABEL). + Cannot be updated. + type: string + ports: + description: List of ports to expose from the container. Exposing + a port here gives the system additional information about + the network connections a container uses, but is primarily + informational. Not specifying a port here DOES NOT prevent + that port from being exposed. Any port which is listening + on the default "0.0.0.0" address inside a container will be + accessible from the network. Cannot be updated. + items: + description: ContainerPort represents a network port in a + single container. properties: - host: - description: Host name to connect to, defaults to the - pod IP. You probably want to set "Host" in httpHeaders - instead. - type: string - httpHeaders: - description: Custom headers to set in the request. HTTP - allows repeated headers. - items: - description: HTTPHeader describes a custom header to - be used in HTTP probes - properties: - name: - description: The header field name - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - path: - description: Path to access on the HTTP server. + containerPort: + description: Number of port to expose on the pod's IP + address. This must be a valid port number, 0 < x < 65536. + format: int32 + type: integer + hostIP: + description: What host IP to bind the external port to. type: string - port: - anyOf: - - type: integer - - type: string - description: Name or number of the port to access on the - container. Number must be in the range 1 to 65535. Name - must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: Scheme to use for connecting to the host. - Defaults to HTTP. + hostPort: + description: Number of port to expose on the host. If + specified, this must be a valid port number, 0 < x < + 65536. If HostNetwork is specified, this must match + ContainerPort. Most containers do not need this. + format: int32 + type: integer + name: + description: If specified, this must be an IANA_SVC_NAME + and unique within the pod. Each named port in a pod + must have a unique name. Name for the port that can + be referred to by services. type: string - required: - - port - type: object - initialDelaySeconds: - description: 'Number of seconds after the container has started - before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - format: int32 - type: integer - periodSeconds: - description: How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: Minimum consecutive successes for the probe to - be considered successful after having failed. Defaults to - 1. Must be 1 for liveness and startup. Minimum value is - 1. - format: int32 - type: integer - tcpSocket: - description: 'TCPSocket specifies an action involving a TCP - port. TCP hooks not yet supported TODO: implement a realistic - TCP lifecycle hook' - properties: - host: - description: 'Optional: Host name to connect to, defaults - to the pod IP.' + protocol: + description: Protocol for port. Must be UDP, TCP, or SCTP. + Defaults to "TCP". type: string - port: - anyOf: - - type: integer - - type: string - description: Number or name of the port to access on the - container. Number must be in the range 1 to 65535. Name - must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true required: - - port + - containerPort type: object - timeoutSeconds: - description: 'Number of seconds after which the probe times - out. Defaults to 1 second. Minimum value is 1. More info: - https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - format: int32 - type: integer - type: object - name: - description: Name of the container specified as a DNS_LABEL. Each - container in a pod must have a unique name (DNS_LABEL). Cannot - be updated. - type: string - ports: - description: List of ports to expose from the container. Exposing - a port here gives the system additional information about the - network connections a container uses, but is primarily informational. - Not specifying a port here DOES NOT prevent that port from being - exposed. Any port which is listening on the default "0.0.0.0" - address inside a container will be accessible from the network. - Cannot be updated. - items: - description: ContainerPort represents a network port in a single - container. + type: array + x-kubernetes-list-map-keys: + - containerPort + - protocol + x-kubernetes-list-type: map + readinessProbe: + description: 'Periodic probe of container service readiness. + Container will be removed from service endpoints if the probe + fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' properties: - containerPort: - description: Number of port to expose on the pod's IP address. - This must be a valid port number, 0 < x < 65536. + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. format: int32 type: integer - hostIP: - description: What host IP to bind the external port to. - type: string - hostPort: - description: Number of port to expose on the host. If specified, - this must be a valid port number, 0 < x < 65536. If HostNetwork - is specified, this must match ContainerPort. Most containers - do not need this. + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' format: int32 type: integer - name: - description: If specified, this must be an IANA_SVC_NAME - and unique within the pod. Each named port in a pod must - have a unique name. Name for the port that can be referred - to by services. - type: string - protocol: - description: Protocol for port. Must be UDP, TCP, or SCTP. - Defaults to "TCP". - type: string - required: - - containerPort - type: object - type: array - readinessProbe: - description: 'Periodic probe of container service readiness. Container - will be removed from service endpoints if the probe fails. Cannot - be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - properties: - exec: - description: One and only one of the following should be specified. - Exec specifies the action to take. - properties: - command: - description: Command is the command line to execute inside - the container, the working directory for the command is - root ('/') in the container's filesystem. The command - is simply exec'd, it is not run inside a shell, so traditional - shell instructions ('|', etc) won't work. To use a shell, - you need to explicitly call out to that shell. Exit - status of 0 is treated as live/healthy and non-zero - is unhealthy. - items: + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' type: string - type: array - type: object - failureThreshold: - description: Minimum consecutive failures for the probe to - be considered failed after having succeeded. Defaults to - 3. Minimum value is 1. - format: int32 - type: integer - httpGet: - description: HTTPGet specifies the http request to perform. - properties: - host: - description: Host name to connect to, defaults to the - pod IP. You probably want to set "Host" in httpHeaders - instead. - type: string - httpHeaders: - description: Custom headers to set in the request. HTTP - allows repeated headers. - items: - description: HTTPHeader describes a custom header to - be used in HTTP probes - properties: - name: - description: The header field name - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - path: - description: Path to access on the HTTP server. - type: string - port: + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + resources: + description: 'Compute Resources required by this container. + Cannot be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + properties: + limits: + additionalProperties: anyOf: - type: integer - type: string - description: Name or number of the port to access on the - container. Number must be in the range 1 to 65535. Name - must be an IANA_SVC_NAME. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true - scheme: - description: Scheme to use for connecting to the host. - Defaults to HTTP. - type: string - required: - - port - type: object - initialDelaySeconds: - description: 'Number of seconds after the container has started - before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - format: int32 - type: integer - periodSeconds: - description: How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: Minimum consecutive successes for the probe to - be considered successful after having failed. Defaults to - 1. Must be 1 for liveness and startup. Minimum value is - 1. - format: int32 - type: integer - tcpSocket: - description: 'TCPSocket specifies an action involving a TCP - port. TCP hooks not yet supported TODO: implement a realistic - TCP lifecycle hook' - properties: - host: - description: 'Optional: Host name to connect to, defaults - to the pod IP.' - type: string - port: + description: 'Limits describes the maximum amount of compute + resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: anyOf: - type: integer - type: string - description: Number or name of the port to access on the - container. Number must be in the range 1 to 65535. Name - must be an IANA_SVC_NAME. + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ x-kubernetes-int-or-string: true - required: - - port - type: object - timeoutSeconds: - description: 'Number of seconds after which the probe times - out. Defaults to 1 second. Minimum value is 1. More info: - https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - format: int32 - type: integer - type: object - resources: - description: 'Compute Resources required by this container. Cannot - be updated. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' - properties: - limits: - additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute - resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' - type: object - requests: - additionalProperties: + description: 'Requests describes the minimum amount of compute + resources required. If Requests is omitted for a container, + it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. More info: + https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + securityContext: + description: 'Security options the pod should run with. More + info: https://kubernetes.io/docs/concepts/policy/security-context/ + More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' + properties: + allowPrivilegeEscalation: + description: 'AllowPrivilegeEscalation controls whether + a process can gain more privileges than its parent process. + This bool directly controls if the no_new_privs flag will + be set on the container process. AllowPrivilegeEscalation + is true always when the container is: 1) run as Privileged + 2) has CAP_SYS_ADMIN' + type: boolean + capabilities: + description: The capabilities to add/drop when running containers. + Defaults to the default set of capabilities granted by + the container runtime. + properties: + add: + description: Added capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + drop: + description: Removed capabilities + items: + description: Capability represent POSIX capabilities + type + type: string + type: array + type: object + privileged: + description: Run container in privileged mode. Processes + in privileged containers are essentially equivalent to + root on the host. Defaults to false. + type: boolean + procMount: + description: procMount denotes the type of proc mount to + use for the containers. The default is DefaultProcMount + which uses the container runtime defaults for readonly + paths and masked paths. This requires the ProcMountType + feature flag to be enabled. type: string - description: 'Requests describes the minimum amount of compute - resources required. If Requests is omitted for a container, - it defaults to Limits if that is explicitly specified, otherwise - to an implementation-defined value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' - type: object - type: object - securityContext: - description: 'Security options the pod should run with. More info: - https://kubernetes.io/docs/concepts/policy/security-context/ - More info: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/' - properties: - allowPrivilegeEscalation: - description: 'AllowPrivilegeEscalation controls whether a - process can gain more privileges than its parent process. - This bool directly controls if the no_new_privs flag will - be set on the container process. AllowPrivilegeEscalation - is true always when the container is: 1) run as Privileged - 2) has CAP_SYS_ADMIN' - type: boolean - capabilities: - description: The capabilities to add/drop when running containers. - Defaults to the default set of capabilities granted by the - container runtime. - properties: - add: - description: Added capabilities - items: - description: Capability represent POSIX capabilities - type + readOnlyRootFilesystem: + description: Whether this container has a read-only root + filesystem. Default is false. + type: boolean + runAsGroup: + description: The GID to run the entrypoint of the container + process. Uses runtime default if unset. May also be set + in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. + format: int64 + type: integer + runAsNonRoot: + description: Indicates that the container must run as a + non-root user. If true, the Kubelet will validate the + image at runtime to ensure that it does not run as UID + 0 (root) and fail to start the container if it does. If + unset or false, no such validation will be performed. + May also be set in PodSecurityContext. If set in both + SecurityContext and PodSecurityContext, the value specified + in SecurityContext takes precedence. + type: boolean + runAsUser: + description: The UID to run the entrypoint of the container + process. Defaults to user specified in image metadata + if unspecified. May also be set in PodSecurityContext. If + set in both SecurityContext and PodSecurityContext, the + value specified in SecurityContext takes precedence. + format: int64 + type: integer + seLinuxOptions: + description: The SELinux context to be applied to the container. + If unspecified, the container runtime will allocate a + random SELinux context for each container. May also be + set in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. + properties: + level: + description: Level is SELinux level label that applies + to the container. type: string - type: array - drop: - description: Removed capabilities - items: - description: Capability represent POSIX capabilities - type + role: + description: Role is a SELinux role label that applies + to the container. type: string - type: array - type: object - privileged: - description: Run container in privileged mode. Processes in - privileged containers are essentially equivalent to root - on the host. Defaults to false. - type: boolean - procMount: - description: procMount denotes the type of proc mount to use - for the containers. The default is DefaultProcMount which - uses the container runtime defaults for readonly paths and - masked paths. This requires the ProcMountType feature flag - to be enabled. - type: string - readOnlyRootFilesystem: - description: Whether this container has a read-only root filesystem. - Default is false. - type: boolean - runAsGroup: - description: The GID to run the entrypoint of the container - process. Uses runtime default if unset. May also be set - in PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext - takes precedence. - format: int64 - type: integer - runAsNonRoot: - description: Indicates that the container must run as a non-root - user. If true, the Kubelet will validate the image at runtime - to ensure that it does not run as UID 0 (root) and fail - to start the container if it does. If unset or false, no - such validation will be performed. May also be set in PodSecurityContext. If - set in both SecurityContext and PodSecurityContext, the - value specified in SecurityContext takes precedence. - type: boolean - runAsUser: - description: The UID to run the entrypoint of the container - process. Defaults to user specified in image metadata if - unspecified. May also be set in PodSecurityContext. If - set in both SecurityContext and PodSecurityContext, the - value specified in SecurityContext takes precedence. - format: int64 - type: integer - seLinuxOptions: - description: The SELinux context to be applied to the container. - If unspecified, the container runtime will allocate a random - SELinux context for each container. May also be set in - PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext - takes precedence. + type: + description: Type is a SELinux type label that applies + to the container. + type: string + user: + description: User is a SELinux user label that applies + to the container. + type: string + type: object + windowsOptions: + description: The Windows specific settings applied to all + containers. If unspecified, the options from the PodSecurityContext + will be used. If set in both SecurityContext and PodSecurityContext, + the value specified in SecurityContext takes precedence. + properties: + gmsaCredentialSpec: + description: GMSACredentialSpec is where the GMSA admission + webhook (https://github.com/kubernetes-sigs/windows-gmsa) + inlines the contents of the GMSA credential spec named + by the GMSACredentialSpecName field. + type: string + gmsaCredentialSpecName: + description: GMSACredentialSpecName is the name of the + GMSA credential spec to use. + type: string + runAsUserName: + description: The UserName in Windows to run the entrypoint + of the container process. Defaults to the user specified + in image metadata if unspecified. May also be set + in PodSecurityContext. If set in both SecurityContext + and PodSecurityContext, the value specified in SecurityContext + takes precedence. + type: string + type: object + type: object + startupProbe: + description: 'StartupProbe indicates that the Pod has successfully + initialized. If specified, no other probes are executed until + this completes successfully. If this probe fails, the Pod + will be restarted, just as if the livenessProbe failed. This + can be used to provide different probe parameters at the beginning + of a Pod''s lifecycle, when it might take a long time to load + data or warm a cache, than during steady-state operation. + This cannot be updated. This is a beta feature enabled by + the StartupProbe feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object + stdin: + description: Whether this container should allocate a buffer + for stdin in the container runtime. If this is not set, reads + from stdin in the container will always result in EOF. Default + is false. + type: boolean + stdinOnce: + description: Whether the container runtime should close the + stdin channel after it has been opened by a single attach. + When stdin is true the stdin stream will remain open across + multiple attach sessions. If stdinOnce is set to true, stdin + is opened on container start, is empty until the first client + attaches to stdin, and then remains open and accepts data + until the client disconnects, at which time stdin is closed + and remains closed until the container is restarted. If this + flag is false, a container processes that reads from stdin + will never receive an EOF. Default is false + type: boolean + terminationMessagePath: + description: 'Optional: Path at which the file to which the + container''s termination message will be written is mounted + into the container''s filesystem. Message written is intended + to be brief final status, such as an assertion failure message. + Will be truncated by the node if greater than 4096 bytes. + The total message length across all containers will be limited + to 12kb. Defaults to /dev/termination-log. Cannot be updated.' + type: string + terminationMessagePolicy: + description: Indicate how the termination message should be + populated. File will use the contents of terminationMessagePath + to populate the container status message on both success and + failure. FallbackToLogsOnError will use the last chunk of + container log output if the termination message file is empty + and the container exited with an error. The log output is + limited to 2048 bytes or 80 lines, whichever is smaller. Defaults + to File. Cannot be updated. + type: string + tty: + description: Whether this container should allocate a TTY for + itself, also requires 'stdin' to be true. Default is false. + type: boolean + volumeDevices: + description: volumeDevices is the list of block devices to be + used by the container. + items: + description: volumeDevice describes a mapping of a raw block + device within a container. properties: - level: - description: Level is SELinux level label that applies - to the container. + devicePath: + description: devicePath is the path inside of the container + that the device will be mapped to. type: string - role: - description: Role is a SELinux role label that applies - to the container. - type: string - type: - description: Type is a SELinux type label that applies - to the container. - type: string - user: - description: User is a SELinux user label that applies - to the container. + name: + description: name must match the name of a persistentVolumeClaim + in the pod type: string + required: + - devicePath + - name type: object - windowsOptions: - description: The Windows specific settings applied to all - containers. If unspecified, the options from the PodSecurityContext - will be used. If set in both SecurityContext and PodSecurityContext, - the value specified in SecurityContext takes precedence. + type: array + volumeMounts: + description: Pod volumes to mount into the container's filesystem. + Cannot be updated. + items: + description: VolumeMount describes a mounting of a Volume + within a container. properties: - gmsaCredentialSpec: - description: GMSACredentialSpec is where the GMSA admission - webhook (https://github.com/kubernetes-sigs/windows-gmsa) - inlines the contents of the GMSA credential spec named - by the GMSACredentialSpecName field. - type: string - gmsaCredentialSpecName: - description: GMSACredentialSpecName is the name of the - GMSA credential spec to use. + mountPath: + description: Path within the container at which the volume + should be mounted. Must not contain ':'. type: string - runAsUserName: - description: The UserName in Windows to run the entrypoint - of the container process. Defaults to the user specified - in image metadata if unspecified. May also be set in - PodSecurityContext. If set in both SecurityContext and - PodSecurityContext, the value specified in SecurityContext - takes precedence. + mountPropagation: + description: mountPropagation determines how mounts are + propagated from the host to container and the other + way around. When not set, MountPropagationNone is used. + This field is beta in 1.10. type: string - type: object - type: object - startupProbe: - description: 'StartupProbe indicates that the Pod has successfully - initialized. If specified, no other probes are executed until - this completes successfully. If this probe fails, the Pod will - be restarted, just as if the livenessProbe failed. This can - be used to provide different probe parameters at the beginning - of a Pod''s lifecycle, when it might take a long time to load - data or warm a cache, than during steady-state operation. This - cannot be updated. This is a beta feature enabled by the StartupProbe - feature flag. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - properties: - exec: - description: One and only one of the following should be specified. - Exec specifies the action to take. - properties: - command: - description: Command is the command line to execute inside - the container, the working directory for the command is - root ('/') in the container's filesystem. The command - is simply exec'd, it is not run inside a shell, so traditional - shell instructions ('|', etc) won't work. To use a shell, - you need to explicitly call out to that shell. Exit - status of 0 is treated as live/healthy and non-zero - is unhealthy. - items: - type: string - type: array - type: object - failureThreshold: - description: Minimum consecutive failures for the probe to - be considered failed after having succeeded. Defaults to - 3. Minimum value is 1. - format: int32 - type: integer - httpGet: - description: HTTPGet specifies the http request to perform. - properties: - host: - description: Host name to connect to, defaults to the - pod IP. You probably want to set "Host" in httpHeaders - instead. + name: + description: This must match the Name of a Volume. type: string - httpHeaders: - description: Custom headers to set in the request. HTTP - allows repeated headers. - items: - description: HTTPHeader describes a custom header to - be used in HTTP probes - properties: - name: - description: The header field name - type: string - value: - description: The header field value - type: string - required: - - name - - value - type: object - type: array - path: - description: Path to access on the HTTP server. + readOnly: + description: Mounted read-only if true, read-write otherwise + (false or unspecified). Defaults to false. + type: boolean + subPath: + description: Path within the volume from which the container's + volume should be mounted. Defaults to "" (volume's root). type: string - port: - anyOf: - - type: integer - - type: string - description: Name or number of the port to access on the - container. Number must be in the range 1 to 65535. Name - must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - scheme: - description: Scheme to use for connecting to the host. - Defaults to HTTP. + subPathExpr: + description: Expanded path within the volume from which + the container's volume should be mounted. Behaves similarly + to SubPath but environment variable references $(VAR_NAME) + are expanded using the container's environment. Defaults + to "" (volume's root). SubPathExpr and SubPath are mutually + exclusive. type: string required: - - port + - mountPath + - name type: object - initialDelaySeconds: - description: 'Number of seconds after the container has started - before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - format: int32 - type: integer - periodSeconds: - description: How often (in seconds) to perform the probe. - Default to 10 seconds. Minimum value is 1. - format: int32 - type: integer - successThreshold: - description: Minimum consecutive successes for the probe to - be considered successful after having failed. Defaults to - 1. Must be 1 for liveness and startup. Minimum value is - 1. - format: int32 - type: integer - tcpSocket: - description: 'TCPSocket specifies an action involving a TCP - port. TCP hooks not yet supported TODO: implement a realistic - TCP lifecycle hook' - properties: - host: - description: 'Optional: Host name to connect to, defaults - to the pod IP.' + type: array + workingDir: + description: Container's working directory. If not specified, + the container runtime's default will be used, which might + be configured in the container image. Cannot be updated. + type: string + required: + - name + type: object + type: array + storageConfigs: + description: StorageConfig defines additional storage configurations + items: + description: StorageConfig defines additional storage configurations + properties: + mountPath: + description: Mount path into cassandra container + type: string + name: + description: Name of the pvc + pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' + type: string + pvcSpec: + description: Persistent volume claim spec + properties: + accessModes: + description: 'AccessModes contains the desired access modes + the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' + items: type: string - port: - anyOf: - - type: integer - - type: string - description: Number or name of the port to access on the - container. Number must be in the range 1 to 65535. Name - must be an IANA_SVC_NAME. - x-kubernetes-int-or-string: true - required: - - port - type: object - timeoutSeconds: - description: 'Number of seconds after which the probe times - out. Defaults to 1 second. Minimum value is 1. More info: - https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' - format: int32 - type: integer - type: object - stdin: - description: Whether this container should allocate a buffer for - stdin in the container runtime. If this is not set, reads from - stdin in the container will always result in EOF. Default is - false. - type: boolean - stdinOnce: - description: Whether the container runtime should close the stdin - channel after it has been opened by a single attach. When stdin - is true the stdin stream will remain open across multiple attach - sessions. If stdinOnce is set to true, stdin is opened on container - start, is empty until the first client attaches to stdin, and - then remains open and accepts data until the client disconnects, - at which time stdin is closed and remains closed until the container - is restarted. If this flag is false, a container processes that - reads from stdin will never receive an EOF. Default is false - type: boolean - terminationMessagePath: - description: 'Optional: Path at which the file to which the container''s - termination message will be written is mounted into the container''s - filesystem. Message written is intended to be brief final status, - such as an assertion failure message. Will be truncated by the - node if greater than 4096 bytes. The total message length across - all containers will be limited to 12kb. Defaults to /dev/termination-log. - Cannot be updated.' - type: string - terminationMessagePolicy: - description: Indicate how the termination message should be populated. - File will use the contents of terminationMessagePath to populate - the container status message on both success and failure. FallbackToLogsOnError - will use the last chunk of container log output if the termination - message file is empty and the container exited with an error. - The log output is limited to 2048 bytes or 80 lines, whichever - is smaller. Defaults to File. Cannot be updated. - type: string - tty: - description: Whether this container should allocate a TTY for - itself, also requires 'stdin' to be true. Default is false. - type: boolean - volumeDevices: - description: volumeDevices is the list of block devices to be - used by the container. + type: array + dataSource: + description: 'This field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot + - Beta) * An existing PVC (PersistentVolumeClaim) * An + existing custom resource/object that implements data population + (Alpha) In order to use VolumeSnapshot object types, the + appropriate feature gate must be enabled (VolumeSnapshotDataSource + or AnyVolumeDataSource) If the provisioner or an external + controller can support the specified data source, it will + create a new volume based on the contents of the specified + data source. If the specified data source is not supported, + the volume will not be created and the failure will be + reported as an event. In the future, we plan to support + more data source types and the behavior of the provisioner + may change.' + properties: + apiGroup: + description: APIGroup is the group for the resource + being referenced. If APIGroup is not specified, the + specified Kind must be in the core API group. For + any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource being referenced + type: string + name: + description: Name is the name of resource being referenced + type: string + required: + - kind + - name + type: object + resources: + description: 'Resources represents the minimum resources + the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources' + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Limits describes the maximum amount of + compute resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: 'Requests describes the minimum amount + of compute resources required. If Requests is omitted + for a container, it defaults to Limits if that is + explicitly specified, otherwise to an implementation-defined + value. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' + type: object + type: object + selector: + description: A label query over volumes to consider for + binding. + properties: + matchExpressions: + description: matchExpressions is a list of label selector + requirements. The requirements are ANDed. + items: + description: A label selector requirement is a selector + that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key that the selector + applies to. + type: string + operator: + description: operator represents a key's relationship + to a set of values. Valid operators are In, + NotIn, Exists and DoesNotExist. + type: string + values: + description: values is an array of string values. + If the operator is In or NotIn, the values array + must be non-empty. If the operator is Exists + or DoesNotExist, the values array must be empty. + This array is replaced during a strategic merge + patch. + items: + type: string + type: array + required: + - key + - operator + type: object + type: array + matchLabels: + additionalProperties: + type: string + description: matchLabels is a map of {key,value} pairs. + A single {key,value} in the matchLabels map is equivalent + to an element of matchExpressions, whose key field + is "key", the operator is "In", and the values array + contains only "value". The requirements are ANDed. + type: object + type: object + storageClassName: + description: 'Name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' + type: string + volumeMode: + description: volumeMode defines what type of volume is required + by the claim. Value of Filesystem is implied when not + included in claim spec. + type: string + volumeName: + description: VolumeName is the binding reference to the + PersistentVolume backing this claim. + type: string + type: object + required: + - mountPath + - name + - pvcSpec + type: object + type: array + topology: + description: Topology to create Cassandra DC and Racks and to target + appropriate Kubernetes Nodes + properties: + dc: + description: List of DC defined in the CassandraCluster items: - description: volumeDevice describes a mapping of a raw block - device within a container. + description: DC allow to configure Cassandra RC according to + kubernetes nodeselector labels properties: - devicePath: - description: devicePath is the path inside of the container - that the device will be mapped to. + dataCapacity: + description: Define the Capacity for Persistent Volume Claims + in the local storage + pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ type: string + dataStorageClass: + description: Define StorageClass for Persistent Volume Claims + in the local storage. + type: string + labels: + additionalProperties: + type: string + description: Labels used to target Kubernetes nodes + type: object name: - description: name must match the name of a persistentVolumeClaim - in the pod + description: Name of the DC + pattern: ^[^-]+$ type: string - required: - - devicePath - - name + nodesPerRacks: + description: 'Number of nodes to deploy for a Cassandra + deployment in each Racks. Default: 1. Optional, if not + filled, used value define in CassandraClusterSpec' + format: int32 + type: integer + numTokens: + description: 'NumTokens : configure the CASSANDRA_NUM_TOKENS + parameter which can be different for each DD' + format: int32 + type: integer + rack: + description: List of Racks defined in the Cassandra DC + items: + description: Rack allow to configure Cassandra Rack according + to kubernetes nodeselector labels + properties: + labels: + additionalProperties: + type: string + description: Labels used to target Kubernetes nodes + type: object + name: + description: Name of the Rack + pattern: ^[^-]+$ + type: string + rollingPartition: + description: The Partition to control the Statefulset + Upgrade + format: int32 + type: integer + rollingRestart: + description: Flag to tell the operator to trigger + a rolling restart of the Rack + type: boolean + type: object + type: array type: object type: array - volumeMounts: - description: Pod volumes to mount into the container's filesystem. - Cannot be updated. - items: - description: VolumeMount describes a mounting of a Volume within - a container. + type: object + unlockNextOperation: + description: Very special Flag to hack CassKop reconcile loop - use + with really good Care + type: boolean + type: object + status: + description: CassandraClusterStatus defines Global state of CassandraCluster + properties: + cassandraNodeStatus: + additionalProperties: + properties: + hostId: + type: string + nodeIp: + type: string + type: object + type: object + cassandraRackStatus: + additionalProperties: + description: CassandraRackStatus defines states of Cassandra for + 1 rack (1 statefulset) + properties: + cassandraLastAction: + description: 'CassandraLastAction is the set of Cassandra State + & Actions: Active, Standby..' properties: - mountPath: - description: Path within the container at which the volume - should be mounted. Must not contain ':'. - type: string - mountPropagation: - description: mountPropagation determines how mounts are - propagated from the host to container and the other way - around. When not set, MountPropagationNone is used. This - field is beta in 1.10. + endTime: + format: date-time type: string name: - description: This must match the Name of a Volume. + description: 'Type of action to perform : UpdateVersion, + UpdateBaseImage, UpdateConfigMap..' type: string - readOnly: - description: Mounted read-only if true, read-write otherwise - (false or unspecified). Defaults to false. - type: boolean - subPath: - description: Path within the volume from which the container's - volume should be mounted. Defaults to "" (volume's root). + startTime: + format: date-time type: string - subPathExpr: - description: Expanded path within the volume from which - the container's volume should be mounted. Behaves similarly - to SubPath but environment variable references $(VAR_NAME) - are expanded using the container's environment. Defaults - to "" (volume's root). SubPathExpr and SubPath are mutually - exclusive. + status: + description: Action is the specific actions that can be + done on a Cassandra Cluster such as cleanup, upgradesstables.. type: string - required: - - mountPath - - name + updatedNodes: + description: PodNames of updated Cassandra nodes. Updated + means the Cassandra container image version matches the + spec's version. + items: + type: string + type: array type: object - type: array - workingDir: - description: Container's working directory. If not specified, - the container runtime's default will be used, which might be - configured in the container image. Cannot be updated. - type: string - required: - - name - type: object - type: array - storageConfigs: - description: StorageConfig defines additional storage configurations - items: - description: StorageConfig defines additional storage configurations - properties: - mountPath: - description: Mount path into cassandra container - type: string - name: - description: Name of the pvc - pattern: '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*' - type: string - pvcSpec: - description: Persistent volume claim spec - properties: - accessModes: - description: 'AccessModes contains the desired access modes - the volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1' - items: + phase: + description: 'Phase indicates the state this Cassandra cluster + jumps in. Phase goes as one way as below: Initial -> Running + <-> updating' + type: string + podLastOperation: + description: PodLastOperation manage status for Pod Operation + (nodetool cleanup, upgradesstables..) + properties: + endTime: + format: date-time type: string - type: array - dataSource: - description: 'This field can be used to specify either: * - An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot - - Beta) * An existing PVC (PersistentVolumeClaim) * An existing - custom resource/object that implements data population (Alpha) - In order to use VolumeSnapshot object types, the appropriate - feature gate must be enabled (VolumeSnapshotDataSource or - AnyVolumeDataSource) If the provisioner or an external controller - can support the specified data source, it will create a - new volume based on the contents of the specified data source. - If the specified data source is not supported, the volume - will not be created and the failure will be reported as - an event. In the future, we plan to support more data source - types and the behavior of the provisioner may change.' - properties: - apiGroup: - description: APIGroup is the group for the resource being - referenced. If APIGroup is not specified, the specified - Kind must be in the core API group. For any other third-party - types, APIGroup is required. + name: + type: string + operatorName: + description: Name of operator + type: string + pods: + description: List of pods running an operation + items: type: string - kind: - description: Kind is the type of resource being referenced + type: array + podsKO: + description: List of pods that fail to run an operation + items: type: string - name: - description: Name is the name of resource being referenced + type: array + podsOK: + description: List of pods that run an operation successfully + items: type: string - required: - - kind - - name - type: object - resources: - description: 'Resources represents the minimum resources the - volume should have. More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources' - properties: - limits: - additionalProperties: - type: string - description: 'Limits describes the maximum amount of compute - resources allowed. More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' - type: object - requests: - additionalProperties: - type: string - description: 'Requests describes the minimum amount of - compute resources required. If Requests is omitted for - a container, it defaults to Limits if that is explicitly - specified, otherwise to an implementation-defined value. - More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/' - type: object - type: object - selector: - description: A label query over volumes to consider for binding. - properties: - matchExpressions: - description: matchExpressions is a list of label selector - requirements. The requirements are ANDed. - items: - description: A label selector requirement is a selector - that contains values, a key, and an operator that - relates the key and values. - properties: - key: - description: key is the label key that the selector - applies to. - type: string - operator: - description: operator represents a key's relationship - to a set of values. Valid operators are In, NotIn, - Exists and DoesNotExist. - type: string - values: - description: values is an array of string values. - If the operator is In or NotIn, the values array - must be non-empty. If the operator is Exists or - DoesNotExist, the values array must be empty. - This array is replaced during a strategic merge - patch. - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: matchLabels is a map of {key,value} pairs. - A single {key,value} in the matchLabels map is equivalent - to an element of matchExpressions, whose key field is - "key", the operator is "In", and the values array contains - only "value". The requirements are ANDed. - type: object - type: object - storageClassName: - description: 'Name of the StorageClass required by the claim. - More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1' - type: string - volumeMode: - description: volumeMode defines what type of volume is required - by the claim. Value of Filesystem is implied when not included - in claim spec. - type: string - volumeName: - description: VolumeName is the binding reference to the PersistentVolume - backing this claim. - type: string - type: object - required: - - mountPath - - name - - pvcSpec - type: object - type: array - topology: - description: Topology to create Cassandra DC and Racks and to target - appropriate Kubernetes Nodes - properties: - dc: - description: List of DC defined in the CassandraCluster - items: - description: DC allow to configure Cassandra RC according to kubernetes - nodeselector labels - properties: - dataCapacity: - description: Define the Capacity for Persistent Volume Claims - in the local storage - pattern: ^([+-]?[0-9.]+)([eEinumkKMGTP]*[-+]?[0-9]*)$ - type: string - dataStorageClass: - description: Define StorageClass for Persistent Volume Claims - in the local storage. - type: string - labels: - additionalProperties: - type: string - description: Labels used to target Kubernetes nodes - type: object - name: - description: Name of the DC - pattern: ^[^-]+$ - type: string - nodesPerRacks: - description: 'Number of nodes to deploy for a Cassandra deployment - in each Racks. Default: 1. Optional, if not filled, used - value define in CassandraClusterSpec' - format: int32 - type: integer - numTokens: - description: 'NumTokens : configure the CASSANDRA_NUM_TOKENS - parameter which can be different for each DD' - format: int32 - type: integer - rack: - description: List of Racks defined in the Cassandra DC - items: - description: Rack allow to configure Cassandra Rack according - to kubernetes nodeselector labels - properties: - labels: - additionalProperties: - type: string - description: Labels used to target Kubernetes nodes - type: object - name: - description: Name of the Rack - pattern: ^[^-]+$ - type: string - rollingPartition: - description: The Partition to control the Statefulset - Upgrade - format: int32 - type: integer - rollingRestart: - description: Flag to tell the operator to trigger a - rolling restart of the Rack - type: boolean - type: object - type: array - type: object - type: array - type: object - unlockNextOperation: - description: Very special Flag to hack CassKop reconcile loop - use - with really good Care - type: boolean - type: object - status: - description: CassandraClusterStatus defines Global state of CassandraCluster - properties: - cassandraNodeStatus: - additionalProperties: - properties: - hostId: - type: string - nodeIp: - type: string - type: object - type: object - cassandraRackStatus: - additionalProperties: - description: CassandraRackStatus defines states of Cassandra for 1 - rack (1 statefulset) - properties: - cassandraLastAction: - description: 'CassandraLastAction is the set of Cassandra State - & Actions: Active, Standby..' - properties: - endTime: - format: date-time - type: string - name: - description: 'Type of action to perform : UpdateVersion, UpdateBaseImage, - UpdateConfigMap..' - type: string - startTime: - format: date-time - type: string - status: - description: Action is the specific actions that can be done - on a Cassandra Cluster such as cleanup, upgradesstables.. - type: string - updatedNodes: - description: PodNames of updated Cassandra nodes. Updated - means the Cassandra container image version matches the - spec's version. - items: + type: array + startTime: + format: date-time type: string - type: array - type: object - phase: - description: 'Phase indicates the state this Cassandra cluster - jumps in. Phase goes as one way as below: Initial -> Running - <-> updating' - type: string - podLastOperation: - description: PodLastOperation manage status for Pod Operation - (nodetool cleanup, upgradesstables..) - properties: - endTime: - format: date-time - type: string - name: - type: string - operatorName: - description: Name of operator - type: string - pods: - description: List of pods running an operation - items: - type: string - type: array - podsKO: - description: List of pods that fail to run an operation - items: + status: type: string - type: array - podsOK: - description: List of pods that run an operation successfully - items: - type: string - type: array - startTime: - format: date-time - type: string - status: - type: string - type: object + type: object + type: object + description: CassandraRackStatusList list status for each Rack type: object - description: CassandraRackStatusList list status for each Rack - type: object - lastClusterAction: - description: Store last action at cluster level - type: string - lastClusterActionStatus: - type: string - phase: - description: 'Phase indicates the state this Cassandra cluster jumps - in. Phase goes as one way as below: Initial -> Running <-> updating' - type: string - seedlist: - description: seeList to be used in Cassandra's Pods (computed by the - Operator) - items: + lastClusterAction: + description: Store last action at cluster level type: string - type: array - type: object - type: object - version: v1alpha1 - versions: - - name: v1alpha1 + lastClusterActionStatus: + type: string + phase: + description: 'Phase indicates the state this Cassandra cluster jumps + in. Phase goes as one way as below: Initial -> Running <-> updating' + type: string + seedlist: + description: seeList to be used in Cassandra's Pods (computed by the + Operator) + items: + type: string + type: array + type: object + type: object served: true storage: true diff --git a/pkg/apis/db/v1alpha1/cassandracluster_types.go b/pkg/apis/db/v1alpha1/cassandracluster_types.go index 6c03e4f9c..a842b12d9 100644 --- a/pkg/apis/db/v1alpha1/cassandracluster_types.go +++ b/pkg/apis/db/v1alpha1/cassandracluster_types.go @@ -85,6 +85,7 @@ var ( ActionCorrectCRDConfig = ClusterStateInfo{11, "CorrectCRDConfig"} //The Operator has correct a bad CRD configuration + regexDCRackName = regexp.MustCompile("^[a-z]([-a-z0-9]*[a-z0-9])?$") ) const ( @@ -264,12 +265,9 @@ func (cc *CassandraCluster) GetRackSize(dc int) int { return len(cc.Spec.Topology.DC[dc].Rack) } -//GetRackName return the Name of the rack for DC at indice dc and Rack at indice rack +//GetRackName return the Name of the rack for DC at index dc and Rack at index rack func (cc *CassandraCluster) GetRackName(dc int, rack int) string { - if dc >= cc.GetDCSize() { - return DefaultCassandraRack - } - if rack >= cc.GetRackSize(dc) { + if dc >= cc.GetDCSize() || rack >= cc.GetRackSize(dc) { return DefaultCassandraRack } return cc.Spec.Topology.DC[dc].Rack[rack].Name @@ -278,14 +276,13 @@ func (cc *CassandraCluster) GetRackName(dc int, rack int) string { // GetDCRackName compute dcName + RackName to be used in statefulsets, services.. // it return empty if the name don't match with kubernetes domain name validation regexp func (cc *CassandraCluster) GetDCRackName(dcName string, rackName string) string { - var dcRackName string - dcRackName = dcName + "-" + rackName - var regex_name = regexp.MustCompile("^[a-z]([-a-z0-9]*[a-z0-9])?$") - if !regex_name.MatchString(dcRackName) { - logrus.Errorf("%s don't match valide name service: a DNS-1035 label must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character", dcRackName) - return "" + dcRackName := dcName + "-" + rackName + if regexDCRackName.MatchString(dcRackName) { + return dcRackName } - return dcRackName + logrus.Errorf("%s is not a valid service name: a DNS-1035 label must consist of lower case "+ + "alphanumeric characters or '-', and must start and end with an alphanumeric character", dcRackName) + return "" } //GetDCFromDCRackName send dc name from dcRackName (dc-rack) @@ -304,10 +301,10 @@ func (cc *CassandraCluster) GetDCAndRackFromDCRackName(dcRackName string) (strin func (cc *CassandraCluster) initTopology(dcName string, rackName string) { cc.Spec.Topology = Topology{ DC: []DC{ - DC{ + { Name: dcName, Rack: []Rack{ - Rack{ + { Name: rackName, }, }, diff --git a/pkg/controller/cassandracluster/cassandra_status.go b/pkg/controller/cassandracluster/cassandra_status.go index 083ce6bb8..d8a6b9509 100644 --- a/pkg/controller/cassandracluster/cassandra_status.go +++ b/pkg/controller/cassandracluster/cassandra_status.go @@ -61,7 +61,7 @@ func (rcc *ReconcileCassandraCluster) updateCassandraStatus(cc *api.CassandraClu } // getNextCassandraClusterStatus goal is to detect some changes in the status between cassandracluster and its statefulset -// We follow only one change at a Time : so this function will return on first changed found +// We follow only one change at a Time : so this function will return on the first change found func (rcc *ReconcileCassandraCluster) getNextCassandraClusterStatus(cc *api.CassandraCluster, dc, rack int, dcName, rackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) error { @@ -77,14 +77,14 @@ func (rcc *ReconcileCassandraCluster) getNextCassandraClusterStatus(cc *api.Cass } //If we set up UnlockNextOperation in CRD we allow to see mode change even last operation didn't ended correctly - needSpecificChange := false + unlockNextOperation := false if cc.Spec.UnlockNextOperation && rcc.hasUnschedulablePod(cc.Namespace, dcName, rackName) { - needSpecificChange = true + unlockNextOperation = true } //Do nothing in Initial phase except if we force it if status.CassandraRackStatus[dcRackName].Phase == api.ClusterPhaseInitial.Name { - if !needSpecificChange { + if !unlockNextOperation { ClusterPhaseMetric.set(api.ClusterPhaseInitial, cc.Name) return nil } @@ -101,7 +101,7 @@ func (rcc *ReconcileCassandraCluster) getNextCassandraClusterStatus(cc *api.Cass // decommission more // We don't want to check for new operation while there are already ongoing one in order not to break them (ie decommission..) // Meanwhile we allow to check for new changes if unlockNextOperation has been set (to recover from problems) - if needSpecificChange || + if unlockNextOperation || (!rcc.thereIsPodDisruption() && lastAction.Status != api.StatusOngoing && lastAction.Status != api.StatusToDo && @@ -164,8 +164,9 @@ func needToWaitDelayBeforeCheck(cc *api.CassandraCluster, dcRackName string, sto if t.Add(api.DefaultDelayWait * time.Second).After(now.Time) { logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "rack": dcRackName}).Info("The Operator Waits " + strconv.Itoa(api. - DefaultDelayWait) + " seconds for the action to start correctly") + "rack": dcRackName}).Info("The Operator Waits " + + strconv.Itoa(api.DefaultDelayWait) + + " seconds for the action to start correctly") return true } } @@ -448,47 +449,42 @@ func (rcc *ReconcileCassandraCluster) UpdateCassandraRackStatusPhase(cc *api.Cas logrus.Infof("[%s][%s]: Initializing StatefulSet: Replicas Number Not OK: %d on %d, ready[%d]", cc.Name, dcRackName, storedStatefulSet.Status.Replicas, *storedStatefulSet.Spec.Replicas, storedStatefulSet.Status.ReadyReplicas) - } else { - //If yes, just check that lastPod is running - podsList, err := rcc.ListPods(cc.Namespace, k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) - nb := len(podsList.Items) - if err != nil || nb < 1 { - return nil - } - nodesPerRacks := cc.GetNodesPerRacks(dcRackName) - if len(podsList.Items) < int(nodesPerRacks) { - logrus.Infof("[%s][%s]: StatefulSet is waiting for scaleUp", cc.Name, dcRackName) - return nil - } - pod := podsList.Items[nodesPerRacks-1] - if cassandraPodIsReady(&pod) { - status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name - ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name) - now := metav1.Now() - lastAction.EndTime = &now - lastAction.Status = api.StatusDone - logrus.Infof("[%s][%s]: StatefulSet(%s): Replicas Number OK: ready[%d]", cc.Name, dcRackName, lastAction.Name, storedStatefulSet.Status.ReadyReplicas) - return nil - } return nil - } - - } else { - - //We are no more in Initializing state - if isStatefulSetNotReady(storedStatefulSet) { - logrus.Infof("[%s][%s]: StatefulSet(%s) Replicas Number Not OK: %d on %d, ready[%d]", cc.Name, - dcRackName, lastAction.Name, storedStatefulSet.Status.Replicas, *storedStatefulSet.Spec.Replicas, - storedStatefulSet.Status.ReadyReplicas) - status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhasePending.Name - ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) - } else if status.CassandraRackStatus[dcRackName].Phase != api.ClusterPhaseRunning.Name { - logrus.Infof("[%s][%s]: StatefulSet(%s): Replicas Number OK: ready[%d]", cc.Name, dcRackName, - lastAction.Name, storedStatefulSet.Status.ReadyReplicas) + //If yes, just check that lastPod is running + podsList, err := rcc.ListPods(cc.Namespace, k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) + if err != nil || len(podsList.Items) < 1 { + return nil + } + if len(podsList.Items) < int(nodesPerRacks) { + logrus.Infof("[%s][%s]: StatefulSet is waiting for scaleUp", cc.Name, dcRackName) + return nil + } + pod := podsList.Items[nodesPerRacks-1] + if cassandraPodIsReady(&pod) { status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name) + now := metav1.Now() + lastAction.EndTime = &now + lastAction.Status = api.StatusDone + logrus.Infof("[%s][%s]: StatefulSet(%s): Replicas Number OK: ready[%d]", cc.Name, dcRackName, lastAction.Name, storedStatefulSet.Status.ReadyReplicas) + return nil } + return nil + } + + //We are no more in Initializing state + if isStatefulSetNotReady(storedStatefulSet) { + logrus.Infof("[%s][%s]: StatefulSet(%s) Replicas Number Not OK: %d on %d, ready[%d]", cc.Name, + dcRackName, lastAction.Name, storedStatefulSet.Status.Replicas, *storedStatefulSet.Spec.Replicas, + storedStatefulSet.Status.ReadyReplicas) + status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhasePending.Name + ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) + } else if status.CassandraRackStatus[dcRackName].Phase != api.ClusterPhaseRunning.Name { + logrus.Infof("[%s][%s]: StatefulSet(%s): Replicas Number OK: ready[%d]", cc.Name, dcRackName, + lastAction.Name, storedStatefulSet.Status.ReadyReplicas) + status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name + ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name) } return nil } diff --git a/pkg/controller/cassandracluster/cassandra_status_test.go b/pkg/controller/cassandracluster/cassandra_status_test.go index a420149fd..e62c42ad4 100644 --- a/pkg/controller/cassandracluster/cassandra_status_test.go +++ b/pkg/controller/cassandracluster/cassandra_status_test.go @@ -164,18 +164,14 @@ func helperCreateCassandraCluster(t *testing.T, cassandraClusterFileName string) if err != nil { t.Fatalf("get statefulset: (%v)", err) } - // Check if the quantity of Replicas for this deployment is equals the specification - dsize := *sts.Spec.Replicas - if dsize != 1 { - t.Errorf("dep size (%d) is not the expected size (%d)", dsize, cc.Spec.NodesPerRacks) - } + //Now simulate sts to be ready for CassKop sts.Status.Replicas = *sts.Spec.Replicas sts.Status.ReadyReplicas = *sts.Spec.Replicas rcc.UpdateStatefulSet(sts) //Create Statefulsets associated fake Pods - pod := &v1.Pod{ + podTemplate := v1.Pod{ TypeMeta: metav1.TypeMeta{ Kind: "Pod", APIVersion: "v1", @@ -196,8 +192,7 @@ func helperCreateCassandraCluster(t *testing.T, cassandraClusterFileName string) Phase: v1.PodRunning, ContainerStatuses: []v1.ContainerStatus{ { - Name: "cassandra", - //Image: cc.Spec.BaseImage + ":" + cc.Spec.Version + Name: "cassandra", Ready: true, }, }, @@ -205,9 +200,10 @@ func helperCreateCassandraCluster(t *testing.T, cassandraClusterFileName string) } for i := 0; i < int(sts.Status.Replicas); i++ { + pod := podTemplate.DeepCopy() pod.Name = sts.Name + strconv.Itoa(i) if err = rcc.CreatePod(pod); err != nil { - t.Fatalf("can't create pod: (%v)", err) + t.Fatalf("can't create pod %s: (%v)", pod.Name, err) } } diff --git a/pkg/controller/cassandracluster/cassandracluster_controller.go b/pkg/controller/cassandracluster/cassandracluster_controller.go index e333169e8..274b0f930 100644 --- a/pkg/controller/cassandracluster/cassandracluster_controller.go +++ b/pkg/controller/cassandracluster/cassandracluster_controller.go @@ -120,8 +120,7 @@ func (rcc *ReconcileCassandraCluster) Reconcile(request reconcile.Request) (reco } cc.CheckDefaults() - err = rcc.CheckDeletePVC(cc) - if err != nil { + if err = rcc.CheckDeletePVC(cc); err != nil { return forget, err } diff --git a/pkg/controller/cassandracluster/node_operations_test.go b/pkg/controller/cassandracluster/node_operations_test.go index dc58b2078..41bc9528f 100644 --- a/pkg/controller/cassandracluster/node_operations_test.go +++ b/pkg/controller/cassandracluster/node_operations_test.go @@ -30,7 +30,7 @@ var allKeyspaces = []string{"system", "system_auth", "system_schema", "demo1", " const ( host = "cassandra-0.cassandra.cassie1" - port = 8778 + jolokiaPort = 8778 KeyspacesJolokiaQueryP = `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", "attribute": "Keyspaces", "type": "read"}, @@ -52,8 +52,8 @@ func keyspaceListString() string { `"`+strings.Join(allKeyspaces, `","`)+`"`) } func TestJolokiaURL(t *testing.T) { - jolokiaURL := JolokiaURL(host, port) - if jolokiaURL != fmt.Sprintf("http://%s:%d/jolokia/", host, port) { + jolokiaURL := JolokiaURL(host, jolokiaPort) + if jolokiaURL != fmt.Sprintf("http://%s:%d/jolokia/", host, jolokiaPort) { t.Errorf("Malformed jolokia_url") } } @@ -61,7 +61,7 @@ func TestJolokiaURL(t *testing.T) { func TestNodeCleanupKeyspace(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), httpmock.NewStringResponder(200, `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", "arguments": ["demo", []], @@ -82,7 +82,7 @@ func TestNodeCleanup(t *testing.T) { defer httpmock.DeactivateAndReset() keyspacescleaned := []string{} - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), func(req *http.Request) (*http.Response, error) { var execrequestdata execRequestData if err := json.NewDecoder(req.Body).Decode(&execrequestdata); err != nil { @@ -126,7 +126,7 @@ func TestNodeUpgradeSSTables(t *testing.T) { defer httpmock.DeactivateAndReset() keyspacesUpgraded := []string{} - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), func(req *http.Request) (*http.Response, error) { var execrequestdata execRequestData if err := json.NewDecoder(req.Body).Decode(&execrequestdata); err != nil { @@ -168,7 +168,7 @@ func TestNodeUpgradeSSTables(t *testing.T) { func TestNodeRebuild(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), httpmock.NewStringResponder(200, `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", "arguments": ["dc1"], "type": "exec", @@ -188,7 +188,7 @@ func TestHasStreamingSessions(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), func(req *http.Request) (*http.Response, error) { return httpmock.NewStringResponse(200, `{"request": {"mbean": "org.apache.cassandra.net:type=StreamManager", @@ -249,7 +249,7 @@ func TestHasStreamingSessions(t *testing.T) { t.Errorf("hasStreamingSessions returns a bad answer") } - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), func(req *http.Request) (*http.Response, error) { return httpmock.NewStringResponse(200, `{"request": {"mbean": "org.apache.cassandra.net:type=StreamManager", @@ -273,7 +273,7 @@ func TestHasCleanupCompactions(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), func(req *http.Request) (*http.Response, error) { return httpmock.NewStringResponse(200, `{ "request": @@ -316,7 +316,7 @@ func TestHasCleanupCompactions(t *testing.T) { t.Errorf("hasCleanupCompactions returns a bad answer") } - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), func(req *http.Request) (*http.Response, error) { return httpmock.NewStringResponse(200, `{"request": {"mbean": "org.apache.cassandra.db:type=CompactionManager", @@ -352,7 +352,7 @@ func TestReplicateData(t *testing.T) { defer httpmock.DeactivateAndReset() keyspacesDescribed := []string{} - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), func(req *http.Request) (*http.Response, error) { var execrequestdata execRequestData if err := json.NewDecoder(req.Body).Decode(&execrequestdata); err != nil { @@ -414,7 +414,7 @@ func TestReplicateData(t *testing.T) { func TestNodeDecommission(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), httpmock.NewStringResponder(200, `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", "arguments": [], "type": "exec", @@ -422,7 +422,7 @@ func TestNodeDecommission(t *testing.T) { "value": null, "timestamp": 1528848808, "status": 200}`)) - jolokiaClient, _ := NewJolokiaClient(host, port, nil, v1.LocalObjectReference{}, "ns") + jolokiaClient, _ := NewJolokiaClient(host, jolokiaPort, nil, v1.LocalObjectReference{}, "ns") err := jolokiaClient.NodeDecommission() if err != nil { t.Errorf("NodeDecommision failed with : %v", err) @@ -432,7 +432,7 @@ func TestNodeDecommission(t *testing.T) { func TestNodeOperationMode(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), httpmock.NewStringResponder(200, `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", "attribute": "OperationMode", @@ -454,7 +454,7 @@ func TestNodeOperationMode(t *testing.T) { func TestLeavingNodes(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), httpmock.NewStringResponder(200, `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", "attribute": "LeavingNodes", @@ -476,7 +476,7 @@ func TestLeavingNodes(t *testing.T) { func TestHostIDMap(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, port), + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), httpmock.NewStringResponder(200, `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", "attribute": "LeavingNodes", diff --git a/pkg/controller/cassandracluster/pod_operation.go b/pkg/controller/cassandracluster/pod_operation.go index 309ee6aca..18d185e65 100644 --- a/pkg/controller/cassandracluster/pod_operation.go +++ b/pkg/controller/cassandracluster/pod_operation.go @@ -80,18 +80,18 @@ func (rcc *ReconcileCassandraCluster) executePodOperation(cc *api.CassandraClust status *api.CassandraClusterStatus) (bool, error) { dcRackName := cc.GetDCRackName(dcName, rackName) dcRackStatus := status.CassandraRackStatus[dcRackName] - var breakResyncloop = false + var breakResyncLoopSwitch = false var err error // If we ask a ScaleDown, We can't update the Statefulset before the nodetool decommission has finished if rcc.weAreScalingDown(dcRackStatus) { //If a Decommission is Ongoing, we want to break the Resyncloop until the Decommission is succeed - breakResyncloop, err = rcc.ensureDecommission(cc, dcName, rackName, status) + breakResyncLoopSwitch, err = rcc.ensureDecommission(cc, dcName, rackName, status) if err != nil { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc": dcName, "rack": rackName, "err": err}).Error("Error with decommission") } - return breakResyncloop, err + return breakResyncLoopSwitch, err } // If LastClusterAction was a ScaleUp and It is Done then @@ -108,7 +108,7 @@ func (rcc *ReconcileCassandraCluster) executePodOperation(cc *api.CassandraClust rcc.ensureOperation(cc, dcName, rackName, status, randomPodOperationKey()) } - return breakResyncloop, err + return breakResyncLoopSwitch, err } //addPodOperationLabels will add Pod Labels labels on all Pod in the Current dcRackName @@ -290,33 +290,30 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste if podLastOperation.Name != api.OperationDecommission { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, - "lastOperation": podLastOperation.Name}).Warnf("We should decommission only if pod.Operation == decommission, not the case here") + "lastOperation": podLastOperation.Name}).Warnf("There is another operation than decommission that was asked") return continueResyncLoop, nil } switch podLastOperation.Status { - case api.StatusToDo: + case api.StatusToDo, api.StatusContinue: return rcc.ensureDecommissionToDo(cc, dcName, rackName, status) - case api.StatusOngoing, - api.StatusFinalizing: + case api.StatusOngoing, api.StatusFinalizing: if podLastOperation.Pods == nil || podLastOperation.Pods[0] == "" { return breakResyncLoop, fmt.Errorf("For Status Ongoing we should have a PodLastOperation Pods item") } + lastPod, err := rcc.GetPod(cc.Namespace, podLastOperation.Pods[0]) if err != nil { - if !apierrors.IsNotFound(err) { - return breakResyncLoop, fmt.Errorf("failed to get last cassandra's pods '%s': %v", - podLastOperation.Pods[0], err) + //If Node is already Gone, We Delete PVC + if apierrors.IsNotFound(err) { + return rcc.ensureDecommissionFinalizing(cc, dcName, rackName, status, lastPod) } - } - - //If Node is already Gone, We Delete PVC - if apierrors.IsNotFound(err) { - return rcc.ensureDecommissionFinalizing(cc, dcName, rackName, status, lastPod) + return breakResyncLoop, fmt.Errorf("failed to get last cassandra's pods '%s': %v", + podLastOperation.Pods[0], err) } //LastPod Still Exists @@ -447,14 +444,14 @@ func (rcc *ReconcileCassandraCluster) ensureDecommissionToDo(cc *api.CassandraCl return breakResyncLoop, nil } - err = rcc.UpdatePodLabel(lastPod, map[string]string{ + if err = rcc.UpdatePodLabel(lastPod, map[string]string{ "operation-status": api.StatusOngoing, "operation-start": k8s.LabelTime(), - "operation-name": api.OperationDecommission}) - if err != nil { + "operation-name": api.OperationDecommission}); err != nil { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "pod": lastPod.Name, "err": err}).Debug("Error updating pod") } + podLastOperation.Status = api.StatusOngoing podLastOperation.Pods = append(list, lastPod.Name) podLastOperation.PodsOK = []string{} @@ -689,8 +686,9 @@ func (rcc *ReconcileCassandraCluster) runRebuild(hostName string, cc *api.Cassan if labelSet != true { err = errors.New("operation-argument is needed to get the datacenter name to rebuild from") - } else if keyspaces, err = jolokiaClient.NonLocalKeyspacesInDC(rebuildFrom); err == nil && len(keyspaces) == 0 { - err = fmt.Errorf("%s has no keyspace to replicate data from", rebuildFrom) } + } else if keyspaces, err = jolokiaClient.NonLocalKeyspacesInDC(rebuildFrom); err == nil && len(keyspaces) == 0 { + err = fmt.Errorf("%s has no keyspace to replicate data from", rebuildFrom) + } // In case of an error set the status on the pod and skip it if err != nil { diff --git a/pkg/controller/cassandracluster/reconcile.go b/pkg/controller/cassandracluster/reconcile.go index 8f77d2b5f..e4eb746a2 100644 --- a/pkg/controller/cassandracluster/reconcile.go +++ b/pkg/controller/cassandracluster/reconcile.go @@ -161,7 +161,6 @@ func (rcc *ReconcileCassandraCluster) CheckNonAllowedChanges(cc *api.CassandraCl } } - if needUpdate { status.LastClusterAction = api.ActionCorrectCRDConfig.Name ClusterActionMetric.set(api.ActionCorrectCRDConfig, cc.Name) @@ -449,12 +448,12 @@ func (rcc *ReconcileCassandraCluster) ReconcileRack(cc *api.CassandraCluster, rackName := cc.GetRackName(dc, rack) dcRackName := cc.GetDCRackName(dcName, rackName) if dcRackName == "" { - return fmt.Errorf("Name uses for DC and/or Rack are not good") + return fmt.Errorf("Name used for DC and/or Rack are not good") } //If we have added a dc/rack to the CRD, we add it to the Status if _, ok := status.CassandraRackStatus[dcRackName]; !ok { - logrus.WithFields(logrus.Fields{"cluster": cc.Name}).Infof("the DC(%s) and Rack(%s) does not exist, "+ + logrus.WithFields(logrus.Fields{"cluster": cc.Name}).Infof("DC-Rack(%s-%s) does not exist, "+ "initialize it in status", dcName, rackName) ClusterPhaseMetric.set(api.ClusterPhaseInitial, cc.Name) cc.InitCassandraRackinStatus(status, dcName, rackName) @@ -482,8 +481,7 @@ func (rcc *ReconcileCassandraCluster) ReconcileRack(cc *api.CassandraCluster, //Find if there is an Action to execute or to end rcc.getNextCassandraClusterStatus(cc, dc, rack, dcName, rackName, storedStatefulSet, status) - //If Not in +Initial State - // Find if we have some Pod Operation to Execute, and execute them + //If not Initializing cluster execute pod operations queued if dcRackStatus.Phase != api.ClusterPhaseInitial.Name { breakResyncloop, err := rcc.executePodOperation(cc, dcName, rackName, status) if err != nil { @@ -535,8 +533,8 @@ func (rcc *ReconcileCassandraCluster) ReconcileRack(cc *api.CassandraCluster, needUpdate = true } if breakLoop { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc-rack": dcRackName, - "err": err}).Debug("We just update Statefulset " + + logrus.WithFields(logrus.Fields{"cluster": cc.Name, + "dc-rack": dcRackName}).Debug("We just update Statefulset " + "we break ReconcileRack") return nil } @@ -691,7 +689,7 @@ func (rcc *ReconcileCassandraCluster) CheckPodsState(cc *api.CassandraCluster, } logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "err": err}).Info("We will get first available pod") + "err": err}).Debug("Get first available pod") firstPod, err := GetLastOrFirstPodReady(podsList, true) if err != nil { @@ -715,7 +713,7 @@ func (rcc *ReconcileCassandraCluster) CheckPodsState(cc *api.CassandraCluster, return err } - podToDelete, err := processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion , podsList, status) + podToDelete, err := processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion, podsList, status) if err != nil { return err } @@ -736,10 +734,10 @@ func (rcc *ReconcileCassandraCluster) ListCassandraClusterPods(cc *api.Cassandra rackName := cc.GetRackName(dc, rack) dcRackName := cc.GetDCRackName(dcName, rackName) if dcRackName == "" { - return nil, fmt.Errorf("Name uses for DC and/or Rack are not good") + return nil, fmt.Errorf("Name used for DC and/or Rack are not good") } - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc-rack": dcRackName}).Info("We will list available pods") + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "dc-rack": dcRackName}).Debug("List available pods") pods, err := rcc.ListPods(cc.Namespace, k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) if err != nil { return nil, err @@ -772,12 +770,12 @@ func updateCassandraNodesStatusForPod(hostIDMap map[string]string, pod *v1.Pod, // Update Pod, HostId, Ip couple cached into status hostId, keyFound := hostIDMap[pod.Status.PodIP] - if keyFound == true && cassandraPodIsReady(pod){ + if keyFound == true && cassandraPodIsReady(pod) { status.CassandraNodesStatus[pod.Name] = api.CassandraNodeStatus{HostId: hostId, NodeIp: pod.Status.PodIP} } } -func checkPodCrossIpUseCaseForPod(hostIDMap map[string]string, pod *v1.Pod, status *api.CassandraClusterStatus) (*v1.Pod, error){ +func checkPodCrossIpUseCaseForPod(hostIDMap map[string]string, pod *v1.Pod, status *api.CassandraClusterStatus) (*v1.Pod, error) { // We compare the hostId associated to the pod (cached into the resource status) and the one associated // to the podIp into the cassandra cluster. @@ -789,9 +787,9 @@ func checkPodCrossIpUseCaseForPod(hostIDMap map[string]string, pod *v1.Pod, stat // that the one associated into cassandra for the same IP, so we are in IP cross cases. if keyFound == true && statusHostId != hostId { logrus.WithFields(logrus.Fields{"pod": pod.Name}). - Info(fmt.Sprintf("Pod %s, have a cross Ip situation. The pod have ip : %s, with hostId : %s, " + + Info(fmt.Sprintf("Pod %s, have a cross Ip situation. The pod have ip : %s, with hostId : %s, "+ "but this ip is already associated to the hostId : %s. We force delete of the pod", pod.Name, pod.Status.PodIP, statusHostId, hostId)) return pod, nil } return nil, nil -} \ No newline at end of file +} diff --git a/pkg/controller/cassandracluster/reconcile_test.go b/pkg/controller/cassandracluster/reconcile_test.go index 65a6da85a..94426a3ed 100644 --- a/pkg/controller/cassandracluster/reconcile_test.go +++ b/pkg/controller/cassandracluster/reconcile_test.go @@ -600,7 +600,7 @@ func TestCheckNonAllowedChangesScaleDown(t *testing.T) { defer httpmock.DeactivateAndReset() keyspacesDescribed := []string{} - httpmock.RegisterResponder("POST", JolokiaURL(hostName, port), + httpmock.RegisterResponder("POST", JolokiaURL(hostName, jolokiaPort), func(req *http.Request) (*http.Response, error) { var execrequestdata execRequestData if err := json.NewDecoder(req.Body).Decode(&execrequestdata); err != nil { @@ -872,27 +872,27 @@ func TestProcessingPods(t *testing.T) { // Pod ip change and hostId are not the same in cache. dc2Rack10PodName := "dc2-rack1-0" - oldDc2Rack10PodIp := "10.180.150.109" - dc2Rack10PodIp := "10.100.150.109" - cachedHostId := "ca716bef-dc68-427d-be27-b4eeede1e072" - dc2Rack10HostId := "fsdf6716-dc54-414d-ef27-sdzdgkds04bf" - hostIDMap[dc2Rack10PodIp] = cachedHostId + oldDc2Rack10PodIP := "10.180.150.109" + dc2Rack10PodIP := "10.100.150.109" + cachedHostID := "ca716bef-dc68-427d-be27-b4eeede1e072" + dc2Rack10HostID := "fsdf6716-dc54-414d-ef27-sdzdgkds04bf" + hostIDMap[dc2Rack10PodIP] = cachedHostID // No enough restart returnedPod, _ := processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion, - []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIp, 1)}, &cc.Status) - cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIp, HostId: dc2Rack10HostId} + []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIP, 1)}, &cc.Status) + cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIP, HostId: dc2Rack10HostID} assert.True(t, returnedPod == nil) // No enough restart returnedPod, _ = processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion, - []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIp, cc.Spec.RestartCountBeforePodDeletion)}, &cc.Status) - cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIp, HostId: dc2Rack10HostId} + []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIP, cc.Spec.RestartCountBeforePodDeletion)}, &cc.Status) + cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIP, HostId: dc2Rack10HostID} assert.True(t, returnedPod == nil) // Enough restart - pod := mkPod(dc2Rack10PodName, dc2Rack10PodIp, 100) + pod := mkPod(dc2Rack10PodName, dc2Rack10PodIP, 100) returnedPod, _ = processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion, []v1.Pod{*pod}, &cc.Status) - cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIp, HostId: dc2Rack10HostId} + cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIP, HostId: dc2Rack10HostID} assert.Equal(t, returnedPod, pod) // Test with option disabled @@ -900,17 +900,17 @@ func TestProcessingPods(t *testing.T) { // No enough restart returnedPod, _ = processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion, - []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIp, 1)}, &cc.Status) - cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIp, HostId: dc2Rack10HostId} + []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIP, 1)}, &cc.Status) + cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIP, HostId: dc2Rack10HostID} assert.True(t, returnedPod == nil) // No enough restart returnedPod, _ = processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion, - []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIp, cc.Spec.RestartCountBeforePodDeletion)}, &cc.Status) - cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIp, HostId: dc2Rack10HostId} + []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIP, cc.Spec.RestartCountBeforePodDeletion)}, &cc.Status) + cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIP, HostId: dc2Rack10HostID} assert.True(t, returnedPod == nil) // Enough restart returnedPod, _ = processingPods(hostIDMap, cc.Spec.RestartCountBeforePodDeletion, - []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIp, 100)}, &cc.Status) - cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIp, HostId: dc2Rack10HostId} + []v1.Pod{*mkPod(dc2Rack10PodName, dc2Rack10PodIP, 100)}, &cc.Status) + cc.Status.CassandraNodesStatus[dc2Rack10PodName] = api.CassandraNodeStatus{NodeIp: oldDc2Rack10PodIP, HostId: dc2Rack10HostID} assert.True(t, returnedPod == nil) } diff --git a/pkg/controller/cassandracluster/statefulset.go b/pkg/controller/cassandracluster/statefulset.go index e5888c22d..ab4fd3e6b 100644 --- a/pkg/controller/cassandracluster/statefulset.go +++ b/pkg/controller/cassandracluster/statefulset.go @@ -175,13 +175,8 @@ func statefulSetsAreEqual(sts1, sts2 *appsv1.StatefulSet) bool { sts2.Status.Replicas = sts1.Status.Replicas - patchResult, err := patch.DefaultPatchMaker.Calculate(sts1, sts2) - if err != nil { - logrus.Infof("Template is different: " + pretty.Compare(sts1.Spec, sts2.Spec)) - return false - } - if !patchResult.IsEmpty() { - logrus.Infof("Template is different: " + pretty.Compare(sts1.Spec, sts2.Spec)) + if patchResult, err := patch.DefaultPatchMaker.Calculate(sts1, sts2); err != nil || !patchResult.IsEmpty() { + logrus.Debug("Template is different: " + pretty.Compare(sts1.Spec, sts2.Spec)) return false } @@ -208,17 +203,19 @@ func (rcc *ReconcileCassandraCluster) CreateOrUpdateStatefulSet(statefulSet *app // if there is existing disruptions on Pods // Or if we are not scaling Down the current statefulset if rcc.thereIsPodDisruption() { - if rcc.weAreScalingDown(dcRackStatus) && rcc.hasOneDisruptedPod() { - logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, - "dc-rack": dcRackName}).Info("Cluster has 1 Pod Disrupted" + - "but that may be normal as we are decommissioning") - } else if rcc.cc.Spec.UnlockNextOperation { + // It's not seen as a disruption in that case + // if rcc.weAreScalingDown(dcRackStatus) && rcc.hasOneDisruptedPod() { + // logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, + // "dc-rack": dcRackName}).Info("Cluster has 1 Pod Disrupted" + + // "but that may be normal as we are decommissioning") + // } else + if rcc.cc.Spec.UnlockNextOperation { logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, - "dc-rack": dcRackName}).Warn("Cluster has 1 Pod Disrupted" + + "dc-rack": dcRackName}).Warn("Cluster has 1 disrupted pod" + "but we have unlock the next operation") } else { logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, - "dc-rack": dcRackName}).Info("Cluster has Disruption on Pods, " + + "dc-rack": dcRackName}).Info("Cluster has disruption on Pods, " + "we wait before applying any change to statefulset") return api.ContinueResyncLoop, nil } @@ -256,7 +253,7 @@ func (rcc *ReconcileCassandraCluster) CreateOrUpdateStatefulSet(statefulSet *app //Hack for ScaleDown: //because before applying a scaledown at Kubernetes (statefulset) level we need to execute a cassandra decommission //we want the statefulset to only perform one scaledown at a time. - //we have some call which will block the call of this method until the decommission is not OK, so here + //we have some call which will block the call of this method as long as the decommission is running, so here //we just need to change the scaledown value if more than 1 at a time. if *rcc.storedStatefulSet.Spec.Replicas-*statefulSet.Spec.Replicas > 1 { *statefulSet.Spec.Replicas = *rcc.storedStatefulSet.Spec.Replicas - 1 From fa614e82467cc33f7756665b26e9b1a34e2b5039 Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Thu, 9 Jul 2020 23:25:57 -0400 Subject: [PATCH 02/13] decommission - Do not compare revision to updated stfs object --- .../cassandracluster/cassandra_status.go | 7 ++++-- .../cassandracluster/cassandra_status_test.go | 2 ++ .../cassandracluster/pod_operation.go | 23 +++++++++++-------- .../cassandracluster/statefulset.go | 8 +++---- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/pkg/controller/cassandracluster/cassandra_status.go b/pkg/controller/cassandracluster/cassandra_status.go index d8a6b9509..217dfbe8b 100644 --- a/pkg/controller/cassandracluster/cassandra_status.go +++ b/pkg/controller/cassandracluster/cassandra_status.go @@ -144,14 +144,17 @@ func (rcc *ReconcileCassandraCluster) getNextCassandraClusterStatus(cc *api.Cass now := metav1.Now() lastAction.StartTime = &now lastAction.Status = api.StatusOngoing + logrus.WithFields(logrus.Fields{"cluster": cc.Name, + "dc-rack": dcRackName, "hasDisruption": rcc.thereIsPodDisruption(), + "lastAction.Status": lastAction.Status}).Debug("CYRIL 4") } return nil } //needToWaitDelayBeforeCheck will return if last action start time is < to api.DefaultDelayWait -//that mean start operation is too soon to check to an end operation or other available operations -//this is mostly to let the cassandra cluster and the operator to have the time to correctly stage the action +//that means the last operation was started only a few seconds ago and checking now would not make any sense +//this is mostly to give cassandra and the operator enough time to correctly stage the action //DefaultDelayWait is of 2 minutes func needToWaitDelayBeforeCheck(cc *api.CassandraCluster, dcRackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) bool { diff --git a/pkg/controller/cassandracluster/cassandra_status_test.go b/pkg/controller/cassandracluster/cassandra_status_test.go index e62c42ad4..987e8b9d1 100644 --- a/pkg/controller/cassandracluster/cassandra_status_test.go +++ b/pkg/controller/cassandracluster/cassandra_status_test.go @@ -202,6 +202,8 @@ func helperCreateCassandraCluster(t *testing.T, cassandraClusterFileName string) for i := 0; i < int(sts.Status.Replicas); i++ { pod := podTemplate.DeepCopy() pod.Name = sts.Name + strconv.Itoa(i) + pod.Spec.Hostname = pod.Name + pod.Spec.Subdomain = cc.Name if err = rcc.CreatePod(pod); err != nil { t.Fatalf("can't create pod %s: (%v)", pod.Name, err) } diff --git a/pkg/controller/cassandracluster/pod_operation.go b/pkg/controller/cassandracluster/pod_operation.go index 18d185e65..2d982260a 100644 --- a/pkg/controller/cassandracluster/pod_operation.go +++ b/pkg/controller/cassandracluster/pod_operation.go @@ -484,23 +484,28 @@ func (rcc *ReconcileCassandraCluster) ensureDecommissionFinalizing(cc *api.Cassa pvcName := "data-" + podLastOperation.Pods[0] logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "pvc": pvcName}).Info("Decommission done -> we delete PVC") - pvc, err := rcc.GetPVC(cc.Namespace, pvcName) - if err != nil { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, - "pvc": pvcName}).Error("Cannot get PVC") - } - if err == nil { - err = rcc.deletePVC(pvc) - if err != nil { + if pvc, err := rcc.GetPVC(cc.Namespace, pvcName); err == nil { + if rcc.deletePVC(pvc) != nil { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "pvc": pvcName}).Error("Error deleting PVC, Please make manual Actions..") } else { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "pvc": pvcName}).Info("PVC deleted") } + } else if !apierrors.IsNotFound(err) { + // Error when looking for the PVC let's retry + return breakResyncLoop, nil + } + + dcRackStatus := status.CassandraRackStatus[dcRackName] + if rcc.weAreScalingDown(dcRackStatus) { + // We have more decommissions to do + podLastOperation.Status = api.StatusContinue + } else { + // We are done with decommissioning + podLastOperation.Status = api.StatusDone } - podLastOperation.Status = api.StatusDone podLastOperation.PodsOK = []string{lastPod.Name} now := metav1.Now() podLastOperation.EndTime = &now diff --git a/pkg/controller/cassandracluster/statefulset.go b/pkg/controller/cassandracluster/statefulset.go index ab4fd3e6b..b18cdb20f 100644 --- a/pkg/controller/cassandracluster/statefulset.go +++ b/pkg/controller/cassandracluster/statefulset.go @@ -87,20 +87,20 @@ func (rcc *ReconcileCassandraCluster) CreateStatefulSet(statefulSet *appsv1.Stat //UpdateStatefulSet updates an existing statefulset ss func (rcc *ReconcileCassandraCluster) UpdateStatefulSet(statefulSet *appsv1.StatefulSet) error { - err := rcc.client.Update(context.TODO(), statefulSet) - if err != nil { + revision := statefulSet.ResourceVersion + if err := rcc.client.Update(context.TODO(), statefulSet); err != nil { if !apierrors.IsAlreadyExists(err) { return fmt.Errorf("statefulset already exists: %cc", err) } return fmt.Errorf("failed to update cassandra statefulset: %cc", err) } //Check that the new revision of statefulset has been taken into account - err = wait.Poll(retryInterval, timeout, func() (done bool, err error) { + err := wait.Poll(retryInterval, timeout, func() (done bool, err error) { newSts, err := rcc.GetStatefulSet(statefulSet.Namespace, statefulSet.Name) if err != nil && !apierrors.IsNotFound(err) { return false, fmt.Errorf("failed to get cassandra statefulset: %cc", err) } - if statefulSet.ResourceVersion != newSts.ResourceVersion { + if revision != newSts.ResourceVersion { logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, "statefulset": statefulSet.Name}).Info( "Statefulset has new revision, we continue") return true, nil From 023dfa8a69589d05de96a76f9db0582722eb0151 Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Thu, 9 Jul 2020 23:31:51 -0400 Subject: [PATCH 03/13] decommission - Refactor sort functions in stfs comparison --- .../cassandracluster/statefulset.go | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/pkg/controller/cassandracluster/statefulset.go b/pkg/controller/cassandracluster/statefulset.go index b18cdb20f..341466a79 100644 --- a/pkg/controller/cassandracluster/statefulset.go +++ b/pkg/controller/cassandracluster/statefulset.go @@ -116,7 +116,18 @@ func (rcc *ReconcileCassandraCluster) UpdateStatefulSet(statefulSet *appsv1.Stat return nil } -// sts1 = stored statefulset and sts2 = new generated statefulset +func sortPVCs(pvcs *[]v1.PersistentVolumeClaim) { + sort.Slice(*pvcs, func(i, j int) bool { + return (*pvcs)[i].Name < (*pvcs)[j].Name + }) +} + +func sortContainers(containers *[]v1.Container) { + sort.Slice(*containers, func(i, j int) bool { + return (*containers)[i].Name < (*containers)[j].Name + }) +} + func statefulSetsAreEqual(sts1, sts2 *appsv1.StatefulSet) bool { //updates to statefulset spec for fields other than 'replicas', 'template', and 'updateStrategy' are forbidden. @@ -148,25 +159,14 @@ func statefulSetsAreEqual(sts1, sts2 *appsv1.StatefulSet) bool { return false } - sort.Slice(sts1.Spec.VolumeClaimTemplates, func(i, j int) bool { - return sts1.Spec.VolumeClaimTemplates[i].Name < sts1.Spec.VolumeClaimTemplates[j].Name - }) + sortPVCs(&sts1.Spec.VolumeClaimTemplates) + sortPVCs(&sts2.Spec.VolumeClaimTemplates) - sort.Slice(sts2.Spec.VolumeClaimTemplates, func(i, j int) bool { - return sts2.Spec.VolumeClaimTemplates[i].Name < sts2.Spec.VolumeClaimTemplates[j].Name - }) - - sort.Slice(sts1Spec.Containers, func(i, j int) bool { - return sts1Spec.Containers[i].Name < sts1Spec.Containers[j].Name - }) - - sort.Slice(sts2Spec.Containers, func(i, j int) bool { - return sts2Spec.Containers[i].Name < sts2Spec.Containers[j].Name - }) + sortContainers(&sts1Spec.Containers) + sortContainers(&sts2Spec.Containers) for i := 0; i < len(sts1.Spec.VolumeClaimTemplates); i++ { sts2.Spec.VolumeClaimTemplates[i].TypeMeta = sts1.Spec.VolumeClaimTemplates[i].TypeMeta - sts2.Spec.VolumeClaimTemplates[i].Status = sts1.Spec.VolumeClaimTemplates[i].Status if sts2.Spec.VolumeClaimTemplates[i].Spec.VolumeMode == nil { sts2.Spec.VolumeClaimTemplates[i].Spec.VolumeMode = sts1.Spec.VolumeClaimTemplates[i].Spec.VolumeMode From 18d37c6339f6f8277c11f8ed5f5f3f1fdf2ddf58 Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Thu, 9 Jul 2020 23:33:54 -0400 Subject: [PATCH 04/13] decommission - If Scaling down and replicas are not ready we break the loop --- pkg/controller/cassandracluster/statefulset.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/controller/cassandracluster/statefulset.go b/pkg/controller/cassandracluster/statefulset.go index 341466a79..77d13997a 100644 --- a/pkg/controller/cassandracluster/statefulset.go +++ b/pkg/controller/cassandracluster/statefulset.go @@ -255,6 +255,16 @@ func (rcc *ReconcileCassandraCluster) CreateOrUpdateStatefulSet(statefulSet *app //we want the statefulset to only perform one scaledown at a time. //we have some call which will block the call of this method as long as the decommission is running, so here //we just need to change the scaledown value if more than 1 at a time. + + // If we're scaling down and Ready Replica is not yet the value we expect we break the loop + if rcc.weAreScalingDown(dcRackStatus) && + rcc.storedStatefulSet.Status.ReadyReplicas != rcc.storedStatefulSet.Status.Replicas { + logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, + "dc-rack": dcRackName}).Infof("CYRIL 500 - STFS not ready %d ready replicas for %d replicas asked", + rcc.storedStatefulSet.Status.ReadyReplicas, rcc.storedStatefulSet.Status.Replicas) + return api.BreakResyncLoop, nil + } + if *rcc.storedStatefulSet.Spec.Replicas-*statefulSet.Spec.Replicas > 1 { *statefulSet.Spec.Replicas = *rcc.storedStatefulSet.Spec.Replicas - 1 } From 8845133c718b0e651849f0236a6b1d0de6fe991e Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Thu, 9 Jul 2020 23:34:14 -0400 Subject: [PATCH 05/13] decommission - Add unit tests --- .../cassandracluster/decommission_test.go | 268 ++++++++++++++++++ .../testdata/cassandracluster-1DC.yaml | 22 ++ 2 files changed, 290 insertions(+) create mode 100644 pkg/controller/cassandracluster/decommission_test.go create mode 100644 pkg/controller/cassandracluster/testdata/cassandracluster-1DC.yaml diff --git a/pkg/controller/cassandracluster/decommission_test.go b/pkg/controller/cassandracluster/decommission_test.go new file mode 100644 index 000000000..9d8ec0aa6 --- /dev/null +++ b/pkg/controller/cassandracluster/decommission_test.go @@ -0,0 +1,268 @@ +package cassandracluster + +import ( + "context" + "fmt" + "testing" + + "github.com/jarcoal/httpmock" + "github.com/stretchr/testify/assert" + v1 "k8s.io/api/core/v1" + policyv1beta1 "k8s.io/api/policy/v1beta1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +func reconcileValidation(t *testing.T, rcc *ReconcileCassandraCluster, request reconcile.Request) { + if res, err := rcc.Reconcile(request); err != nil { + t.Fatalf("reconcile: (%v)", err) + } else if !res.Requeue && res.RequeueAfter == 0 { + t.Error("reconcile did not requeue request as expected") + } +} + +func createCassandraClusterWithNoDisruption(t *testing.T, cassandraClusterFileName string) (*ReconcileCassandraCluster, + *reconcile.Request) { + rcc, req := helperCreateCassandraCluster(t, "cassandracluster-1DC.yaml") + + pdb := &policyv1beta1.PodDisruptionBudget{ + ObjectMeta: metav1.ObjectMeta{ + Name: rcc.cc.Name, + Namespace: rcc.cc.Namespace, + }, + } + + rcc.client.Get(context.TODO(), req.NamespacedName, pdb) + // No disruption + pdb.Status.DisruptionsAllowed = 1 + rcc.client.Update(context.TODO(), pdb) + + return rcc, req +} + +func TestOneDecommission(t *testing.T) { + rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-1DC.yaml") + + cassandraCluster := rcc.cc.DeepCopy() + cassandraCluster.Spec.NodesPerRacks = 2 + rcc.client.Update(context.TODO(), cassandraCluster) + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + lastPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "NORMAL", + "timestamp": 1528850319, + "status": 200}`)) + + for i := 0; i < 2; i++ { + otherPod := fmt.Sprintf("cassandra-demo-dc1-rack1%d.%s", i, rcc.cc.Name) + + httpmock.RegisterResponder("POST", JolokiaURL(otherPod, jolokiaPort), + httpmock.NewNotFoundResponder(t.Fatal)) + + } + reconcileValidation(t, rcc, *req) + + stfsName := cassandraCluster.Name + "-dc1-rack1" + stfs, _ := rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) + + assert := assert.New(t) + + assert.Equal(int32(3), *stfs.Spec.Replicas) + + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "LEAVING", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + assert.Equal(int32(3), *stfs.Spec.Replicas) + + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "DECOMMISSIONED", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + + stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) + assert.Equal(int32(2), *stfs.Spec.Replicas) + + info := httpmock.GetCallCountInfo() + assert.Equal(2, info["POST http://cassandra-demo-dc1-rack12.cassandra-demo:8778/jolokia/"]) + + // pods, _ := rcc.ListPods(rcc.cc.Namespace, k8s.LabelsForCassandraDCRack(rcc.cc, "dc1", "rack1")) + // fmt.Println(len(pods.Items)) + + // Need to manually delete pod managed by the fake client + rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ + Name: stfsName + "2", + Namespace: rcc.cc.Namespace}}) + + decommissionedPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name + lastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name + + httpmock.RegisterResponder("POST", JolokiaURL(decommissionedPod, jolokiaPort), + httpmock.NewNotFoundResponder(t.Fatal)) + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "NORMAL", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) +} + +func TestMultipleDecommissions(t *testing.T) { + rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-1DC.yaml") + + cassandraCluster := rcc.cc.DeepCopy() + cassandraCluster.Spec.NodesPerRacks = 1 + rcc.client.Update(context.TODO(), cassandraCluster) + + httpmock.Activate() + defer httpmock.DeactivateAndReset() + + for i := 0; i <= 1; i++ { + nonLastPod := "cassandra-demo-dc1-rack10" + "." + rcc.cc.Name + + httpmock.RegisterResponder("POST", JolokiaURL(nonLastPod, jolokiaPort), + httpmock.NewNotFoundResponder(t.Fatal)) + } + + lastPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name + + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "NORMAL", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + + stfsName := cassandraCluster.Name + "-dc1-rack1" + stfs, _ := rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) + + assert := assert.New(t) + + assert.Equal(int32(3), *stfs.Spec.Replicas) + + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "LEAVING", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + assert.Equal(int32(3), *stfs.Spec.Replicas) + + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "DECOMMISSIONED", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + + stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) + assert.Equal(int32(2), *stfs.Spec.Replicas) + + // Need to manually delete pod managed by the fake client + rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ + Name: stfsName + "2", + Namespace: rcc.cc.Namespace}}) + + previousLastPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name + lastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name + + httpmock.RegisterResponder("POST", JolokiaURL(previousLastPod, jolokiaPort), + httpmock.NewNotFoundResponder(t.Fatal)) + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "NORMAL", + "timestamp": 1528850319, + "status": 200}`)) + reconcileValidation(t, rcc, *req) + + stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) + assert.Equal(int32(2), *stfs.Spec.Replicas) + + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "LEAVING", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + assert.Equal(int32(2), *stfs.Spec.Replicas) + + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "DECOMMISSIONED", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + + stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) + assert.Equal(int32(1), *stfs.Spec.Replicas) + + // Need to manually delete pod managed by the fake client + rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ + Name: stfsName + "1", + Namespace: rcc.cc.Namespace}}) + + previousLastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name + lastPod = "cassandra-demo-dc1-rack10" + "." + rcc.cc.Name + + httpmock.RegisterResponder("POST", JolokiaURL(previousLastPod, jolokiaPort), + httpmock.NewNotFoundResponder(t.Fatal)) + httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), + httpmock.NewStringResponder(200, `{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "NORMAL", + "timestamp": 1528850319, + "status": 200}`)) + + reconcileValidation(t, rcc, *req) + + // stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) + // assert.Equal(int32(1), *stfs.Spec.Replicas) + +} diff --git a/pkg/controller/cassandracluster/testdata/cassandracluster-1DC.yaml b/pkg/controller/cassandracluster/testdata/cassandracluster-1DC.yaml new file mode 100644 index 000000000..a891ff231 --- /dev/null +++ b/pkg/controller/cassandracluster/testdata/cassandracluster-1DC.yaml @@ -0,0 +1,22 @@ +apiVersion: db.orange.com/v1alpha1 +kind: CassandraCluster +metadata: + name: cassandra-demo + labels: + cluster: k8s.pic + namespace: ns +spec: + dataCapacity: 3Gi + nodesPerRacks: 3 + deletePVC: true + autoPilot: true + resources: + limits: &limits + cpu: 1 + memory: 2Gi + requests: *limits + topology: + dc: + - name: dc1 + rack: + - name: rack1 From 9d964c5a87be293dc5d04e66d7133a8904905dca Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Thu, 9 Jul 2020 23:55:19 -0400 Subject: [PATCH 06/13] decommission - Rename old pod and add more tests --- .../cassandracluster/decommission_test.go | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pkg/controller/cassandracluster/decommission_test.go b/pkg/controller/cassandracluster/decommission_test.go index 9d8ec0aa6..9ff50e792 100644 --- a/pkg/controller/cassandracluster/decommission_test.go +++ b/pkg/controller/cassandracluster/decommission_test.go @@ -43,6 +43,10 @@ func createCassandraClusterWithNoDisruption(t *testing.T, cassandraClusterFileNa func TestOneDecommission(t *testing.T) { rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-1DC.yaml") + assert := assert.New(t) + + assert.Equal(int32(3), rcc.cc.Spec.NodesPerRacks) + cassandraCluster := rcc.cc.DeepCopy() cassandraCluster.Spec.NodesPerRacks = 2 rcc.client.Update(context.TODO(), cassandraCluster) @@ -71,8 +75,6 @@ func TestOneDecommission(t *testing.T) { stfsName := cassandraCluster.Name + "-dc1-rack1" stfs, _ := rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - assert := assert.New(t) - assert.Equal(int32(3), *stfs.Spec.Replicas) httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), @@ -104,18 +106,15 @@ func TestOneDecommission(t *testing.T) { info := httpmock.GetCallCountInfo() assert.Equal(2, info["POST http://cassandra-demo-dc1-rack12.cassandra-demo:8778/jolokia/"]) - // pods, _ := rcc.ListPods(rcc.cc.Namespace, k8s.LabelsForCassandraDCRack(rcc.cc, "dc1", "rack1")) - // fmt.Println(len(pods.Items)) - // Need to manually delete pod managed by the fake client rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ Name: stfsName + "2", Namespace: rcc.cc.Namespace}}) - decommissionedPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name + deletedPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name lastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name - httpmock.RegisterResponder("POST", JolokiaURL(decommissionedPod, jolokiaPort), + httpmock.RegisterResponder("POST", JolokiaURL(deletedPod, jolokiaPort), httpmock.NewNotFoundResponder(t.Fatal)) httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), httpmock.NewStringResponder(200, `{"request": @@ -197,10 +196,10 @@ func TestMultipleDecommissions(t *testing.T) { Name: stfsName + "2", Namespace: rcc.cc.Namespace}}) - previousLastPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name + deletedPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name lastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name - httpmock.RegisterResponder("POST", JolokiaURL(previousLastPod, jolokiaPort), + httpmock.RegisterResponder("POST", JolokiaURL(deletedPod, jolokiaPort), httpmock.NewNotFoundResponder(t.Fatal)) httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), httpmock.NewStringResponder(200, `{"request": @@ -246,10 +245,10 @@ func TestMultipleDecommissions(t *testing.T) { Name: stfsName + "1", Namespace: rcc.cc.Namespace}}) - previousLastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name + deletedPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name lastPod = "cassandra-demo-dc1-rack10" + "." + rcc.cc.Name - httpmock.RegisterResponder("POST", JolokiaURL(previousLastPod, jolokiaPort), + httpmock.RegisterResponder("POST", JolokiaURL(deletedPod, jolokiaPort), httpmock.NewNotFoundResponder(t.Fatal)) httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), httpmock.NewStringResponder(200, `{"request": From 3f70a3e2e6ee7431cc4f87890805cd6676097cd0 Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Sun, 12 Jul 2020 01:12:53 -0400 Subject: [PATCH 07/13] decommission - Update log messages --- .../cassandracluster/cassandra_status.go | 31 +- .../cassandracluster_controller.go | 3 +- .../cassandracluster/decommission_test.go | 299 ++++++++---------- .../cassandracluster/node_operations.go | 6 +- .../cassandracluster/node_operations_test.go | 47 +-- pkg/controller/cassandracluster/pod.go | 4 +- .../cassandracluster/pod_operation.go | 17 +- pkg/controller/cassandracluster/reconcile.go | 5 +- .../cassandracluster/statefulset.go | 4 + 9 files changed, 179 insertions(+), 237 deletions(-) diff --git a/pkg/controller/cassandracluster/cassandra_status.go b/pkg/controller/cassandracluster/cassandra_status.go index 217dfbe8b..39bdefc48 100644 --- a/pkg/controller/cassandracluster/cassandra_status.go +++ b/pkg/controller/cassandracluster/cassandra_status.go @@ -430,10 +430,14 @@ func (rcc *ReconcileCassandraCluster) UpdateStatusIfActionEnded(cc *api.Cassandr // The Phase is: Initializing -> Running <--> Pending // The Phase is a very high level view of the cluster, for a better view we need to see Actions and Pod Operations func (rcc *ReconcileCassandraCluster) UpdateCassandraRackStatusPhase(cc *api.CassandraCluster, dcName string, - rackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) error { + rackName string, storedStatefulSet *appsv1.StatefulSet, status *api.CassandraClusterStatus) { dcRackName := cc.GetDCRackName(dcName, rackName) lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction + logrusFields := logrus.Fields{"cluster": cc.Name, "rack": dcRackName, + "ReadyReplicas": storedStatefulSet.Status.ReadyReplicas, "Replicas": storedStatefulSet.Status.Replicas, + "RequestedReplicas": *storedStatefulSet.Spec.Replicas} + if status.CassandraRackStatus[dcRackName].Phase == api.ClusterPhaseInitial.Name { nodesPerRacks := cc.GetNodesPerRacks(dcRackName) //If we are stuck in initializing state, we can rollback the add of dc which implies decommissioning nodes @@ -442,26 +446,23 @@ func (rcc *ReconcileCassandraCluster) UpdateCassandraRackStatusPhase(cc *api.Cas "rack": dcRackName}).Warn("Aborting Initializing..., start ScaleDown") setDecommissionStatus(status, dcRackName) ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) - return nil + return } ClusterPhaseMetric.set(api.ClusterPhaseInitial, cc.Name) //Do we have reach requested number of replicas ? if isStatefulSetNotReady(storedStatefulSet) { - logrus.Infof("[%s][%s]: Initializing StatefulSet: Replicas Number Not OK: %d on %d, ready[%d]", - cc.Name, dcRackName, storedStatefulSet.Status.Replicas, *storedStatefulSet.Spec.Replicas, - storedStatefulSet.Status.ReadyReplicas) - return nil + logrus.WithFields(logrusFields).Infof("Initializing StatefulSet: Replicas Number Not OK") + return } //If yes, just check that lastPod is running podsList, err := rcc.ListPods(cc.Namespace, k8s.LabelsForCassandraDCRack(cc, dcName, rackName)) if err != nil || len(podsList.Items) < 1 { - return nil + return } if len(podsList.Items) < int(nodesPerRacks) { - logrus.Infof("[%s][%s]: StatefulSet is waiting for scaleUp", cc.Name, dcRackName) - return nil + logrus.Infof("[%s][%s]: StatefulSet is scaling up", cc.Name, dcRackName) } pod := podsList.Items[nodesPerRacks-1] if cassandraPodIsReady(&pod) { @@ -471,25 +472,19 @@ func (rcc *ReconcileCassandraCluster) UpdateCassandraRackStatusPhase(cc *api.Cas lastAction.EndTime = &now lastAction.Status = api.StatusDone logrus.Infof("[%s][%s]: StatefulSet(%s): Replicas Number OK: ready[%d]", cc.Name, dcRackName, lastAction.Name, storedStatefulSet.Status.ReadyReplicas) - return nil } - return nil } - //We are no more in Initializing state + //No more in Initializing state if isStatefulSetNotReady(storedStatefulSet) { - logrus.Infof("[%s][%s]: StatefulSet(%s) Replicas Number Not OK: %d on %d, ready[%d]", cc.Name, - dcRackName, lastAction.Name, storedStatefulSet.Status.Replicas, *storedStatefulSet.Spec.Replicas, - storedStatefulSet.Status.ReadyReplicas) + logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas number not okay") status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhasePending.Name ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) } else if status.CassandraRackStatus[dcRackName].Phase != api.ClusterPhaseRunning.Name { - logrus.Infof("[%s][%s]: StatefulSet(%s): Replicas Number OK: ready[%d]", cc.Name, dcRackName, - lastAction.Name, storedStatefulSet.Status.ReadyReplicas) + logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas number not okay") status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name) } - return nil } func setDecommissionStatus(status *api.CassandraClusterStatus, dcRackName string) { diff --git a/pkg/controller/cassandracluster/cassandracluster_controller.go b/pkg/controller/cassandracluster/cassandracluster_controller.go index 274b0f930..4d9c98f64 100644 --- a/pkg/controller/cassandracluster/cassandracluster_controller.go +++ b/pkg/controller/cassandracluster/cassandracluster_controller.go @@ -144,8 +144,7 @@ func (rcc *ReconcileCassandraCluster) Reconcile(request reconcile.Request) (reco } //ReconcileRack will also add and initiate new racks, we must not go through racks before this method - err = rcc.ReconcileRack(cc, status) - if err != nil { + if err = rcc.ReconcileRack(cc, status); err != nil { return requeue5, err } diff --git a/pkg/controller/cassandracluster/decommission_test.go b/pkg/controller/cassandracluster/decommission_test.go index 9ff50e792..ee4633207 100644 --- a/pkg/controller/cassandracluster/decommission_test.go +++ b/pkg/controller/cassandracluster/decommission_test.go @@ -3,6 +3,7 @@ package cassandracluster import ( "context" "fmt" + "strconv" "testing" "github.com/jarcoal/httpmock" @@ -40,228 +41,178 @@ func createCassandraClusterWithNoDisruption(t *testing.T, cassandraClusterFileNa return rcc, req } +func registerJolokiaOperationModeResponder(host podName, op operationMode) { + fmt.Println(string(op)) + httpmock.RegisterResponder("POST", JolokiaURL(host.FullName, jolokiaPort), + httpmock.NewStringResponder(200, fmt.Sprintf(`{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "OperationMode", + "type": "read"}, + "value": "%s", + "timestamp": 1528850319, + "status": 200}`, string(op)))) +} + +func registerFatalJolokiaResponder(t *testing.T, host podName) { + httpmock.RegisterResponder("POST", JolokiaURL(host.FullName, jolokiaPort), + httpmock.NewNotFoundResponder(t.Fatal)) +} + +func jolokiaCallsCount(name podName) int { + info := httpmock.GetCallCountInfo() + return info[fmt.Sprintf("POST http://%s:8778/jolokia/", name.FullName)] +} + +type podName struct { + Name string + FullName string +} + +func podHost(stfsName string, id int8, rcc *ReconcileCassandraCluster) podName { + name := stfsName + strconv.Itoa(int(id)) + return podName{name, name + "." + rcc.cc.Name} +} + +func deletePodNotDeletedByFakeClient(rcc *ReconcileCassandraCluster, host podName) { + // Need to manually delete pod managed by the fake client + rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ + Name: host.Name, + Namespace: rcc.cc.Namespace}}) +} + func TestOneDecommission(t *testing.T) { rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-1DC.yaml") + httpmock.Activate() + defer httpmock.DeactivateAndReset() assert := assert.New(t) assert.Equal(int32(3), rcc.cc.Spec.NodesPerRacks) cassandraCluster := rcc.cc.DeepCopy() + + datacenters := cassandraCluster.Spec.Topology.DC + assert.Equal(1, len(datacenters)) + assert.Equal(1, len(datacenters[0].Rack)) + + dc := datacenters[0] + stfsName := cassandraCluster.Name + fmt.Sprintf("-%s-%s", dc.Name, dc.Rack[0].Name) + cassandraCluster.Spec.NodesPerRacks = 2 rcc.client.Update(context.TODO(), cassandraCluster) - httpmock.Activate() - defer httpmock.DeactivateAndReset() - lastPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "NORMAL", - "timestamp": 1528850319, - "status": 200}`)) - - for i := 0; i < 2; i++ { - otherPod := fmt.Sprintf("cassandra-demo-dc1-rack1%d.%s", i, rcc.cc.Name) - - httpmock.RegisterResponder("POST", JolokiaURL(otherPod, jolokiaPort), - httpmock.NewNotFoundResponder(t.Fatal)) + lastPod := podHost(stfsName, 2, rcc) + for id := 0; id < 2; id++ { + registerFatalJolokiaResponder(t, podHost(stfsName, int8(id), rcc)) } + registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) + assert.Equal(2, jolokiaCallsCount(lastPod)) + assertStatefulsetReplicas(t, rcc, 3, cassandraCluster.Namespace, stfsName) - stfsName := cassandraCluster.Name + "-dc1-rack1" - stfs, _ := rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - - assert.Equal(int32(3), *stfs.Spec.Replicas) - - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "LEAVING", - "timestamp": 1528850319, - "status": 200}`)) - + registerJolokiaOperationModeResponder(lastPod, LEAVING) reconcileValidation(t, rcc, *req) - assert.Equal(int32(3), *stfs.Spec.Replicas) - - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "DECOMMISSIONED", - "timestamp": 1528850319, - "status": 200}`)) + assert.Equal(2, jolokiaCallsCount(lastPod)) + assertStatefulsetReplicas(t, rcc, 3, cassandraCluster.Namespace, stfsName) + registerJolokiaOperationModeResponder(lastPod, DECOMMISSIONED) reconcileValidation(t, rcc, *req) + assert.Equal(2, jolokiaCallsCount(lastPod)) + assertStatefulsetReplicas(t, rcc, 2, cassandraCluster.Namespace, stfsName) - stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - assert.Equal(int32(2), *stfs.Spec.Replicas) + deletedPod := podHost(stfsName, 2, rcc) + assert.Equal(2, jolokiaCallsCount(deletedPod)) - info := httpmock.GetCallCountInfo() - assert.Equal(2, info["POST http://cassandra-demo-dc1-rack12.cassandra-demo:8778/jolokia/"]) - - // Need to manually delete pod managed by the fake client - rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ - Name: stfsName + "2", - Namespace: rcc.cc.Namespace}}) - - deletedPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name - lastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name - - httpmock.RegisterResponder("POST", JolokiaURL(deletedPod, jolokiaPort), - httpmock.NewNotFoundResponder(t.Fatal)) - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "NORMAL", - "timestamp": 1528850319, - "status": 200}`)) + lastPod = podHost(stfsName, 1, rcc) + deletePodNotDeletedByFakeClient(rcc, deletedPod) + registerFatalJolokiaResponder(t, deletedPod) + registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) + assert.Equal(1, jolokiaCallsCount(lastPod)) } -func TestMultipleDecommissions(t *testing.T) { - rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-1DC.yaml") - - cassandraCluster := rcc.cc.DeepCopy() - cassandraCluster.Spec.NodesPerRacks = 1 - rcc.client.Update(context.TODO(), cassandraCluster) +func assertStatefulsetReplicas(t *testing.T, rcc *ReconcileCassandraCluster, expected int, namespace, stfsName string){ + assert := assert.New(t) + stfs, _ := rcc.GetStatefulSet(namespace, stfsName) + assert.Equal(int32(expected), *stfs.Spec.Replicas) +} +func TestMultipleDecommissions(t *testing.T) { + assert := assert.New(t) httpmock.Activate() defer httpmock.DeactivateAndReset() - for i := 0; i <= 1; i++ { - nonLastPod := "cassandra-demo-dc1-rack10" + "." + rcc.cc.Name - - httpmock.RegisterResponder("POST", JolokiaURL(nonLastPod, jolokiaPort), - httpmock.NewNotFoundResponder(t.Fatal)) - } + rcc, req := createCassandraClusterWithNoDisruption(t, "cassandracluster-1DC.yaml") - lastPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name + cassandraCluster := rcc.cc.DeepCopy() - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "NORMAL", - "timestamp": 1528850319, - "status": 200}`)) + datacenters := cassandraCluster.Spec.Topology.DC + assert.Equal(1, len(datacenters)) + dc := datacenters[0] + assert.Equal(1, len(dc.Rack)) - reconcileValidation(t, rcc, *req) + stfsName := cassandraCluster.Name + fmt.Sprintf("-%s-%s", dc.Name, dc.Rack[0].Name) - stfsName := cassandraCluster.Name + "-dc1-rack1" - stfs, _ := rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - - assert := assert.New(t) + cassandraCluster.Spec.NodesPerRacks = 1 + rcc.client.Update(context.TODO(), cassandraCluster) - assert.Equal(int32(3), *stfs.Spec.Replicas) + for id := 0; id <= 1; id++ { + registerFatalJolokiaResponder(t, podHost(stfsName, int8(0), rcc)) + } - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "LEAVING", - "timestamp": 1528850319, - "status": 200}`)) + lastPod := podHost(stfsName, 2, rcc) + registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) - assert.Equal(int32(3), *stfs.Spec.Replicas) - - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "DECOMMISSIONED", - "timestamp": 1528850319, - "status": 200}`)) + assert.Equal(2, jolokiaCallsCount(lastPod)) + numberOfReplicas := 3 + assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) + registerJolokiaOperationModeResponder(lastPod, LEAVING) reconcileValidation(t, rcc, *req) + assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) + assert.Equal(2, jolokiaCallsCount(lastPod)) - stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - assert.Equal(int32(2), *stfs.Spec.Replicas) - - // Need to manually delete pod managed by the fake client - rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ - Name: stfsName + "2", - Namespace: rcc.cc.Namespace}}) - - deletedPod := "cassandra-demo-dc1-rack12" + "." + rcc.cc.Name - lastPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name - - httpmock.RegisterResponder("POST", JolokiaURL(deletedPod, jolokiaPort), - httpmock.NewNotFoundResponder(t.Fatal)) - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "NORMAL", - "timestamp": 1528850319, - "status": 200}`)) + registerJolokiaOperationModeResponder(lastPod, DECOMMISSIONED) reconcileValidation(t, rcc, *req) + assert.Equal(2, jolokiaCallsCount(lastPod)) + numberOfReplicas -= 1 + assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) - stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - assert.Equal(int32(2), *stfs.Spec.Replicas) - - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "LEAVING", - "timestamp": 1528850319, - "status": 200}`)) + deletedPod := podHost(stfsName, 2, rcc) + deletePodNotDeletedByFakeClient(rcc, deletedPod) + lastPod = podHost(stfsName, 1, rcc) reconcileValidation(t, rcc, *req) - assert.Equal(int32(2), *stfs.Spec.Replicas) - - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "DECOMMISSIONED", - "timestamp": 1528850319, - "status": 200}`)) + assert.Equal(2, jolokiaCallsCount(deletedPod)) + assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) + registerFatalJolokiaResponder(t, deletedPod) + registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) + assert.Equal(2, jolokiaCallsCount(lastPod)) + assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) - stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - assert.Equal(int32(1), *stfs.Spec.Replicas) - - // Need to manually delete pod managed by the fake client - rcc.client.Delete(context.TODO(), &v1.Pod{ObjectMeta: metav1.ObjectMeta{ - Name: stfsName + "1", - Namespace: rcc.cc.Namespace}}) - - deletedPod = "cassandra-demo-dc1-rack11" + "." + rcc.cc.Name - lastPod = "cassandra-demo-dc1-rack10" + "." + rcc.cc.Name - - httpmock.RegisterResponder("POST", JolokiaURL(deletedPod, jolokiaPort), - httpmock.NewNotFoundResponder(t.Fatal)) - httpmock.RegisterResponder("POST", JolokiaURL(lastPod, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "NORMAL", - "timestamp": 1528850319, - "status": 200}`)) + registerJolokiaOperationModeResponder(lastPod, LEAVING) + reconcileValidation(t, rcc, *req) + assert.Equal(2, jolokiaCallsCount(lastPod)) + assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) + registerJolokiaOperationModeResponder(lastPod, DECOMMISSIONED) reconcileValidation(t, rcc, *req) + assert.Equal(2, jolokiaCallsCount(lastPod)) + numberOfReplicas -= 1 + assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) - // stfs, _ = rcc.GetStatefulSet(cassandraCluster.Namespace, stfsName) - // assert.Equal(int32(1), *stfs.Spec.Replicas) + deletedPod = podHost(stfsName, 1, rcc) + deletePodNotDeletedByFakeClient(rcc, deletedPod) + lastPod = podHost(stfsName, 0, rcc) + registerFatalJolokiaResponder(t, deletedPod) + registerJolokiaOperationModeResponder(lastPod, NORMAL) + reconcileValidation(t, rcc, *req) + assert.Equal(1, jolokiaCallsCount(lastPod)) } + diff --git a/pkg/controller/cassandracluster/node_operations.go b/pkg/controller/cassandracluster/node_operations.go index 0f59c6f6c..fb6f3442a 100644 --- a/pkg/controller/cassandracluster/node_operations.go +++ b/pkg/controller/cassandracluster/node_operations.go @@ -251,14 +251,14 @@ func (jolokiaClient *JolokiaClient) NodeRemove(hostid string) error { } /*NodeOperationMode returns OperationMode of a node using a jolokia client and returns any error*/ -func (jolokiaClient *JolokiaClient) NodeOperationMode() (string, error) { +func (jolokiaClient *JolokiaClient) NodeOperationMode() (operationMode, error) { request := go_jolokia.NewJolokiaRequest(go_jolokia.READ, "org.apache.cassandra.db:type=StorageService", nil, "OperationMode") result, err := checkJolokiaErrors(jolokiaClient.executeReadRequest(request)) if err != nil { - return "", fmt.Errorf("Cannot get OperationMode: %v", err.Error()) + return UNKNOWN, fmt.Errorf("Cannot get OperationMode: %v", err.Error()) } v, _ := result.Value.(string) - return v, nil + return operationMode(v), nil } func (jolokiaClient *JolokiaClient) hasStreamingSessions() (bool, error) { diff --git a/pkg/controller/cassandracluster/node_operations_test.go b/pkg/controller/cassandracluster/node_operations_test.go index 41bc9528f..a8abc7a1b 100644 --- a/pkg/controller/cassandracluster/node_operations_test.go +++ b/pkg/controller/cassandracluster/node_operations_test.go @@ -1,17 +1,3 @@ -// Copyright 2019 Orange -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - package cassandracluster import ( @@ -432,28 +418,34 @@ func TestNodeDecommission(t *testing.T) { func TestNodeOperationMode(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "OperationMode", - "type": "read"}, - "value": "NORMAL", - "timestamp": 1528850319, - "status": 200}`)) + registerJolokiaOperationModeResponder(podName{FullName: host}, NORMAL) jolokiaClient, _ := NewJolokiaClient(host, JolokiaPort, nil, v1.LocalObjectReference{}, "ns") operationMode, err := jolokiaClient.NodeOperationMode() if err != nil { t.Errorf("NodeOperationMode failed with : %v", err) } - if operationMode != "NORMAL" { + if operationMode != NORMAL { t.Errorf("NodeOperationMode returned a bad answer: %s", operationMode) } } +func registerJolokiaLeavingNodesResponder(value interface{}) { + jsonValue, _ := json.Marshal(value) + httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), + httpmock.NewStringResponder(200, fmt.Sprintf(`{"request": + {"mbean": "org.apache.cassandra.db:type=StorageService", + "attribute": "LeavingNodes", + "type": "read"}, + "value": %v, + "timestamp": 1528850319, + "status": 200}`, string(jsonValue)))) +} + func TestLeavingNodes(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() + registerJolokiaLeavingNodesResponder([]string{"127.0.0.1"}) httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), httpmock.NewStringResponder(200, `{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", @@ -476,14 +468,7 @@ func TestLeavingNodes(t *testing.T) { func TestHostIDMap(t *testing.T) { httpmock.Activate() defer httpmock.DeactivateAndReset() - httpmock.RegisterResponder("POST", JolokiaURL(host, jolokiaPort), - httpmock.NewStringResponder(200, `{"request": - {"mbean": "org.apache.cassandra.db:type=StorageService", - "attribute": "LeavingNodes", - "type": "read"}, - "value": {"10.244.3.20": "ac0b9f2b-1eb4-40ca-bc6e-68b37575f019"}, - "timestamp": 1528850319, - "status": 200}`)) + registerJolokiaLeavingNodesResponder(map[string]string{"10.244.3.20": "ac0b9f2b-1eb4-40ca-bc6e-68b37575f019"}) jolokiaClient, _ := NewJolokiaClient(host, JolokiaPort, nil, v1.LocalObjectReference{}, "ns") hostIDMap, err := jolokiaClient.hostIDMap() diff --git a/pkg/controller/cassandracluster/pod.go b/pkg/controller/cassandracluster/pod.go index c1d0bf4df..ae5dd3299 100644 --- a/pkg/controller/cassandracluster/pod.go +++ b/pkg/controller/cassandracluster/pod.go @@ -41,8 +41,8 @@ var reEndingNumber = regexp.MustCompile("[0-9]+$") // PodContainersReady returns true if all container in the Pod are ready func PodContainersReady(pod *v1.Pod) bool { if pod.Status.ContainerStatuses != nil && len(pod.Status.ContainerStatuses) > 0 { - for _, c := range pod.Status.ContainerStatuses { - if c.Ready == false { + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.Ready == false { return false } } diff --git a/pkg/controller/cassandracluster/pod_operation.go b/pkg/controller/cassandracluster/pod_operation.go index 2d982260a..08aee9d03 100644 --- a/pkg/controller/cassandracluster/pod_operation.go +++ b/pkg/controller/cassandracluster/pod_operation.go @@ -48,6 +48,15 @@ type op struct { PostAction func(*ReconcileCassandraCluster, *api.CassandraCluster, string, v1.Pod) error } +type operationMode string + +const ( + NORMAL operationMode = "NORMAL" + LEAVING = "LEAVING" + DECOMMISSIONED = "DECOMMISSIONED" + UNKNOWN = "UNKNOWN" +) + var podOperationMap = map[string]op{ api.OperationCleanup: op{(*ReconcileCassandraCluster).runCleanup, (*JolokiaClient).hasCleanupCompactions, nil}, api.OperationRebuild: op{(*ReconcileCassandraCluster).runRebuild, (*JolokiaClient).hasStreamingSessions, nil}, @@ -319,7 +328,7 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste //LastPod Still Exists if !PodContainersReady(lastPod) && lastPod.DeletionTimestamp != nil { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, - "lastPod": lastPod.Name}).Infof("We already asked Statefulset to scaleDown, waiting..") + "lastPod": lastPod.Name}).Infof("Statefulset is scaling down, waiting..") return breakResyncLoop, nil } @@ -340,7 +349,7 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste return breakResyncLoop, err } - if operationMode == "NORMAL" { + if operationMode == NORMAL { t, err := k8s.LabelTime2Time(lastPod.Labels["operation-start"]) if err != nil { logrus.WithFields(logrus.Fields{"operation-start": lastPod.Labels["operation-start"]}).Debugf("Can't parse time") @@ -360,7 +369,7 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste return breakResyncLoop, nil } - if operationMode == "DECOMMISSIONED" || operationMode == "" { + if operationMode == DECOMMISSIONED || operationMode == UNKNOWN { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "lastPod": lastPod.Name, "operationMode": operationMode}).Infof("Node has left the ring, " + "waiting for statefulset Scaledown") @@ -438,7 +447,7 @@ func (rcc *ReconcileCassandraCluster) ensureDecommissionToDo(cc *api.CassandraCl return breakResyncLoop, err } - if operationMode == "DECOMMISSIONED" || operationMode == "" || operationMode == "LEAVING" { + if operationMode == DECOMMISSIONED || operationMode == UNKNOWN || operationMode == LEAVING { logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "pod": lastPod.Name}).Info("Node is leaving or has already been decommissioned") return breakResyncLoop, nil diff --git a/pkg/controller/cassandracluster/reconcile.go b/pkg/controller/cassandracluster/reconcile.go index e4eb746a2..470e9b980 100644 --- a/pkg/controller/cassandracluster/reconcile.go +++ b/pkg/controller/cassandracluster/reconcile.go @@ -534,7 +534,7 @@ func (rcc *ReconcileCassandraCluster) ReconcileRack(cc *api.CassandraCluster, } if breakLoop { logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "dc-rack": dcRackName}).Debug("We just update Statefulset " + + "dc-rack": dcRackName}).Debug("Statefulset is getting updated " + "we break ReconcileRack") return nil } @@ -688,8 +688,7 @@ func (rcc *ReconcileCassandraCluster) CheckPodsState(cc *api.CassandraCluster, return nil } - logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "err": err}).Debug("Get first available pod") + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "err": err}).Debug("Get last available pod") firstPod, err := GetLastOrFirstPodReady(podsList, true) if err != nil { diff --git a/pkg/controller/cassandracluster/statefulset.go b/pkg/controller/cassandracluster/statefulset.go index 77d13997a..aabe74857 100644 --- a/pkg/controller/cassandracluster/statefulset.go +++ b/pkg/controller/cassandracluster/statefulset.go @@ -266,6 +266,10 @@ func (rcc *ReconcileCassandraCluster) CreateOrUpdateStatefulSet(statefulSet *app } if *rcc.storedStatefulSet.Spec.Replicas-*statefulSet.Spec.Replicas > 1 { + logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, + "dc-rack": dcRackName}).Debugf("Must scale down one node at a time. Update stfs " + + "replicas to %d instead of %d for now", + *rcc.storedStatefulSet.Spec.Replicas-1, *statefulSet.Spec.Replicas) *statefulSet.Spec.Replicas = *rcc.storedStatefulSet.Spec.Replicas - 1 } From e294ea0cd6575bc91ac04117eb0648aec2e67a72 Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Sun, 12 Jul 2020 02:58:14 -0400 Subject: [PATCH 08/13] decommission - Do not treat Ongoing and Finalizing operations altogether --- .../cassandracluster/cassandra_status.go | 19 ++--- .../cassandracluster/decommission_test.go | 5 +- .../cassandracluster/pod_operation.go | 71 ++++++++++--------- pkg/controller/cassandracluster/reconcile.go | 4 +- .../cassandracluster/statefulset.go | 21 +----- 5 files changed, 56 insertions(+), 64 deletions(-) diff --git a/pkg/controller/cassandracluster/cassandra_status.go b/pkg/controller/cassandracluster/cassandra_status.go index 39bdefc48..e712499e2 100644 --- a/pkg/controller/cassandracluster/cassandra_status.go +++ b/pkg/controller/cassandracluster/cassandra_status.go @@ -16,6 +16,7 @@ package cassandracluster import ( "context" + "fmt" "reflect" "strconv" "time" @@ -161,15 +162,15 @@ func needToWaitDelayBeforeCheck(cc *api.CassandraCluster, dcRackName string, sto lastAction := &status.CassandraRackStatus[dcRackName].CassandraLastAction if lastAction.StartTime != nil { - t := *lastAction.StartTime now := metav1.Now() if t.Add(api.DefaultDelayWait * time.Second).After(now.Time) { logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "rack": dcRackName}).Info("The Operator Waits " + - strconv.Itoa(api.DefaultDelayWait) + - " seconds for the action to start correctly") + "rack": dcRackName}).Info( + fmt.Sprintf("The Operator Waits %s seconds for the action to start correctly", + strconv.Itoa(api.DefaultDelayWait)), + ) return true } } @@ -453,7 +454,7 @@ func (rcc *ReconcileCassandraCluster) UpdateCassandraRackStatusPhase(cc *api.Cas //Do we have reach requested number of replicas ? if isStatefulSetNotReady(storedStatefulSet) { - logrus.WithFields(logrusFields).Infof("Initializing StatefulSet: Replicas Number Not OK") + logrus.WithFields(logrusFields).Infof("Initializing StatefulSet: Replicas count is not okay") return } //If yes, just check that lastPod is running @@ -462,7 +463,7 @@ func (rcc *ReconcileCassandraCluster) UpdateCassandraRackStatusPhase(cc *api.Cas return } if len(podsList.Items) < int(nodesPerRacks) { - logrus.Infof("[%s][%s]: StatefulSet is scaling up", cc.Name, dcRackName) + logrus.WithFields(logrusFields).Infof("StatefulSet is scaling up") } pod := podsList.Items[nodesPerRacks-1] if cassandraPodIsReady(&pod) { @@ -471,17 +472,17 @@ func (rcc *ReconcileCassandraCluster) UpdateCassandraRackStatusPhase(cc *api.Cas now := metav1.Now() lastAction.EndTime = &now lastAction.Status = api.StatusDone - logrus.Infof("[%s][%s]: StatefulSet(%s): Replicas Number OK: ready[%d]", cc.Name, dcRackName, lastAction.Name, storedStatefulSet.Status.ReadyReplicas) + logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is okay") } } //No more in Initializing state if isStatefulSetNotReady(storedStatefulSet) { - logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas number not okay") + logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is not okay") status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhasePending.Name ClusterPhaseMetric.set(api.ClusterPhasePending, cc.Name) } else if status.CassandraRackStatus[dcRackName].Phase != api.ClusterPhaseRunning.Name { - logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas number not okay") + logrus.WithFields(logrusFields).Infof("StatefulSet: Replicas count is not okay") status.CassandraRackStatus[dcRackName].Phase = api.ClusterPhaseRunning.Name ClusterPhaseMetric.set(api.ClusterPhaseRunning, cc.Name) } diff --git a/pkg/controller/cassandracluster/decommission_test.go b/pkg/controller/cassandracluster/decommission_test.go index ee4633207..3ddf45def 100644 --- a/pkg/controller/cassandracluster/decommission_test.go +++ b/pkg/controller/cassandracluster/decommission_test.go @@ -181,12 +181,15 @@ func TestMultipleDecommissions(t *testing.T) { numberOfReplicas -= 1 assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) + // This reconcile does nothing as the pod has not been deleted yet by the statefulset + reconcileValidation(t, rcc, *req) + deletedPod := podHost(stfsName, 2, rcc) deletePodNotDeletedByFakeClient(rcc, deletedPod) lastPod = podHost(stfsName, 1, rcc) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(deletedPod)) + assert.Equal(3, jolokiaCallsCount(deletedPod)) assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) registerFatalJolokiaResponder(t, deletedPod) diff --git a/pkg/controller/cassandracluster/pod_operation.go b/pkg/controller/cassandracluster/pod_operation.go index 08aee9d03..f8d44fdab 100644 --- a/pkg/controller/cassandracluster/pod_operation.go +++ b/pkg/controller/cassandracluster/pod_operation.go @@ -309,33 +309,35 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste return rcc.ensureDecommissionToDo(cc, dcName, rackName, status) - case api.StatusOngoing, api.StatusFinalizing: - - if podLastOperation.Pods == nil || podLastOperation.Pods[0] == "" { - return breakResyncLoop, fmt.Errorf("For Status Ongoing we should have a PodLastOperation Pods item") - } - + case api.StatusFinalizing: lastPod, err := rcc.GetPod(cc.Namespace, podLastOperation.Pods[0]) if err != nil { - //If Node is already Gone, We Delete PVC if apierrors.IsNotFound(err) { - return rcc.ensureDecommissionFinalizing(cc, dcName, rackName, status, lastPod) + return rcc.deletePodPVC(cc, dcName, rackName, status, lastPod) } - return breakResyncLoop, fmt.Errorf("failed to get last cassandra's pods '%s': %v", - podLastOperation.Pods[0], err) + return breakResyncLoop, fmt.Errorf( + "Failed to get last cassandra's pods '%s': %v", podLastOperation.Pods[0], err, + ) } - //LastPod Still Exists - if !PodContainersReady(lastPod) && lastPod.DeletionTimestamp != nil { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, - "lastPod": lastPod.Name}).Infof("Statefulset is scaling down, waiting..") - return breakResyncLoop, nil + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, + "lastPod": lastPod.Name}).Infof("Statefulset is scaling down, waiting..") + return breakResyncLoop, nil + + case api.StatusOngoing: + if podLastOperation.Pods == nil || podLastOperation.Pods[0] == "" { + return breakResyncLoop, fmt.Errorf("Status is Ongoing, we should have a PodLastOperation Pods item") + } + + lastPod, err := rcc.GetPod(cc.Namespace, podLastOperation.Pods[0]) + if err != nil { + return breakResyncLoop, fmt.Errorf( + "Failed to get last pod '%s': %v", podLastOperation.Pods[0], err) } hostName := fmt.Sprintf("%s.%s", lastPod.Spec.Hostname, lastPod.Spec.Subdomain) - jolokiaClient, err := NewJolokiaClient(hostName, JolokiaPort, rcc, - cc.Spec.ImageJolokiaSecret, cc.Namespace) + jolokiaClient, err := NewJolokiaClient(hostName, JolokiaPort, rcc, cc.Spec.ImageJolokiaSecret, cc.Namespace) if err != nil { return breakResyncLoop, err @@ -352,27 +354,31 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste if operationMode == NORMAL { t, err := k8s.LabelTime2Time(lastPod.Labels["operation-start"]) if err != nil { - logrus.WithFields(logrus.Fields{"operation-start": lastPod.Labels["operation-start"]}).Debugf("Can't parse time") + logrus.WithFields(logrus.Fields{ + "operation-start": lastPod.Labels["operation-start"], + }).Debugf("Can't parse time") } now, _ := k8s.LabelTime2Time(k8s.LabelTime()) if t.Add(api.DefaultDelayWaitForDecommission * time.Second).After(now) { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, - "pod": lastPod.Name, "operationMode": operationMode, - "DefaultDelayWaitForDecommission": api.DefaultDelayWaitForDecommission}).Info("Decommission was applied less " + - "than DefaultDelayWaitForDecommission seconds, waiting") + logrus.WithFields(logrus.Fields{ + "cluster": cc.Name, "rack": dcRackName, "pod": lastPod.Name, + "operationMode": operationMode, + "DefaultDelayWaitForDecommission": api.DefaultDelayWaitForDecommission, + }).Info("Decommission was applied less than DefaultDelayWaitForDecommission seconds, waiting") } else { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, "pod": lastPod.Name, - "operationMode": operationMode}).Info("Seems that decommission has not correctly been applied, trying again..") + logrus.WithFields(logrus.Fields{ + "cluster": cc.Name, "rack": dcRackName, "pod": lastPod.Name, "operationMode": operationMode, + }).Info("Seems that decommission has not correctly been applied, trying again..") status.CassandraRackStatus[dcRackName].PodLastOperation.Status = api.StatusToDo } return breakResyncLoop, nil } if operationMode == DECOMMISSIONED || operationMode == UNKNOWN { - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, - "lastPod": lastPod.Name, "operationMode": operationMode}).Infof("Node has left the ring, " + - "waiting for statefulset Scaledown") + logrus.WithFields(logrus.Fields{ + "cluster": cc.Name, "rack": dcRackName, "lastPod": lastPod.Name, "operationMode": operationMode, + }).Infof("Node has left the ring, waiting for statefulset Scaledown") podLastOperation.Status = api.StatusFinalizing return continueResyncLoop, nil } @@ -381,7 +387,7 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste "operationMode": operationMode}).Info("Cassandra Node is decommissioning, we need to wait") return breakResyncLoop, nil - //In case of PodLastOperation Done we set LastAction to Continue to see if we need to decommission more + //Set LastAction to Continue in case more decommissions are needed case api.StatusDone: if podLastOperation.PodsOK == nil || podLastOperation.PodsOK[0] == "" { return breakResyncLoop, fmt.Errorf("For Status Done we should have a PodLastOperation.PodsOK item") @@ -483,9 +489,9 @@ func (rcc *ReconcileCassandraCluster) ensureDecommissionToDo(cc *api.CassandraCl return breakResyncLoop, nil } -//ensureDecommissionFinalizing +//deletePodPVC // State To-DO -> Ongoing -func (rcc *ReconcileCassandraCluster) ensureDecommissionFinalizing(cc *api.CassandraCluster, dcName, rackName string, +func (rcc *ReconcileCassandraCluster) deletePodPVC(cc *api.CassandraCluster, dcName, rackName string, status *api.CassandraClusterStatus, lastPod *v1.Pod) (bool, error) { dcRackName := cc.GetDCRackName(dcName, rackName) podLastOperation := &status.CassandraRackStatus[dcRackName].PodLastOperation @@ -508,10 +514,10 @@ func (rcc *ReconcileCassandraCluster) ensureDecommissionFinalizing(cc *api.Cassa dcRackStatus := status.CassandraRackStatus[dcRackName] if rcc.weAreScalingDown(dcRackStatus) { - // We have more decommissions to do + // More decommissions to do podLastOperation.Status = api.StatusContinue } else { - // We are done with decommissioning + // No more decommissions podLastOperation.Status = api.StatusDone } @@ -519,7 +525,6 @@ func (rcc *ReconcileCassandraCluster) ensureDecommissionFinalizing(cc *api.Cassa now := metav1.Now() podLastOperation.EndTime = &now podLastOperation.Pods = []string{} - //Important, We must break loop if multipleScaleDown has been asked return breakResyncLoop, nil } diff --git a/pkg/controller/cassandracluster/reconcile.go b/pkg/controller/cassandracluster/reconcile.go index 470e9b980..94bc85178 100644 --- a/pkg/controller/cassandracluster/reconcile.go +++ b/pkg/controller/cassandracluster/reconcile.go @@ -457,8 +457,6 @@ func (rcc *ReconcileCassandraCluster) ReconcileRack(cc *api.CassandraCluster, "initialize it in status", dcName, rackName) ClusterPhaseMetric.set(api.ClusterPhaseInitial, cc.Name) cc.InitCassandraRackinStatus(status, dcName, rackName) - //Return will stop operator reconcile loop until next one - //used here to write CassandraClusterStatus properly return nil } dcRackStatus := status.CassandraRackStatus[dcRackName] @@ -478,7 +476,7 @@ func (rcc *ReconcileCassandraCluster) ReconcileRack(cc *api.CassandraCluster, //Update CassandraClusterPhase rcc.UpdateCassandraRackStatusPhase(cc, dcName, rackName, storedStatefulSet, status) - //Find if there is an Action to execute or to end + //Find if there is an Action to execute/end rcc.getNextCassandraClusterStatus(cc, dc, rack, dcName, rackName, storedStatefulSet, status) //If not Initializing cluster execute pod operations queued diff --git a/pkg/controller/cassandracluster/statefulset.go b/pkg/controller/cassandracluster/statefulset.go index aabe74857..a9e9d1681 100644 --- a/pkg/controller/cassandracluster/statefulset.go +++ b/pkg/controller/cassandracluster/statefulset.go @@ -203,20 +203,14 @@ func (rcc *ReconcileCassandraCluster) CreateOrUpdateStatefulSet(statefulSet *app // if there is existing disruptions on Pods // Or if we are not scaling Down the current statefulset if rcc.thereIsPodDisruption() { - // It's not seen as a disruption in that case - // if rcc.weAreScalingDown(dcRackStatus) && rcc.hasOneDisruptedPod() { - // logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, - // "dc-rack": dcRackName}).Info("Cluster has 1 Pod Disrupted" + - // "but that may be normal as we are decommissioning") - // } else if rcc.cc.Spec.UnlockNextOperation { logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, - "dc-rack": dcRackName}).Warn("Cluster has 1 disrupted pod" + + "dc-rack": dcRackName}).Warn("Cluster has a disruption " + "but we have unlock the next operation") } else { logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, - "dc-rack": dcRackName}).Info("Cluster has disruption on Pods, " + - "we wait before applying any change to statefulset") + "dc-rack": dcRackName}).Info("Cluster has a disruption, " + + "waiting before applying any changes to statefulset") return api.ContinueResyncLoop, nil } } @@ -256,15 +250,6 @@ func (rcc *ReconcileCassandraCluster) CreateOrUpdateStatefulSet(statefulSet *app //we have some call which will block the call of this method as long as the decommission is running, so here //we just need to change the scaledown value if more than 1 at a time. - // If we're scaling down and Ready Replica is not yet the value we expect we break the loop - if rcc.weAreScalingDown(dcRackStatus) && - rcc.storedStatefulSet.Status.ReadyReplicas != rcc.storedStatefulSet.Status.Replicas { - logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, - "dc-rack": dcRackName}).Infof("CYRIL 500 - STFS not ready %d ready replicas for %d replicas asked", - rcc.storedStatefulSet.Status.ReadyReplicas, rcc.storedStatefulSet.Status.Replicas) - return api.BreakResyncLoop, nil - } - if *rcc.storedStatefulSet.Spec.Replicas-*statefulSet.Spec.Replicas > 1 { logrus.WithFields(logrus.Fields{"cluster": rcc.cc.Name, "dc-rack": dcRackName}).Debugf("Must scale down one node at a time. Update stfs " + From 913d4996ec983cca17c0085fac494433f261613a Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Sun, 12 Jul 2020 03:36:05 -0400 Subject: [PATCH 09/13] decommission - Fix consistancy of CheckPodsState --- .../cassandracluster/decommission_test.go | 29 ++++++++----------- pkg/controller/cassandracluster/reconcile.go | 4 +-- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/pkg/controller/cassandracluster/decommission_test.go b/pkg/controller/cassandracluster/decommission_test.go index 3ddf45def..0b9496fd9 100644 --- a/pkg/controller/cassandracluster/decommission_test.go +++ b/pkg/controller/cassandracluster/decommission_test.go @@ -103,26 +103,24 @@ func TestOneDecommission(t *testing.T) { lastPod := podHost(stfsName, 2, rcc) - for id := 0; id < 2; id++ { - registerFatalJolokiaResponder(t, podHost(stfsName, int8(id), rcc)) - } + registerFatalJolokiaResponder(t, podHost(stfsName, int8(1), rcc)) registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) assertStatefulsetReplicas(t, rcc, 3, cassandraCluster.Namespace, stfsName) registerJolokiaOperationModeResponder(lastPod, LEAVING) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) assertStatefulsetReplicas(t, rcc, 3, cassandraCluster.Namespace, stfsName) registerJolokiaOperationModeResponder(lastPod, DECOMMISSIONED) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) assertStatefulsetReplicas(t, rcc, 2, cassandraCluster.Namespace, stfsName) deletedPod := podHost(stfsName, 2, rcc) - assert.Equal(2, jolokiaCallsCount(deletedPod)) + assert.Equal(1, jolokiaCallsCount(deletedPod)) lastPod = podHost(stfsName, 1, rcc) deletePodNotDeletedByFakeClient(rcc, deletedPod) @@ -130,7 +128,7 @@ func TestOneDecommission(t *testing.T) { registerFatalJolokiaResponder(t, deletedPod) registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) - assert.Equal(1, jolokiaCallsCount(lastPod)) + assert.Equal(0, jolokiaCallsCount(lastPod)) } func assertStatefulsetReplicas(t *testing.T, rcc *ReconcileCassandraCluster, expected int, namespace, stfsName string){ @@ -158,26 +156,24 @@ func TestMultipleDecommissions(t *testing.T) { cassandraCluster.Spec.NodesPerRacks = 1 rcc.client.Update(context.TODO(), cassandraCluster) - for id := 0; id <= 1; id++ { - registerFatalJolokiaResponder(t, podHost(stfsName, int8(0), rcc)) - } + registerFatalJolokiaResponder(t, podHost(stfsName, int8(1), rcc)) lastPod := podHost(stfsName, 2, rcc) registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) numberOfReplicas := 3 assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) registerJolokiaOperationModeResponder(lastPod, LEAVING) reconcileValidation(t, rcc, *req) assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) registerJolokiaOperationModeResponder(lastPod, DECOMMISSIONED) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) numberOfReplicas -= 1 assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) @@ -195,12 +191,12 @@ func TestMultipleDecommissions(t *testing.T) { registerFatalJolokiaResponder(t, deletedPod) registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) registerJolokiaOperationModeResponder(lastPod, LEAVING) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) registerJolokiaOperationModeResponder(lastPod, DECOMMISSIONED) @@ -216,6 +212,5 @@ func TestMultipleDecommissions(t *testing.T) { registerFatalJolokiaResponder(t, deletedPod) registerJolokiaOperationModeResponder(lastPod, NORMAL) reconcileValidation(t, rcc, *req) - assert.Equal(1, jolokiaCallsCount(lastPod)) } diff --git a/pkg/controller/cassandracluster/reconcile.go b/pkg/controller/cassandracluster/reconcile.go index 94bc85178..77688e9ea 100644 --- a/pkg/controller/cassandracluster/reconcile.go +++ b/pkg/controller/cassandracluster/reconcile.go @@ -686,9 +686,9 @@ func (rcc *ReconcileCassandraCluster) CheckPodsState(cc *api.CassandraCluster, return nil } - logrus.WithFields(logrus.Fields{"cluster": cc.Name, "err": err}).Debug("Get last available pod") + logrus.WithFields(logrus.Fields{"cluster": cc.Name, "err": err}).Debug("Get first available pod") - firstPod, err := GetLastOrFirstPodReady(podsList, true) + firstPod, err := GetLastOrFirstPodReady(podsList, false) if err != nil { return err } From 5c5ad18dd5e426e7cb517a4905ed7457ca9fa0d7 Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Sun, 12 Jul 2020 03:43:21 -0400 Subject: [PATCH 10/13] decommission - Fix also TestMultipleDecommissions --- pkg/controller/cassandracluster/decommission_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/cassandracluster/decommission_test.go b/pkg/controller/cassandracluster/decommission_test.go index 0b9496fd9..7866d9291 100644 --- a/pkg/controller/cassandracluster/decommission_test.go +++ b/pkg/controller/cassandracluster/decommission_test.go @@ -185,7 +185,7 @@ func TestMultipleDecommissions(t *testing.T) { lastPod = podHost(stfsName, 1, rcc) reconcileValidation(t, rcc, *req) - assert.Equal(3, jolokiaCallsCount(deletedPod)) + assert.Equal(1, jolokiaCallsCount(deletedPod)) assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) registerFatalJolokiaResponder(t, deletedPod) @@ -201,7 +201,7 @@ func TestMultipleDecommissions(t *testing.T) { registerJolokiaOperationModeResponder(lastPod, DECOMMISSIONED) reconcileValidation(t, rcc, *req) - assert.Equal(2, jolokiaCallsCount(lastPod)) + assert.Equal(1, jolokiaCallsCount(lastPod)) numberOfReplicas -= 1 assertStatefulsetReplicas(t, rcc, numberOfReplicas, cassandraCluster.Namespace, stfsName) From 3a8fc98ec188cfb7cd7ba697236f2de97595d99d Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Sun, 12 Jul 2020 11:01:49 -0400 Subject: [PATCH 11/13] decommission - More refactoring --- .../cassandracluster/pod_operation.go | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pkg/controller/cassandracluster/pod_operation.go b/pkg/controller/cassandracluster/pod_operation.go index f8d44fdab..57001f026 100644 --- a/pkg/controller/cassandracluster/pod_operation.go +++ b/pkg/controller/cassandracluster/pod_operation.go @@ -315,9 +315,7 @@ func (rcc *ReconcileCassandraCluster) ensureDecommission(cc *api.CassandraCluste if apierrors.IsNotFound(err) { return rcc.deletePodPVC(cc, dcName, rackName, status, lastPod) } - return breakResyncLoop, fmt.Errorf( - "Failed to get last cassandra's pods '%s': %v", podLastOperation.Pods[0], err, - ) + return breakResyncLoop, fmt.Errorf("Failed to get pod %s: %v", podLastOperation.Pods[0], err) } logrus.WithFields(logrus.Fields{"cluster": cc.Name, "rack": dcRackName, @@ -512,14 +510,7 @@ func (rcc *ReconcileCassandraCluster) deletePodPVC(cc *api.CassandraCluster, dcN return breakResyncLoop, nil } - dcRackStatus := status.CassandraRackStatus[dcRackName] - if rcc.weAreScalingDown(dcRackStatus) { - // More decommissions to do - podLastOperation.Status = api.StatusContinue - } else { - // No more decommissions - podLastOperation.Status = api.StatusDone - } + SetStatusForMoreDecommissions(podLastOperation, rcc.weAreScalingDown(status.CassandraRackStatus[dcRackName])) podLastOperation.PodsOK = []string{lastPod.Name} now := metav1.Now() @@ -528,6 +519,14 @@ func (rcc *ReconcileCassandraCluster) deletePodPVC(cc *api.CassandraCluster, dcN return breakResyncLoop, nil } +func SetStatusForMoreDecommissions(podLastOperation *api.PodLastOperation, moreDecommisions bool) { + if moreDecommisions { + podLastOperation.Status = api.StatusContinue + } else { + podLastOperation.Status = api.StatusDone + } +} + func (rcc *ReconcileCassandraCluster) podsSlice(cc *api.CassandraCluster, status *api.CassandraClusterStatus, podLastOperation api.PodLastOperation, dcRackName, operationName, operatorName string) ([]v1.Pod, bool) { checkOnly := false From 74a273c3e7c966610869648cc4164fd2c7c5154d Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Sun, 19 Jul 2020 09:59:28 -0400 Subject: [PATCH 12/13] decommission - Apply review comments --- pkg/controller/cassandracluster/cassandra_status.go | 3 --- pkg/controller/cassandracluster/decommission_test.go | 1 - pkg/controller/cassandracluster/pod.go | 2 +- pkg/k8s/util.go | 4 ---- 4 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pkg/controller/cassandracluster/cassandra_status.go b/pkg/controller/cassandracluster/cassandra_status.go index e712499e2..430f75d16 100644 --- a/pkg/controller/cassandracluster/cassandra_status.go +++ b/pkg/controller/cassandracluster/cassandra_status.go @@ -145,9 +145,6 @@ func (rcc *ReconcileCassandraCluster) getNextCassandraClusterStatus(cc *api.Cass now := metav1.Now() lastAction.StartTime = &now lastAction.Status = api.StatusOngoing - logrus.WithFields(logrus.Fields{"cluster": cc.Name, - "dc-rack": dcRackName, "hasDisruption": rcc.thereIsPodDisruption(), - "lastAction.Status": lastAction.Status}).Debug("CYRIL 4") } return nil diff --git a/pkg/controller/cassandracluster/decommission_test.go b/pkg/controller/cassandracluster/decommission_test.go index 7866d9291..3acecd346 100644 --- a/pkg/controller/cassandracluster/decommission_test.go +++ b/pkg/controller/cassandracluster/decommission_test.go @@ -42,7 +42,6 @@ func createCassandraClusterWithNoDisruption(t *testing.T, cassandraClusterFileNa } func registerJolokiaOperationModeResponder(host podName, op operationMode) { - fmt.Println(string(op)) httpmock.RegisterResponder("POST", JolokiaURL(host.FullName, jolokiaPort), httpmock.NewStringResponder(200, fmt.Sprintf(`{"request": {"mbean": "org.apache.cassandra.db:type=StorageService", diff --git a/pkg/controller/cassandracluster/pod.go b/pkg/controller/cassandracluster/pod.go index ae5dd3299..17575b30a 100644 --- a/pkg/controller/cassandracluster/pod.go +++ b/pkg/controller/cassandracluster/pod.go @@ -42,7 +42,7 @@ var reEndingNumber = regexp.MustCompile("[0-9]+$") func PodContainersReady(pod *v1.Pod) bool { if pod.Status.ContainerStatuses != nil && len(pod.Status.ContainerStatuses) > 0 { for _, containerStatus := range pod.Status.ContainerStatuses { - if containerStatus.Ready == false { + if !containerStatus.Ready { return false } } diff --git a/pkg/k8s/util.go b/pkg/k8s/util.go index 8af6d222d..f3d2d0384 100644 --- a/pkg/k8s/util.go +++ b/pkg/k8s/util.go @@ -61,10 +61,6 @@ func LabelsForCassandra(cc *api.CassandraCluster) map[string]string { return MergeLabels(cc.GetLabels(), m) } -//RemoveString remove a string from a slice -//s := []string{"one", "two", "three"} -//s = RemoveString(s, "two") -//fmt.Println(s) // Prints [one three] func RemoveString(s []string, r string) []string { for i, v := range s { if v == r { From c2f9676a0fdb6284b933d7a7b9e1d93685ad4228 Mon Sep 17 00:00:00 2001 From: Cyril Scetbon Date: Sun, 19 Jul 2020 22:46:03 -0400 Subject: [PATCH 13/13] decommission - Update CircleCI config as k3d v3 is out --- .circleci/config.yml | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 776d3ed34..90c1778eb 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -40,10 +40,9 @@ orbs: - run: name: Set up K3d command: | - wget -q -O - https://raw.githubusercontent.com/rancher/k3d/master/install.sh | TAG=v3.0.0-rc.1 bash - k3d create cluster --image ${K3S_IMAGE} --wait 0 + wget -q -O - https://raw.githubusercontent.com/rancher/k3d/main/install.sh | TAG=v3.0.0 bash + k3d cluster create --image ${K3S_IMAGE} --wait 0 mkdir -p ${HOME}/.kube - k3d get kubeconfig 0 - checkout: path: /home/circleci/casskop - run: @@ -61,7 +60,7 @@ orbs: docker pull $(cat casskop-build-image-tar) docker save $(cat casskop-build-image-tar) > casskop-build-image.tar fi - k3d load image casskop-build-image.tar + k3d image import casskop-build-image.tar - save_cache: name: Save Casskop build image key: '{{ checksum "casskop-build-image-tar" }}' @@ -74,14 +73,8 @@ orbs: sudo chmod o+w /usr/local/bin/ wget -P /usr/local/bin/ https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl chmod +x /usr/local/bin/kubectl - KUBECONFIG="$(k3d get-kubeconfig)" kubectl get nodes - attach_workspace: # Attach artifact from workdir at: /home/circleci - - run: # Check we correctly access to K8s information with kubectl cli - name: Test k8s - command: | - kubectl get nodes - kubectl get pods # Acceptance test - run: # Run acceptance test through 'docker-e2e-test-fix-arg' makefile step name: Operator acceptance test