Skip to content

Commit

Permalink
Merge pull request #8595 from ykakarap/pr-double-rollout_ms-preflight…
Browse files Browse the repository at this point in the history
…-check

✨   MS preflight checks to improve cluster stability
  • Loading branch information
k8s-ci-robot authored May 31, 2023
2 parents 89429b8 + 5b6a0f7 commit e2bab2f
Show file tree
Hide file tree
Showing 20 changed files with 1,061 additions and 74 deletions.
44 changes: 44 additions & 0 deletions api/v1beta1/common_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,18 @@ const (
// MachineSkipRemediationAnnotation is the annotation used to mark the machines that should not be considered for remediation by MachineHealthCheck reconciler.
MachineSkipRemediationAnnotation = "cluster.x-k8s.io/skip-remediation"

// MachineSetSkipPreflightChecksAnnotation is the annotation used to provide a comma-separated list of
// preflight checks that should be skipped during the MachineSet reconciliation.
// Supported items are:
// - KubeadmVersion (skips the kubeadm version skew preflight check)
// - KubernetesVersion (skips the kubernetes version skew preflight check)
// - ControlPlaneStable (skips checking that the control plane is neither provisioning nor upgrading)
// - All (skips all preflight checks)
// Example: "machineset.cluster.x-k8s.io/skip-preflight-checks": "ControlPlaneStable,KubernetesVersion".
// Note: The annotation can also be set on a MachineDeployment as MachineDeployment annotations are synced to
// the MachineSet.
MachineSetSkipPreflightChecksAnnotation = "machineset.cluster.x-k8s.io/skip-preflight-checks"

// ClusterSecretType defines the type of secret created by core components.
// Note: This is used by core CAPI, CAPBK, and KCP to determine whether a secret is created by the controllers
// themselves or supplied by the user (e.g. bring your own certificates).
Expand Down Expand Up @@ -173,6 +185,38 @@ const (
VariableDefinitionFromInline = "inline"
)

// MachineSetPreflightCheck defines a valid MachineSet preflight check.
type MachineSetPreflightCheck string

const (
// MachineSetPreflightCheckAll can be used to represent all the MachineSet preflight checks.
MachineSetPreflightCheckAll MachineSetPreflightCheck = "All"

// MachineSetPreflightCheckKubeadmVersionSkew is the name of the preflight check
// that verifies if the machine being created or remediated for the MachineSet conforms to the kubeadm version
// skew policy that requires the machine to be at the same version as the control plane.
// Note: This is a stopgap while the root cause of the problem is fixed in kubeadm; this check will become
// a no-op when this check will be available in kubeadm, and then eventually be dropped when all the
// supported Kuberenetes/kubeadm versions have implemented the fix.
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster),
// the ControlPlane has a version, the MachineSet has a version and the MachineSet uses the Kubeadm bootstrap
// provider.
MachineSetPreflightCheckKubeadmVersionSkew MachineSetPreflightCheck = "KubeadmVersionSkew"

// MachineSetPreflightCheckKubernetesVersionSkew is the name of the preflight check that verifies
// if the machines being created or remediated for the MachineSet conform to the Kubernetes version skew policy
// that requires the machines to be at a version that is not more than 2 minor lower than the ControlPlane version.
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster),
// the ControlPlane has a version and the MachineSet has a version.
MachineSetPreflightCheckKubernetesVersionSkew MachineSetPreflightCheck = "KubernetesVersionSkew"

// MachineSetPreflightCheckControlPlaneIsStable is the name of the preflight check
// that verifies if the control plane is not provisioning and not upgrading.
// The preflight check is only run if a ControlPlane is used (controlPlaneRef must exist in the Cluster)
// and the ControlPlane has a version.
MachineSetPreflightCheckControlPlaneIsStable MachineSetPreflightCheck = "ControlPlaneIsStable"
)

// NodeUninitializedTaint can be added to Nodes at creation by the bootstrap provider, e.g. the
// KubeadmBootstrap provider will add the taint.
// This taint is used to prevent workloads to be scheduled on Nodes before the node is initialized by Cluster API.
Expand Down
9 changes: 9 additions & 0 deletions api/v1beta1/machinedeployment_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

"sigs.k8s.io/cluster-api/feature"
"sigs.k8s.io/cluster-api/util/version"
)

Expand Down Expand Up @@ -214,6 +215,14 @@ func (m *MachineDeployment) validate(old *MachineDeployment) error {
)
}

// MachineSet preflight checks that should be skipped could also be set as annotation on the MachineDeployment
// since MachineDeployment annotations are synced to the MachineSet.
if feature.Gates.Enabled(feature.MachineSetPreflightChecks) {
if err := validateSkippedMachineSetPreflightChecks(m); err != nil {
allErrs = append(allErrs, err)
}
}

if old != nil && old.Spec.ClusterName != m.Spec.ClusterName {
allErrs = append(
allErrs,
Expand Down
43 changes: 43 additions & 0 deletions api/v1beta1/machineset_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/validation/field"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"

"sigs.k8s.io/cluster-api/feature"
capilabels "sigs.k8s.io/cluster-api/internal/labels"
"sigs.k8s.io/cluster-api/util/version"
)
Expand Down Expand Up @@ -120,6 +123,12 @@ func (m *MachineSet) validate(old *MachineSet) error {
)
}

if feature.Gates.Enabled(feature.MachineSetPreflightChecks) {
if err := validateSkippedMachineSetPreflightChecks(m); err != nil {
allErrs = append(allErrs, err)
}
}

if old != nil && old.Spec.ClusterName != m.Spec.ClusterName {
allErrs = append(
allErrs,
Expand Down Expand Up @@ -149,3 +158,37 @@ func (m *MachineSet) validate(old *MachineSet) error {

return apierrors.NewInvalid(GroupVersion.WithKind("MachineSet").GroupKind(), m.Name, allErrs)
}

func validateSkippedMachineSetPreflightChecks(o client.Object) *field.Error {
if o == nil {
return nil
}
skip := o.GetAnnotations()[MachineSetSkipPreflightChecksAnnotation]
if skip == "" {
return nil
}

supported := sets.New[MachineSetPreflightCheck](
MachineSetPreflightCheckAll,
MachineSetPreflightCheckKubeadmVersionSkew,
MachineSetPreflightCheckKubernetesVersionSkew,
MachineSetPreflightCheckControlPlaneIsStable,
)

skippedList := strings.Split(skip, ",")
invalid := []MachineSetPreflightCheck{}
for i := range skippedList {
skipped := MachineSetPreflightCheck(strings.TrimSpace(skippedList[i]))
if !supported.Has(skipped) {
invalid = append(invalid, skipped)
}
}
if len(invalid) > 0 {
return field.Invalid(
field.NewPath("metadata", "annotations", MachineSetSkipPreflightChecksAnnotation),
invalid,
fmt.Sprintf("skipped preflight check(s) must be among: %v", sets.List(supported)),
)
}
return nil
}
70 changes: 70 additions & 0 deletions api/v1beta1/machineset_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,3 +228,73 @@ func TestMachineSetVersionValidation(t *testing.T) {
})
}
}

func TestValidateSkippedMachineSetPreflightChecks(t *testing.T) {
tests := []struct {
name string
ms *MachineSet
expectErr bool
}{
{
name: "should pass if the machine set skip preflight checks annotation is not set",
ms: &MachineSet{},
expectErr: false,
},
{
name: "should pass if not preflight checks are skipped",
ms: &MachineSet{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
MachineSetSkipPreflightChecksAnnotation: "",
},
},
},
expectErr: false,
},
{
name: "should pass if only valid preflight checks are skipped (single)",
ms: &MachineSet{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
MachineSetSkipPreflightChecksAnnotation: string(MachineSetPreflightCheckKubeadmVersionSkew),
},
},
},
expectErr: false,
},
{
name: "should pass if only valid preflight checks are skipped (multiple)",
ms: &MachineSet{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
MachineSetSkipPreflightChecksAnnotation: string(MachineSetPreflightCheckKubeadmVersionSkew) + "," + string(MachineSetPreflightCheckControlPlaneIsStable),
},
},
},
expectErr: false,
},
{
name: "should fail if invalid preflight checks are skipped",
ms: &MachineSet{
ObjectMeta: metav1.ObjectMeta{
Annotations: map[string]string{
MachineSetSkipPreflightChecksAnnotation: string(MachineSetPreflightCheckKubeadmVersionSkew) + ",invalid-preflight-check-name",
},
},
},
expectErr: true,
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)
err := validateSkippedMachineSetPreflightChecks(tt.ms)
if tt.expectErr {
g.Expect(err).NotTo(BeNil())
} else {
g.Expect(err).To(BeNil())
}
})
}
}
2 changes: 1 addition & 1 deletion config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ spec:
args:
- "--leader-elect"
- "--metrics-bind-addr=localhost:8080"
- "--feature-gates=MachinePool=${EXP_MACHINE_POOL:=false},ClusterResourceSet=${EXP_CLUSTER_RESOURCE_SET:=false},ClusterTopology=${CLUSTER_TOPOLOGY:=false},RuntimeSDK=${EXP_RUNTIME_SDK:=false}"
- "--feature-gates=MachinePool=${EXP_MACHINE_POOL:=false},ClusterResourceSet=${EXP_CLUSTER_RESOURCE_SET:=false},ClusterTopology=${CLUSTER_TOPOLOGY:=false},RuntimeSDK=${EXP_RUNTIME_SDK:=false},MachineSetPreflightChecks=${EXP_MACHINE_SET_PREFLIGHT_CHECKS:=false}"
image: controller:latest
name: manager
env:
Expand Down
1 change: 1 addition & 0 deletions docs/book/src/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
- [Healthchecking](./tasks/automated-machine-management/healthchecking.md)
- [Experimental Features](./tasks/experimental-features/experimental-features.md)
- [MachinePools](./tasks/experimental-features/machine-pools.md)
- [MachineSetPreflightChecks](./tasks/experimental-features/machineset-preflight-checks.md)
- [ClusterResourceSet](./tasks/experimental-features/cluster-resource-set.md)
- [ClusterClass](./tasks/experimental-features/cluster-class/index.md)
- [Writing a ClusterClass](./tasks/experimental-features/cluster-class/write-clusterclass.md)
Expand Down
1 change: 1 addition & 0 deletions docs/book/src/developer/testing.md
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ kustomize_substitutions:
EXP_CLUSTER_RESOURCE_SET: "true"
EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION: "true"
EXP_RUNTIME_SDK: "true"
EXP_MACHINE_SET_PREFLIGHT_CHECKS: "true"
```
</aside>
Expand Down
1 change: 1 addition & 0 deletions docs/book/src/developer/tilt.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ kustomize_substitutions:
EXP_CLUSTER_RESOURCE_SET: "true"
EXP_KUBEADM_BOOTSTRAP_FORMAT_IGNITION: "true"
EXP_RUNTIME_SDK: "true"
EXP_MACHINE_SET_PREFLIGHT_CHECKS: "true"
```

{{#tabs name:"tab-tilt-kustomize-substitution" tabs:"AWS,Azure,DigitalOcean,GCP,vSphere"}}
Expand Down
Loading

0 comments on commit e2bab2f

Please sign in to comment.