Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[release-1.9] 🌱 Improve KCP scale up when using failure domains #11604

Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 17 additions & 10 deletions controlplane/kubeadm/internal/control_plane.go
Original file line number Diff line number Diff line change
@@ -153,9 +153,10 @@ func (c *ControlPlane) FailureDomains() clusterv1.FailureDomains {
}

// MachineInFailureDomainWithMostMachines returns the first matching failure domain with machines that has the most control-plane machines on it.
func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) (*clusterv1.Machine, error) {
fd := c.FailureDomainWithMostMachines(ctx, machines)
machinesInFailureDomain := machines.Filter(collections.InFailureDomains(fd))
// Note: if there are eligibleMachines machines in failure domain that do not exists anymore, getting rid of those machines take precedence.
func (c *ControlPlane) MachineInFailureDomainWithMostMachines(ctx context.Context, eligibleMachines collections.Machines) (*clusterv1.Machine, error) {
fd := c.FailureDomainWithMostMachines(ctx, eligibleMachines)
machinesInFailureDomain := eligibleMachines.Filter(collections.InFailureDomains(fd))
machineToMark := machinesInFailureDomain.Oldest()
if machineToMark == nil {
return nil, errors.New("failed to pick control plane Machine to mark for deletion")
@@ -171,11 +172,11 @@ func (c *ControlPlane) MachineWithDeleteAnnotation(machines collections.Machines
return annotatedMachines
}

// FailureDomainWithMostMachines returns a fd which exists both in machines and control-plane machines and has the most
// control-plane machines on it.
func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machines collections.Machines) *string {
// FailureDomainWithMostMachines returns the fd with most machines in it and at least one eligible machine in it.
// Note: if there are eligibleMachines machines in failure domain that do not exist anymore, cleaning up those failure domains takes precedence.
func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, eligibleMachines collections.Machines) *string {
// See if there are any Machines that are not in currently defined failure domains first.
notInFailureDomains := machines.Filter(
notInFailureDomains := eligibleMachines.Filter(
collections.Not(collections.InFailureDomains(c.FailureDomains().FilterControlPlane().GetIDs()...)),
)
if len(notInFailureDomains) > 0 {
@@ -184,15 +185,21 @@ func (c *ControlPlane) FailureDomainWithMostMachines(ctx context.Context, machin
// in the cluster status.
return notInFailureDomains.Oldest().Spec.FailureDomain
}
return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, machines)

// Pick the failure domain with most machines in it and at least one eligible machine in it.
return failuredomains.PickMost(ctx, c.Cluster.Status.FailureDomains.FilterControlPlane(), c.Machines, eligibleMachines)
}

// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines.
// NextFailureDomainForScaleUp returns the failure domain with the fewest number of up-to-date, not deleted machines
// (the ultimate goal is to achieve ideal spreading of machines at stable state/when only up-to-date machines will exist).
//
// In case of tie (more failure domain with the same number of up-to-date, not deleted machines) the failure domain with the fewest number of
// machine overall is picked to ensure a better spreading of machines while the rollout is performed.
func (c *ControlPlane) NextFailureDomainForScaleUp(ctx context.Context) (*string, error) {
if len(c.Cluster.Status.FailureDomains.FilterControlPlane()) == 0 {
return nil, nil
}
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
return failuredomains.PickFewest(ctx, c.FailureDomains().FilterControlPlane(), c.Machines, c.UpToDateMachines().Filter(collections.Not(collections.HasDeletionTimestamp))), nil
}

// InitialControlPlaneConfig returns a new KubeadmConfigSpec that is to be used for an initializing control plane.
28 changes: 21 additions & 7 deletions controlplane/kubeadm/internal/controllers/scale.go
Original file line number Diff line number Diff line change
@@ -248,17 +248,31 @@ func preflightCheckCondition(kind string, obj conditions.Getter, condition clust
return nil
}

// selectMachineForScaleDown select a machine candidate for scaling down. The selection is a two phase process:
//
// In the first phase it selects a subset of machines eligible for deletion:
// - if there are outdated machines with the delete machine annotation, use them as eligible subset (priority to user requests, part 1)
// - if there are machines (also not outdated) with the delete machine annotation, use them (priority to user requests, part 2)
// - if there are outdated machines with unhealthy control plane components, use them (priority to restore control plane health)
// - if there are outdated machines consider all the outdated machines as eligible subset (rollout)
// - otherwise consider all the machines
//
// Once the subset of machines eligible for deletion is identified, one machine is picked out of this subset by
// selecting the machine in the failure domain with most machines (including both eligible and not eligible machines).
func selectMachineForScaleDown(ctx context.Context, controlPlane *internal.ControlPlane, outdatedMachines collections.Machines) (*clusterv1.Machine, error) {
machines := controlPlane.Machines
// Select the subset of machines eligible for scale down.
eligibleMachines := controlPlane.Machines
switch {
case controlPlane.MachineWithDeleteAnnotation(outdatedMachines).Len() > 0:
machines = controlPlane.MachineWithDeleteAnnotation(outdatedMachines)
case controlPlane.MachineWithDeleteAnnotation(machines).Len() > 0:
machines = controlPlane.MachineWithDeleteAnnotation(machines)
eligibleMachines = controlPlane.MachineWithDeleteAnnotation(outdatedMachines)
case controlPlane.MachineWithDeleteAnnotation(eligibleMachines).Len() > 0:
eligibleMachines = controlPlane.MachineWithDeleteAnnotation(eligibleMachines)
case controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines).Len() > 0:
machines = controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines)
eligibleMachines = controlPlane.UnhealthyMachinesWithUnhealthyControlPlaneComponents(outdatedMachines)
case outdatedMachines.Len() > 0:
machines = outdatedMachines
eligibleMachines = outdatedMachines
}
return controlPlane.MachineInFailureDomainWithMostMachines(ctx, machines)

// Pick an eligible machine from the failure domain with most machines in (including both eligible and not eligible machines)
return controlPlane.MachineInFailureDomainWithMostMachines(ctx, eligibleMachines)
}
112 changes: 74 additions & 38 deletions util/failuredomains/failure_domains.go
Original file line number Diff line number Diff line change
@@ -30,8 +30,9 @@ import (
)

type failureDomainAggregation struct {
id string
count int
id string
countPriority int
countAll int
}
type failureDomainAggregations []failureDomainAggregation

@@ -43,67 +44,87 @@ func (f failureDomainAggregations) Len() int {
// Less reports whether the element with
// index i should sort before the element with index j.
func (f failureDomainAggregations) Less(i, j int) bool {
return f[i].count < f[j].count
// If a failure domain has less priority machines then the other, it goes first
if f[i].countPriority < f[j].countPriority {
return true
}
if f[i].countPriority > f[j].countPriority {
return false
}

// If a failure domain has the same number of priority machines then the other,
// use the number of overall machines to pick which one goes first.
if f[i].countAll < f[j].countAll {
return true
}
if f[i].countAll > f[j].countAll {
return false
}

// If both failure domain have the same number of priority machines and overall machines, we keep the order
// in the list which ensure a certain degree of randomness because the list originates from a map.
// This helps to spread machines e.g. when concurrently working on many clusters.
return i < j
}

// Swap swaps the elements with indexes i and j.
func (f failureDomainAggregations) Swap(i, j int) {
f[i], f[j] = f[j], f[i]
}

// PickMost returns a failure domain that is in machines and has most of the group of machines on.
func PickMost(ctx context.Context, failureDomains clusterv1.FailureDomains, groupMachines, machines collections.Machines) *string {
// orderDescending sorts failure domains according to all machines belonging to the group.
fds := orderDescending(ctx, failureDomains, groupMachines)
for _, fd := range fds {
for _, m := range machines {
if m.Spec.FailureDomain == nil {
continue
}
if *m.Spec.FailureDomain == fd.id {
return &fd.id
}
}
}
return nil
}

// orderDescending returns the sorted failure domains in decreasing order.
func orderDescending(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) failureDomainAggregations {
aggregations := pick(ctx, failureDomains, machines)
// PickMost returns the failure domain from which we have to delete a control plane machine, which is the failure domain with most machines and at least one eligible machine in it.
func PickMost(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, eligibleMachines collections.Machines) *string {
aggregations := countByFailureDomain(ctx, failureDomains, allMachines, eligibleMachines)
if len(aggregations) == 0 {
return nil
}
sort.Sort(sort.Reverse(aggregations))
return aggregations
if len(aggregations) > 0 && aggregations[0].countPriority > 0 {
return ptr.To(aggregations[0].id)
}
return nil
}

// PickFewest returns the failure domain with the fewest number of machines.
func PickFewest(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) *string {
aggregations := pick(ctx, failureDomains, machines)
// PickFewest returns the failure domain that will be used for placement of a new control plane machine, which is the failure domain with the fewest
// number of up-to-date, not deleted machines.
//
// Ensuring proper spreading of up-to-date, not deleted machines, is the highest priority to achieve ideal spreading of machines
// at stable state/when only up-to-date machines will exist.
//
// In case of tie (more failure domain with the same number of up-to-date, not deleted machines) the failure domain with the fewest number of
// machine overall is picked to ensure a better spreading of machines while the rollout is performed.
func PickFewest(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, upToDateMachines collections.Machines) *string {
aggregations := countByFailureDomain(ctx, failureDomains, allMachines, upToDateMachines)
if len(aggregations) == 0 {
return nil
}
sort.Sort(aggregations)
return ptr.To(aggregations[0].id)
}

func pick(ctx context.Context, failureDomains clusterv1.FailureDomains, machines collections.Machines) failureDomainAggregations {
// countByFailureDomain returns failure domains with the number of machines in it.
// Note: countByFailureDomain computes both the number of machines as well as the number of a subset of machines with higher priority.
// E.g. for deletion out of date machines have higher priority vs other machines.
func countByFailureDomain(ctx context.Context, failureDomains clusterv1.FailureDomains, allMachines, priorityMachines collections.Machines) failureDomainAggregations {
log := ctrl.LoggerFrom(ctx)

if len(failureDomains) == 0 {
return failureDomainAggregations{}
}

counters := map[string]int{}
counters := map[string]failureDomainAggregation{}

// Initialize the known failure domain keys to find out if an existing machine is in an unsupported failure domain.
for fd := range failureDomains {
counters[fd] = 0
for id := range failureDomains {
counters[id] = failureDomainAggregation{
id: id,
countPriority: 0,
countAll: 0,
}
}

// Count how many machines are in each failure domain.
for _, m := range machines {
for _, m := range allMachines {
if m.Spec.FailureDomain == nil {
continue
}
@@ -116,15 +137,30 @@ func pick(ctx context.Context, failureDomains clusterv1.FailureDomains, machines
log.Info(fmt.Sprintf("Unknown failure domain %q for Machine %s (known failure domains: %v)", id, m.GetName(), knownFailureDomains))
continue
}
counters[id]++
a := counters[id]
a.countAll++
counters[id] = a
}

aggregations := make(failureDomainAggregations, 0)

// Gather up tuples of failure domains ids and counts
for fd, count := range counters {
aggregations = append(aggregations, failureDomainAggregation{id: fd, count: count})
for _, m := range priorityMachines {
if m.Spec.FailureDomain == nil {
continue
}
id := *m.Spec.FailureDomain
if _, ok := failureDomains[id]; !ok {
continue
}
a := counters[id]
a.countPriority++
counters[id] = a
}

// Collect failure domain aggregations.
// Note: by creating the list from a map, we get a certain degree of randomness that helps to spread machines
// e.g. when concurrently working on many clusters.
aggregations := make(failureDomainAggregations, 0)
for _, count := range counters {
aggregations = append(aggregations, count)
}
return aggregations
}
729 changes: 710 additions & 19 deletions util/failuredomains/failure_domains_test.go

Large diffs are not rendered by default.