Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Cherry-pick rel-v1.30] Support extended resources and ephemeral-storage for scale-from-zero specified in MachineClass NodeTemplate #336

Merged
merged 1 commit into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 48 additions & 21 deletions cluster-autoscaler/cloudprovider/mcm/mcm_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,11 @@ import (
"fmt"
v1appslister "k8s.io/client-go/listers/apps/v1"
"k8s.io/utils/pointer"
"maps"
"math/rand"
"net/http"
"os"
"slices"
"strconv"
"strings"
"time"
Expand Down Expand Up @@ -111,6 +113,12 @@ var (
machineGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machines"}
machineSetGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machinesets"}
machineDeploymentGVR = schema.GroupVersionResource{Group: machineGroup, Version: machineVersion, Resource: "machinedeployments"}

// ErrInvalidNodeTemplate is a sentinel error that indicates that the nodeTemplate is invalid.
ErrInvalidNodeTemplate = errors.New("invalid node template")
coreResourceNames = []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, "gpu"}
extraResourceNames = []v1.ResourceName{gpu.ResourceNvidiaGPU, v1.ResourcePods, v1.ResourceEphemeralStorage}
knownResourceNames = slices.Concat(coreResourceNames, extraResourceNames)
)

// McmManager manages the client communication for MachineDeployments.
Expand All @@ -130,12 +138,13 @@ type McmManager struct {
}

type instanceType struct {
InstanceType string
VCPU resource.Quantity
Memory resource.Quantity
GPU resource.Quantity
EphemeralStorage resource.Quantity
PodCount resource.Quantity
InstanceType string
VCPU resource.Quantity
Memory resource.Quantity
GPU resource.Quantity
EphemeralStorage resource.Quantity
PodCount resource.Quantity
ExtendedResources apiv1.ResourceList
}

type nodeTemplate struct {
Expand Down Expand Up @@ -689,25 +698,23 @@ func generateInstanceStatus(machine *v1alpha1.Machine) *cloudprovider.InstanceSt
func validateNodeTemplate(nodeTemplateAttributes *v1alpha1.NodeTemplate) error {
var allErrs []error

capacityAttributes := []v1.ResourceName{"cpu", "gpu", "memory"}

for _, attribute := range capacityAttributes {
for _, attribute := range coreResourceNames {
if _, ok := nodeTemplateAttributes.Capacity[attribute]; !ok {
errMessage := fmt.Errorf("CPU, GPU and memory fields are mandatory")
errMessage := fmt.Errorf("the core resource fields %q are mandatory: %w", coreResourceNames, ErrInvalidNodeTemplate)
klog.Warning(errMessage)
allErrs = append(allErrs, errMessage)
break
}
}

if nodeTemplateAttributes.Region == "" || nodeTemplateAttributes.InstanceType == "" || nodeTemplateAttributes.Zone == "" {
errMessage := fmt.Errorf("InstanceType, Region and Zone attributes are mandatory")
errMessage := fmt.Errorf("InstanceType, Region and Zone attributes are mandatory: %w", ErrInvalidNodeTemplate)
klog.Warning(errMessage)
allErrs = append(allErrs, errMessage)
}

if allErrs != nil {
return fmt.Errorf("%s", allErrs)
return errors.Join(allErrs...)
}

return nil
Expand Down Expand Up @@ -771,21 +778,26 @@ func (m *McmManager) GetMachineDeploymentNodeTemplate(machinedeployment *Machine
klog.V(1).Infof("Nodes already existing in the worker pool %s", workerPool)
baseNode := filteredNodes[0]
klog.V(1).Infof("Worker pool node used to form template is %s and its capacity is cpu: %s, memory:%s", baseNode.Name, baseNode.Status.Capacity.Cpu().String(), baseNode.Status.Capacity.Memory().String())
extendedResources := filterExtendedResources(baseNode.Status.Capacity)
instance = instanceType{
VCPU: baseNode.Status.Capacity[apiv1.ResourceCPU],
Memory: baseNode.Status.Capacity[apiv1.ResourceMemory],
GPU: baseNode.Status.Capacity[gpu.ResourceNvidiaGPU],
EphemeralStorage: baseNode.Status.Capacity[apiv1.ResourceEphemeralStorage],
PodCount: baseNode.Status.Capacity[apiv1.ResourcePods],
VCPU: baseNode.Status.Capacity[apiv1.ResourceCPU],
Memory: baseNode.Status.Capacity[apiv1.ResourceMemory],
GPU: baseNode.Status.Capacity[gpu.ResourceNvidiaGPU],
EphemeralStorage: baseNode.Status.Capacity[apiv1.ResourceEphemeralStorage],
PodCount: baseNode.Status.Capacity[apiv1.ResourcePods],
ExtendedResources: extendedResources,
}
} else {
klog.V(1).Infof("Generating node template only using nodeTemplate from MachineClass %s: template resources-> cpu: %s,memory: %s", machineClass.Name, nodeTemplateAttributes.Capacity.Cpu().String(), nodeTemplateAttributes.Capacity.Memory().String())
extendedResources := filterExtendedResources(nodeTemplateAttributes.Capacity)
instance = instanceType{
VCPU: nodeTemplateAttributes.Capacity[apiv1.ResourceCPU],
Memory: nodeTemplateAttributes.Capacity[apiv1.ResourceMemory],
GPU: nodeTemplateAttributes.Capacity["gpu"],
VCPU: nodeTemplateAttributes.Capacity[apiv1.ResourceCPU],
Memory: nodeTemplateAttributes.Capacity[apiv1.ResourceMemory],
GPU: nodeTemplateAttributes.Capacity["gpu"],
EphemeralStorage: nodeTemplateAttributes.Capacity[apiv1.ResourceEphemeralStorage],
// Numbers pods per node will depends on the CNI used and the maxPods kubelet config, default is often 110
PodCount: resource.MustParse("110"),
PodCount: resource.MustParse("110"),
ExtendedResources: extendedResources,
}
}
instance.InstanceType = nodeTemplateAttributes.InstanceType
Expand Down Expand Up @@ -964,6 +976,12 @@ func (m *McmManager) buildNodeFromTemplate(name string, template *nodeTemplate)
node.Status.Capacity["hugepages-1Gi"] = *resource.NewQuantity(0, resource.DecimalSI)
node.Status.Capacity["hugepages-2Mi"] = *resource.NewQuantity(0, resource.DecimalSI)

// populate extended resources from nodeTemplate
if len(template.InstanceType.ExtendedResources) > 0 {
klog.V(2).Infof("Copying extended resources %v to template node.Status.Capacity", template.InstanceType.ExtendedResources)
maps.Copy(node.Status.Capacity, template.InstanceType.ExtendedResources)
}

node.Status.Allocatable = node.Status.Capacity

// NodeLabels
Expand Down Expand Up @@ -1011,3 +1029,12 @@ func isMachineFailedOrTerminating(machine *v1alpha1.Machine) bool {
}
return false
}

// filterExtendedResources removes knownResourceNames from allResources and retains only the extendedResources.
func filterExtendedResources(allResources v1.ResourceList) (extendedResources v1.ResourceList) {
extendedResources = allResources.DeepCopy()
maps.DeleteFunc(extendedResources, func(name v1.ResourceName, _ resource.Quantity) bool {
return slices.Contains(knownResourceNames, name)
})
return
}
110 changes: 110 additions & 0 deletions cluster-autoscaler/cloudprovider/mcm/mcm_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ limitations under the License.
package mcm

import (
"errors"
"github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/utils/ptr"
"maps"
"math/rand/v2"
"testing"

"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -134,3 +140,107 @@ func TestFilterNodes(t *testing.T) {
assert.EqualValues(t, len(filteredNodes), 1)
assert.Equal(t, filteredNodes, []*apiv1.Node{node2})
}

func TestValidateNodeTemplate(t *testing.T) {
m5LargeType := createSampleInstanceType("m5.large", "sap.com/mana", resource.MustParse("300"))
nt := v1alpha1.NodeTemplate{
InstanceType: m5LargeType.InstanceType,
Capacity: make(apiv1.ResourceList),
}
err := validateNodeTemplate(&nt)
assert.NotNil(t, err)
assert.True(t, errors.Is(err, ErrInvalidNodeTemplate))

nt.Region = "europe-west1"
nt.Zone = nt.Region + "-b"

err = validateNodeTemplate(&nt)
assert.NotNil(t, err)
assert.True(t, errors.Is(err, ErrInvalidNodeTemplate))

if err != nil {
t.Logf("error %s", err)
}
}

func TestBuildNodeFromTemplate(t *testing.T) {
m := &McmManager{}
namePrefix := "bingo"
m5LargeType := createSampleInstanceType("m5.large", "sap.com/mana", resource.MustParse("300"))
labels := map[string]string{
"weapon": "light-saber",
}
nt := nodeTemplate{
InstanceType: m5LargeType,
Architecture: ptr.To("amd64"),
Labels: labels,
}
nt.Region = "europe-west1"
nt.Zone = nt.Region + "-b"
node, err := m.buildNodeFromTemplate(namePrefix, &nt)
assert.Nil(t, err)
if err != nil {
t.Logf("error %s", err)
}
assert.True(t, isSubset(labels, node.Labels), "labels should be a subset of node.Labels")
for _, k := range []apiv1.ResourceName{apiv1.ResourceMemory, apiv1.ResourceCPU} {
assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k)
}

// test with gpu resource
gpuQuantity := resource.MustParse("4")
nt.InstanceType.GPU = gpuQuantity
node, err = m.buildNodeFromTemplate(namePrefix, &nt)
assert.Nil(t, err)
if err != nil {
t.Logf("error %s", err)
}
for _, k := range []apiv1.ResourceName{apiv1.ResourceMemory, apiv1.ResourceCPU, gpu.ResourceNvidiaGPU} {
assert.Contains(t, node.Status.Capacity, k, "node.Status.Capacity should contain the mandatory resource named: %s", k)
}
actualGpuQuantity, hasGpuResource := node.Status.Capacity[gpu.ResourceNvidiaGPU]
assert.True(t, hasGpuResource, "node.Status.Capacity should have a gpu resource named %q", gpu.ResourceNvidiaGPU)
if hasGpuResource {
assert.Equal(t, gpuQuantity, actualGpuQuantity, "node.Status.Capacity should have gpu resource named %q with value %s instead of %s", gpu.ResourceDirectX, gpuQuantity, actualGpuQuantity)
}
}

func TestFilterExtendedResources(t *testing.T) {
resources := make(apiv1.ResourceList)
for _, n := range knownResourceNames {
resources[n] = *resource.NewQuantity(rand.Int64(), resource.DecimalSI)
}
customResources := make(apiv1.ResourceList)
customResources["resource.com/dongle"] = resource.MustParse("50")
customResources["quantum.com/memory"] = resource.MustParse("100Gi")

allResources := resources.DeepCopy()
maps.Copy(allResources, customResources)

extendedResources := filterExtendedResources(allResources)
t.Logf("TestFilterExtendedResources obtained: %+v", extendedResources)
assert.Equal(t, customResources, extendedResources)
}

func createSampleInstanceType(instanceTypeName string, customResourceName apiv1.ResourceName, customResourceQuantity resource.Quantity) *instanceType {
awsM5Large := AWSInstanceTypes[instanceTypeName]
extendedResources := make(apiv1.ResourceList)
extendedResources[customResourceName] = customResourceQuantity
iType := &instanceType{
InstanceType: awsM5Large.InstanceType,
VCPU: awsM5Large.VCPU,
Memory: awsM5Large.Memory,
GPU: awsM5Large.GPU,
ExtendedResources: extendedResources,
}
return iType
}

func isSubset[K comparable, V comparable](map1, map2 map[K]V) bool {
for k, v := range map1 {
if val, ok := map2[k]; !ok || val != v {
return false
}
}
return true
}
Loading