From 101642756d4f6b31e2e48796fc6a2b024107b39a Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Mon, 27 Nov 2023 15:44:39 +0200
Subject: [PATCH 1/5] balloons: add options for creating balloons close to
 devices

- Add preferCloseToDevices in the balloons CRD for explicit
  configuration.

- Add implicit PreferFarFromDevices option to BalloonType definition.
  This anti-affinity list is filled based on affinities of this and
  other balloon types. If necessary in the future, we can enable
  explicit anti-affinities in CRDs, too.

- If a balloon type has either of these preferences defined, use a
  balloon-type-specific CPU tree allocator that implements the logic
  behind these options.

Signed-off-by: Antti Kervinen
---
 .../balloons/policy/balloons-policy.go        | 39 ++++++++++++++++++-
 cmd/plugins/balloons/policy/cputree.go        |  4 ++
 .../crds/config.nri_balloonspolicies.yaml     |  5 +++
 .../v1alpha1/resmgr/policy/balloons/config.go |  6 +++
 4 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/cmd/plugins/balloons/policy/balloons-policy.go b/cmd/plugins/balloons/policy/balloons-policy.go
index 271ba25ee..7c585df7d 100644
--- a/cmd/plugins/balloons/policy/balloons-policy.go
+++ b/cmd/plugins/balloons/policy/balloons-policy.go
@@ -568,10 +568,12 @@ func (p *balloons) newBalloon(blnDef *BalloonDef, confCpus bool) (*Balloon, erro
 	// are type specific allocator options, otherwise use policy
 	// default allocator.
 	cpuTreeAllocator := p.cpuTreeAllocator
-	if blnDef.AllocatorTopologyBalancing != nil || blnDef.PreferSpreadOnPhysicalCores != nil {
+	if blnDef.AllocatorTopologyBalancing != nil || blnDef.PreferSpreadOnPhysicalCores != nil || len(blnDef.PreferCloseToDevices) > 0 || len(blnDef.PreferFarFromDevices) > 0 {
 		allocatorOptions := cpuTreeAllocatorOptions{
 			topologyBalancing:           p.bpoptions.AllocatorTopologyBalancing,
 			preferSpreadOnPhysicalCores: p.bpoptions.PreferSpreadOnPhysicalCores,
+			preferCloseToDevices:        blnDef.PreferCloseToDevices,
+			preferFarFromDevices:        blnDef.PreferFarFromDevices,
 		}
 		if blnDef.AllocatorTopologyBalancing != nil {
 			allocatorOptions.topologyBalancing = *blnDef.AllocatorTopologyBalancing
@@ -1091,6 +1093,8 @@ func (p *balloons) setConfig(bpoptions *BalloonsOptions) error {
 	p.balloons = []*Balloon{}
 	p.freeCpus = p.allowed.Clone()
 	p.freeCpus = p.freeCpus.Difference(p.reserved)
+	p.fillFarFromDevices(bpoptions.BalloonDefs)
+
 	p.cpuTreeAllocator = p.cpuTree.NewAllocator(cpuTreeAllocatorOptions{
 		topologyBalancing:           bpoptions.AllocatorTopologyBalancing,
 		preferSpreadOnPhysicalCores: bpoptions.PreferSpreadOnPhysicalCores,
@@ -1146,6 +1150,39 @@ func (p *balloons) setConfig(bpoptions *BalloonsOptions) error {
 	return nil
 }
 
+// fillFarFromDevices adds to BalloonDefs implicit device
+// anti-affinities towards devices that other BalloonDefs prefer to
+// be close to.
+func (p *balloons) fillFarFromDevices(blnDefs []*BalloonDef) {
+	// devDefClose[device][blnDef.Name] equals true if and
+	// only if the blnDef prefers to be close to the device.
+	devDefClose := map[string]map[string]bool{}
+	// avoidDevs is a list of devices that at least one balloon
+	// type prefers to be close to. The order of devices in the
+	// avoidDevs list is significant: devices at the beginning of
+	// the list will be more effectively avoided than devices
+	// later in the list.
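+	// For example, if only balloon type A lists device X in its
+	// preferCloseToDevices, X ends up on the PreferFarFromDevices
+	// list of every other balloon type, while type A's own
+	// anti-affinities are left untouched.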
+	avoidDevs := []string{}
+	for _, blnDef := range blnDefs {
+		for _, closeDev := range blnDef.PreferCloseToDevices {
+			if _, ok := devDefClose[closeDev]; !ok {
+				avoidDevs = append(avoidDevs, closeDev)
+				devDefClose[closeDev] = map[string]bool{}
+			}
+			devDefClose[closeDev][blnDef.Name] = true
+		}
+	}
+	// Add every device in avoidDevs to the PreferFarFromDevices
+	// lists of those balloon types that do not prefer to be close
+	// to the device.
+	for _, avoidDev := range avoidDevs {
+		for _, blnDef := range blnDefs {
+			if !devDefClose[avoidDev][blnDef.Name] {
+				blnDef.PreferFarFromDevices = append(blnDef.PreferFarFromDevices, avoidDev)
+			}
+		}
+	}
+}
+
 // closestMems returns memory node IDs good for pinning containers
 // that run on given CPUs
 func (p *balloons) closestMems(cpus cpuset.CPUSet) idset.IDSet {
diff --git a/cmd/plugins/balloons/policy/cputree.go b/cmd/plugins/balloons/policy/cputree.go
index 568778cc9..6b8bad6b3 100644
--- a/cmd/plugins/balloons/policy/cputree.go
+++ b/cmd/plugins/balloons/policy/cputree.go
@@ -67,8 +67,12 @@ type cpuTreeAllocatorOptions struct {
 	// the opposite (packed allocations).
 	topologyBalancing           bool
 	preferSpreadOnPhysicalCores bool
+	preferCloseToDevices        []string
+	preferFarFromDevices        []string
 }
 
+var emptyCpuSet = cpuset.New()
+
 // String returns string representation of a CPU tree node.
 func (t *cpuTreeNode) String() string {
 	if len(t.children) == 0 {
diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml
index 178c7d280..07121dd55 100644
--- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml
+++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml
@@ -106,6 +106,11 @@ spec:
                     items:
                       type: string
                     type: array
+                  preferCloseToDevices:
+                    description: prefer creating new balloons of this type close to listed devices.
+                    items:
+                      type: string
+                    type: array
                   preferNewBalloons:
                     description: 'PreferNewBalloons: prefer creating new balloons
                       over adding containers to existing balloons. The default is
diff --git a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go
index 754e2ac2a..3b1c4f7e7 100644
--- a/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go
+++ b/pkg/apis/config/v1alpha1/resmgr/policy/balloons/config.go
@@ -183,6 +183,12 @@ type BalloonDef struct {
 	// +kubebuilder:validation:Enum="";system;package;die;numa;core;thread
 	// +kubebuilder:validation:Format:string
 	ShareIdleCpusInSame CPUTopologyLevel `json:"shareIdleCPUsInSame,omitempty"`
+	// PreferCloseToDevices: prefer creating new balloons of this
+	// type close to listed devices.
+	PreferCloseToDevices []string `json:"preferCloseToDevices,omitempty"`
+	// PreferFarFromDevices: prefer creating new balloons of this
+	// type far from listed devices.
+	PreferFarFromDevices []string `json:"preferFarFromDevices,omitempty"`
 }
 
 // String stringifies a BalloonDef

From ed991ac49eadc448fcae1efb96c0efb25d05410e Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Mon, 27 Nov 2023 16:01:48 +0200
Subject: [PATCH 2/5] balloons: implement device affinity/anti-affinity CPU
 allocation

- Change the CPU tree allocator to use a chain of ResizeCpus()
  functions to avoid hardcoding the order in which various aspects
  affecting CPU allocation are taken into account. Without the chain
  structure, every level of ResizeCpus() would need to know which
  aspect is next in importance when allocating CPUs.
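
  For illustration, the delegation works as in the following
  self-contained sketch (simplified, hypothetical names; the real
  cpuResizerFunc also passes CPU sets and returns errors):

      package main

      import "fmt"

      // resizerFunc mirrors cpuResizerFunc: every resizer receives
      // the rest of the chain and either decides by itself or
      // delegates to the next resizer.
      type resizerFunc func(rest []resizerFunc, delta int) string

      func next(rest []resizerFunc, delta int) string {
              return rest[0](rest[1:], delta)
      }

      // onlyIfNecessary handles the trivial case itself and
      // delegates everything else.
      func onlyIfNecessary(rest []resizerFunc, delta int) string {
              if delta == 0 {
                      return "nothing to do"
              }
              return next(rest, delta)
      }

      // now never delegates: it terminates the chain.
      func now(rest []resizerFunc, delta int) string {
              return fmt.Sprintf("resize by %d", delta)
      }

      func main() {
              chain := []resizerFunc{onlyIfNecessary, now}
              fmt.Println(next(chain, 0)) // nothing to do
              fmt.Println(next(chain, 2)) // resize by 2
      }

  New aspects can be added to the chain without existing resizers
  knowing about them, which is the point of the structure.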
Signed-off-by: Antti Kervinen
---
 cmd/plugins/balloons/policy/cputree.go | 185 +++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 9 deletions(-)

diff --git a/cmd/plugins/balloons/policy/cputree.go b/cmd/plugins/balloons/policy/cputree.go
index 6b8bad6b3..3cfb2cb35 100644
--- a/cmd/plugins/balloons/policy/cputree.go
+++ b/cmd/plugins/balloons/policy/cputree.go
@@ -21,6 +21,7 @@ import (
 	"strings"
 
 	system "github.com/containers/nri-plugins/pkg/sysfs"
+	"github.com/containers/nri-plugins/pkg/topology"
 	"github.com/containers/nri-plugins/pkg/utils/cpuset"
 )
 
@@ -55,8 +56,9 @@ type cpuTreeNodeAttributes struct {
 // cpuTreeAllocator allocates CPUs from the branch of a CPU tree
 // where the "root" node is the topmost CPU of the branch.
 type cpuTreeAllocator struct {
-	options cpuTreeAllocatorOptions
-	root    *cpuTreeNode
+	options           cpuTreeAllocatorOptions
+	root              *cpuTreeNode
+	cacheCloseCpuSets map[string][]cpuset.CPUSet
 }
 
 // cpuTreeAllocatorOptions contains parameters for the CPU allocator
@@ -399,8 +401,9 @@ func (t *cpuTreeNode) SplitLevel(splitLevel CPUTopologyLevel, cpuClassifier func
 // CPU tree branch.
 func (t *cpuTreeNode) NewAllocator(options cpuTreeAllocatorOptions) *cpuTreeAllocator {
 	ta := &cpuTreeAllocator{
-		root:    t,
-		options: options,
+		root:              t,
+		options:           options,
+		cacheCloseCpuSets: map[string][]cpuset.CPUSet{},
 	}
 	if options.preferSpreadOnPhysicalCores {
 		newTree := t.SplitLevel(CPUTopologyLevelNuma,
@@ -506,8 +509,172 @@ func (ta *cpuTreeAllocator) sorterRelease(tnas []cpuTreeNodeAttributes) func(int
 // - removeFromCpus contains CPUs in currentCpus set from which
 //   abs(delta) CPUs can be freed.
 func (ta *cpuTreeAllocator) ResizeCpus(currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
+	resizers := []cpuResizerFunc{
+		ta.resizeCpusOnlyIfNecessary,
+		ta.resizeCpusWithDevices,
+		ta.resizeCpusOneAtATime,
+		ta.resizeCpusMaxLocalSet,
+		ta.resizeCpusNow}
+	return ta.nextCpuResizer(resizers, currentCpus, freeCpus, delta)
+}
+
+type cpuResizerFunc func(resizers []cpuResizerFunc, currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error)
+
+func (ta *cpuTreeAllocator) nextCpuResizer(resizers []cpuResizerFunc, currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
+	if len(resizers) == 0 {
+		return freeCpus, currentCpus, fmt.Errorf("internal error: a CPU resizer consulted the next resizer but there was none left")
+	}
+	remainingResizers := resizers[1:]
+	log.Debugf("- resizer-%d(%q, %q, %d)", len(remainingResizers), currentCpus, freeCpus, delta)
+	addFrom, removeFrom, err := resizers[0](remainingResizers, currentCpus, freeCpus, delta)
+	return addFrom, removeFrom, err
+}
+
+// resizeCpusNow does not call the next resizer. Instead, it treats
+// all CPU allocations from freeCpus and all CPU releases from
+// currentCpus as equally good. This is the terminal resizer of the
+// chain.
+func (ta *cpuTreeAllocator) resizeCpusNow(resizers []cpuResizerFunc, currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
+	return freeCpus, currentCpus, nil
+}
+
+// resizeCpusOnlyIfNecessary is the fast path for making trivial
+// reservations and for failing fast if resizing is not possible.
+func (ta *cpuTreeAllocator) resizeCpusOnlyIfNecessary(resizers []cpuResizerFunc, currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
+	switch {
+	case delta == 0:
+		// Nothing to do.
+		return emptyCpuSet, emptyCpuSet, nil
+	case delta > 0:
+		if freeCpus.Size() < delta {
+			return freeCpus, emptyCpuSet, fmt.Errorf("not enough free CPUs (%d) to resize current CPU set from %d to %d CPUs", freeCpus.Size(), currentCpus.Size(), currentCpus.Size()+delta)
+		} else if freeCpus.Size() == delta {
+			// Allocate all the remaining free CPUs.
+			return freeCpus, emptyCpuSet, nil
+		}
+	case delta < 0:
+		if currentCpus.Size() < -delta {
+			return emptyCpuSet, currentCpus, fmt.Errorf("not enough current CPUs (%d) to release %d CPUs", currentCpus.Size(), -delta)
+		} else if currentCpus.Size() == -delta {
+			// Free all allocated CPUs.
+			return emptyCpuSet, currentCpus, nil
+		}
+	}
+	return ta.nextCpuResizer(resizers, currentCpus, freeCpus, delta)
+}
+
+// resizeCpusWithDevices prefers allocating CPUs from those freeCpus
+// that are topologically close to preferred devices, and releasing
+// those currentCpus that are not.
+func (ta *cpuTreeAllocator) resizeCpusWithDevices(resizers []cpuResizerFunc, currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
+	// allCloseCpuSets contains cpusets in priority order: cpusets
+	// earlier in the list take priority over those after them.
+	allCloseCpuSets := [][]cpuset.CPUSet{}
+	for _, devPath := range ta.options.preferCloseToDevices {
+		if closeCpuSets := ta.topologyHintCpus(devPath); len(closeCpuSets) > 0 {
+			allCloseCpuSets = append(allCloseCpuSets, closeCpuSets)
+		}
+	}
+	for _, devPath := range ta.options.preferFarFromDevices {
+		for _, farCpuSet := range ta.topologyHintCpus(devPath) {
+			allCloseCpuSets = append(allCloseCpuSets, []cpuset.CPUSet{freeCpus.Difference(farCpuSet)})
+		}
+	}
+	if len(allCloseCpuSets) == 0 {
+		return ta.nextCpuResizer(resizers, currentCpus, freeCpus, delta)
+	}
+	if delta > 0 {
+		// Allocate N=delta CPUs from freeCpus based on topology
+		// hints. Build a new set of freeCpus with at least N CPUs
+		// based on intersection with CPU hints. In case of
+		// conflicting topology hints, the first hints in the list
+		// are the most important.
+		remainingFreeCpus := freeCpus
+		appliedHints := 0
+		totalHints := 0
+		for _, closeCpuSets := range allCloseCpuSets {
+			for _, cpus := range closeCpuSets {
+				totalHints++
+				newRemainingFreeCpus := remainingFreeCpus.Intersection(cpus)
+				if newRemainingFreeCpus.Size() >= delta {
+					appliedHints++
+					log.Debugf("  - take hinted cpus %q, common free %q", cpus, newRemainingFreeCpus)
+					remainingFreeCpus = newRemainingFreeCpus
+				} else {
+					log.Debugf("  - drop hinted cpus %q, not enough common free in %q", cpus, newRemainingFreeCpus)
+				}
+			}
+		}
+		log.Debugf("  - original free cpus %q, took %d/%d hints, remaining free: %q",
+			freeCpus, appliedHints, totalHints, remainingFreeCpus)
+		return ta.nextCpuResizer(resizers, currentCpus, remainingFreeCpus, delta)
+	} else if delta < 0 {
+		// Free N=-delta CPUs from currentCpus based on topology hints.
+		// 1. Sort currentCpus based on topology hints (leastHintedCpus).
+		// 2. Pick the largest hint value that must be released (maxHints).
+		// 3. Free all CPUs that have a hint value smaller than maxHints.
+		// 4. Let the next CPU resizer choose the CPUs to be freed
+		//    among CPUs with hint value maxHints.
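+		// Hint weights: a CPU matching the hint with priority p
+		// scores 1 << (len(allCloseCpuSets)-1-p), so earlier
+		// hints contribute exponentially larger weights. For
+		// example, with three hints, a CPU matching only hint 0
+		// scores 0b100, while a CPU matching both hints 1 and 2
+		// scores 0b011.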
+		currentCpuHints := map[int]uint64{}
+		for hintPriority, closeCpuSets := range allCloseCpuSets {
+			for _, cpus := range closeCpuSets {
+				for _, cpu := range cpus.Intersection(currentCpus).UnsortedList() {
+					currentCpuHints[cpu] += 1 << (len(allCloseCpuSets) - 1 - hintPriority)
+				}
+			}
+		}
+		leastHintedCpus := currentCpus.UnsortedList()
+		sort.Slice(leastHintedCpus, func(i, j int) bool {
+			return currentCpuHints[leastHintedCpus[i]] < currentCpuHints[leastHintedCpus[j]]
+		})
+		maxHints := currentCpuHints[leastHintedCpus[-delta]]
+		currentToFreeForSure := cpuset.New()
+		currentToFreeMaybe := cpuset.New()
+		for i := 0; i < len(leastHintedCpus) && currentCpuHints[leastHintedCpus[i]] <= maxHints; i++ {
+			if currentCpuHints[leastHintedCpus[i]] < maxHints {
+				currentToFreeForSure = currentToFreeForSure.Union(cpuset.New(leastHintedCpus[i]))
+			} else {
+				currentToFreeMaybe = currentToFreeMaybe.Union(cpuset.New(leastHintedCpus[i]))
+			}
+		}
+		remainingDelta := delta + currentToFreeForSure.Size()
+		log.Debugf("  - device hints: from cpus %q: free for sure: %q and %d more from: %q",
+			currentCpus, currentToFreeForSure, -remainingDelta, currentToFreeMaybe)
+		_, freeFromMaybe, err := ta.nextCpuResizer(resizers, currentToFreeMaybe, freeCpus, remainingDelta)
+		// Do not include possible extra CPUs from freeFromMaybe,
+		// to make sure that all CPUs with the least hints will be
+		// freed.
+		for _, cpu := range freeFromMaybe.UnsortedList() {
+			if currentToFreeForSure.Size() >= -delta {
+				break
+			}
+			currentToFreeForSure = currentToFreeForSure.Union(cpuset.New(cpu))
+		}
+		return freeCpus, currentToFreeForSure, err
+	}
+	return freeCpus, currentCpus, nil
+}
+
+// topologyHintCpus returns the topology-hinted cpusets of a device
+// from cache, fetching and caching them on first use. An error is
+// logged only once per bad device.
+func (ta *cpuTreeAllocator) topologyHintCpus(dev string) []cpuset.CPUSet {
+	if closeCpuSets, ok := ta.cacheCloseCpuSets[dev]; ok {
+		return closeCpuSets
+	}
+	topologyHints, err := topology.NewTopologyHints(dev)
+	if err != nil {
+		log.Errorf("failed to find topology of device %q: %v", dev, err)
+		ta.cacheCloseCpuSets[dev] = []cpuset.CPUSet{}
+	} else {
+		for _, topologyHint := range topologyHints {
+			ta.cacheCloseCpuSets[dev] = append(ta.cacheCloseCpuSets[dev], cpuset.MustParse(topologyHint.CPUs))
+		}
+	}
+	return ta.cacheCloseCpuSets[dev]
+}
+
+func (ta *cpuTreeAllocator) resizeCpusOneAtATime(resizers []cpuResizerFunc, currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
 	if delta > 0 {
-		addFromSuperset, removeFromSuperset, err := ta.resizeCpus(currentCpus, freeCpus, delta)
+		addFromSuperset, removeFromSuperset, err := ta.nextCpuResizer(resizers, currentCpus, freeCpus, delta)
 		if !ta.options.preferSpreadOnPhysicalCores || addFromSuperset.Size() == delta {
 			return addFromSuperset, removeFromSuperset, err
 		}
@@ -519,7 +686,7 @@ func (ta *cpuTreeAllocator) ResizeCpus(currentCpus, freeCpus cpuset.CPUSet, delt
 		// set by adding one CPU at a time.
 		addFrom := cpuset.New()
 		for n := 0; n < delta; n++ {
-			addSingleFrom, _, err := ta.resizeCpus(currentCpus, freeCpus, 1)
+			addSingleFrom, _, err := ta.nextCpuResizer(resizers, currentCpus, freeCpus, 1)
 			if err != nil {
 				return addFromSuperset, removeFromSuperset, err
 			}
@@ -544,7 +711,7 @@ func (ta *cpuTreeAllocator) ResizeCpus(currentCpus, freeCpus cpuset.CPUSet, delt
 		removeFrom := cpuset.New()
 		addFrom := cpuset.New()
 		for n := 0; n < -delta; n++ {
-			_, removeSingleFrom, err := ta.resizeCpus(currentCpus, freeCpus, -1)
+			_, removeSingleFrom, err := ta.nextCpuResizer(resizers, currentCpus, freeCpus, -1)
 			if err != nil {
 				return addFrom, removeFrom, err
 			}
@@ -567,7 +734,7 @@ func (ta *cpuTreeAllocator) ResizeCpus(currentCpus, freeCpus cpuset.CPUSet, delt
 	return addFrom, removeFrom, nil
 }
 
-func (ta *cpuTreeAllocator) resizeCpus(currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
+func (ta *cpuTreeAllocator) resizeCpusMaxLocalSet(resizers []cpuResizerFunc, currentCpus, freeCpus cpuset.CPUSet, delta int) (cpuset.CPUSet, cpuset.CPUSet, error) {
 	tnas := ta.root.ToAttributedSlice(currentCpus, freeCpus,
 		func(tna *cpuTreeNodeAttributes) bool {
 			// filter out branches with insufficient cpus
@@ -591,5 +758,5 @@ func (ta *cpuTreeAllocator) resizeCpus(currentCpus, freeCpus cpuset.CPUSet, delt
 	if len(tnas) == 0 {
 		return freeCpus, currentCpus, fmt.Errorf("not enough free CPUs")
 	}
-	return tnas[0].freeCpus, tnas[0].currentCpus, nil
+	return ta.nextCpuResizer(resizers, tnas[0].currentCpus, tnas[0].freeCpus, delta)
 }

From c34742aee9f40b6093104ffd451c5fbb9fc4d966 Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Mon, 27 Nov 2023 16:17:06 +0200
Subject: [PATCH 3/5] balloons: document the preferCloseToDevices option

Signed-off-by: Antti Kervinen
---
 docs/resource-policy/policy/balloons.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/docs/resource-policy/policy/balloons.md b/docs/resource-policy/policy/balloons.md
index b5a1a7643..e64760049 100644
--- a/docs/resource-policy/policy/balloons.md
+++ b/docs/resource-policy/policy/balloons.md
@@ -116,6 +116,8 @@ Balloons policy parameters:
     request less.
   - `CpuClass` specifies the name of the CPU class according to which
     CPUs of balloons are configured.
+  - `PreferCloseToDevices`: prefer creating new balloons close to
+    listed devices (a list of strings).
   - `PreferSpreadingPods`: if `true`, containers of the same pod
     should be spread to different balloons of this type. The default
     is `false`: prefer placing containers of the same pod to the same
@@ -145,6 +147,18 @@ Balloons policy parameters:
     the balloon.
   - `PreferSpreadOnPhysicalCores` overrides the policy level option
     with the same name in the scope of this balloon type.
+  - `PreferCloseToDevices` prefers creating new balloons close to
+    listed devices. If not all preferences can be fulfilled,
+    preferences for the first devices in the list override
+    preferences for devices after them. Adding this preference to any
+    balloon type automatically adds a corresponding anti-affinity to
+    other balloon types that do not prefer to be close to the same
+    device: they prefer being created away from the device. Example:
+    ```
+    preferCloseToDevices:
+    - /sys/class/net/eth0
+    - /sys/class/block/sda
+    ```
   - `AllocatorPriority` (0: High, 1: Normal, 2: Low, 3: None). CPU
     allocator parameter, used when creating new or resizing existing
     balloons.
     If there are balloon types with pre-created balloons

From 1dd35bdef05cc4bcb6f5cea81da125c3624cc31e Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Mon, 27 Nov 2023 16:38:56 +0200
Subject: [PATCH 4/5] e2e: add test for balloons preferCloseToDevices

Signed-off-by: Antti Kervinen
---
 .../balloons-allocator-opts.cfg               |  7 +++-
 .../n4c16/test10-allocator-opts/code.var.sh   | 34 ++++++++++++++++---
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/balloons-allocator-opts.cfg b/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/balloons-allocator-opts.cfg
index 9be442e42..dad244d5e 100644
--- a/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/balloons-allocator-opts.cfg
+++ b/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/balloons-allocator-opts.cfg
@@ -10,7 +10,7 @@ config:
       minBalloons: 2
     - name: topo1cores0
       minCPUs: 2
-      minBalloons: 2
+      minBalloons: 1
       preferSpreadOnPhysicalCores: false
    - name: topo0cores1
      allocatorTopologyBalancing: false
@@ -21,6 +21,11 @@ config:
    - name: topo1cores1
      allocatorTopologyBalancing: true
      preferSpreadOnPhysicalCores: true
+    - name: device-node2
+      preferNewBalloons: true
+      preferSpreadOnPhysicalCores: true
+      preferCloseToDevices:
+        - "/sys/devices/system/node/node2"
 
 instrumentation:
   httpEndpoint: :8891
diff --git a/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/code.var.sh b/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/code.var.sh
index dc5dd2e96..c82b4072c 100644
--- a/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/code.var.sh
+++ b/test/e2e/policies.test-suite/balloons/n4c16/test10-allocator-opts/code.var.sh
@@ -15,7 +15,8 @@ CPUREQ="100m" MEMREQ="100M" CPULIM="100m" MEMLIM="100M"
 POD_ANNOTATION="balloon.balloons.resource-policy.nri.io: policydefaults" CONTCOUNT=1 create balloons-busybox
 report allowed
 verify 'len(cores["pod0c0"]) == 2' \
-       'len(cpus["pod0c0"]) == 2'
+       'len(cpus["pod0c0"]) == 2' \
+       '"node2" not in nodes["pod0c0"]'
 
 # pod1 in a 2-CPU balloon
 CPUREQ="100m" MEMREQ="100M" CPULIM="100m" MEMLIM="100M"
 POD_ANNOTATION="balloon.balloons.resource-policy.nri.io: topo1cores0" CONTCOUNT=1 create balloons-busybox
 report allowed
 verify 'len(cores["pod1c0"]) == 1' \
-       'len(cpus["pod1c0"]) == 2'
+       'len(cpus["pod1c0"]) == 2' \
+       '"node2" not in nodes["pod1c0"]'
 
 # pod2: container 0 resizes first from 0 to 1, container 2 from 1 to 2 CPUs,
 # use more cores
 CPUREQ="100m" MEMREQ="100M" CPULIM="100m" MEMLIM="100M"
 POD_ANNOTATION="balloon.balloons.resource-policy.nri.io: topo1cores1" CONTCOUNT=2 create balloons-busybox
 report allowed
 verify 'len(cores["pod2c0"]) == 2' \
        'len(cpus["pod2c0"]) == 2' \
-       'cpus["pod2c0"] == cpus["pod2c1"]'
+       'cpus["pod2c0"] == cpus["pod2c1"]' \
+       '"node2" not in nodes["pod2c0"]'
+
+# make room for pod3, because at this point only node2 should be
+# empty and we would not be able to pack tightly elsewhere.
+vm-command "kubectl delete pods pod0 pod1 pod2 --now" # pod3: container 0 resizes first from 0 to 1, container 2 from 1 to 2 CPUs, # pack tightly @@ -41,7 +48,26 @@ POD_ANNOTATION="balloon.balloons.resource-policy.nri.io: topo0cores0" CONTCOUNT= report allowed verify 'len(cores["pod3c0"]) == 1' \ 'len(cpus["pod3c0"]) == 2' \ - 'cpus["pod3c0"] == cpus["pod3c1"]' + 'cpus["pod3c0"] == cpus["pod3c1"]' \ + '"node2" not in nodes["pod3c0"]' + +# pod4 in new balloon for which node2 should have been kept free +CPUREQ="3" MEMREQ="100M" CPULIM="6" MEMLIM="100M" +POD_ANNOTATION="balloon.balloons.resource-policy.nri.io: device-node2" CONTCOUNT=1 create balloons-busybox +report allowed +verify '{"node2"} == nodes["pod4c0"]' \ + 'len(cores["pod4c0"]) == 2' \ + 'len(cpus["pod4c0"]) == 3' + +vm-command "kubectl delete pods pod0 pod1 pod2 --now" + +# pod5 in new balloon that will not fit on node2, ignore device hint and allocate from elsewhere +CPUREQ="2" MEMREQ="100M" CPULIM="6" MEMLIM="100M" +POD_ANNOTATION="balloon.balloons.resource-policy.nri.io: device-node2" CONTCOUNT=1 create balloons-busybox +report allowed +verify '"node2" not in nodes["pod5c0"]' \ + 'len(cores["pod5c0"]) == 2' \ + 'len(cpus["pod5c0"]) == 2' cleanup helm-terminate From f1a7dc43855e5183048d7a88c5555b5542089446 Mon Sep 17 00:00:00 2001 From: Antti Kervinen Date: Mon, 27 Nov 2023 16:40:12 +0200 Subject: [PATCH 5/5] balloons: add unit test for balloons device affinity Signed-off-by: Antti Kervinen --- cmd/plugins/balloons/policy/cputree_test.go | 75 +++++++++++++++++++-- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/cmd/plugins/balloons/policy/cputree_test.go b/cmd/plugins/balloons/policy/cputree_test.go index b1021a2f2..ba7560162 100644 --- a/cmd/plugins/balloons/policy/cputree_test.go +++ b/cmd/plugins/balloons/policy/cputree_test.go @@ -110,6 +110,15 @@ func newCpuTreeFromInt5(pdnct [5]int) (*cpuTreeNode, cpusInTopology) { return sysTree, csit } +func verifyOn(t *testing.T, nameContents string, cpus cpuset.CPUSet, csit cpusInTopology) { + for _, cpuID := range cpus.List() { + name := csit[cpuID].threadName + if !strings.Contains(name, nameContents) { + t.Errorf("cpu%d (%s) not in expected region %s", cpuID, name, nameContents) + } + } +} + func verifyNotOn(t *testing.T, nameContents string, cpus cpuset.CPUSet, csit cpusInTopology) { for _, cpuID := range cpus.List() { name := csit[cpuID].threadName @@ -230,9 +239,11 @@ func TestResizeCpus(t *testing.T) { } tcases := []struct { name string - topology [5]int // package, die, numa, core, thread count - allocatorTB bool // allocator topologyBalancing - allocatorPSoPC bool // allocator preferSpreadOnPhysicalCores + topology [5]int // package, die, numa, core, thread count + allocatorTB bool // allocator topologyBalancing + allocatorPSoPC bool // allocator preferSpreadOnPhysicalCores + allocatorPCtD []string // allocator preferCloseToDevices + allocatorPFfD []string // allocator preferFarFromDevices allocations []int deltas []int allocate bool @@ -240,6 +251,7 @@ func TestResizeCpus(t *testing.T) { expectCurrentOnSame []string expectCurrentNotOnSame []string expectAllOnSame []string + expectCurrentOn []string expectCurrentNotOn []string expectAddSizes []int expectDisjoint []TopoCcids // which ccids should be disjoint @@ -471,6 +483,51 @@ func TestResizeCpus(t *testing.T) { {"package", []int{1, 2}}, {"package", []int{1, 2, 3}}, {"package", []int{1, 2, 3, 4}}, }, }, + { + name: "prefer close to devices", + topology: [5]int{2, 1, 2, 2, 2}, + allocatorTB: true, + 
+			allocatorPSoPC: true,
+			allocatorPCtD: []string{
+				"/sys/cpus:4-7", // close to p0d0n1c*
+				"/sys/cpus:3",   // close to p0d0n0c01t1
+				"/sys/cpus:2-7", // close to p0d0n*
+			},
+			allocatorPFfD: []string{
+				"/sys/cpus:0-1", // far from p0d0n0c00t*
+			},
+			deltas: []int{
+				1, 3, 1, -1,
+				4, -3, 1, -1,
+			},
+			allocate: true,
+			operateOnCcid: []int{
+				1, 1, 1, 1, // container 1 allocates cpus 4-7
+				2, 2, 2, 2, // container 2 cannot get enough cpus from 2-7
+			},
+			expectCurrentOn: []string{
+				"p0d0n1", // cpus:4-7
+				"p0d0n1", // cpus:4-7
+				"p0d0",   // cpus:0-7
+				"p0d0n1", // cpus:4-7
+				// container 2
+				"p1",          // cpus:8-15
+				"p1",          // cpus:8-15
+				"",            // cpus:any
+				"p0d0n0c01t1", // cpus:3
+			},
+			expectCurrentNotOn: []string{
+				"p0d0n0", // cpus:0-3
+				"p0d0n0", // cpus:0-3
+				"p0d0n0c00", // cpus:0-1
+				"p0d0n0", // cpus:0-3
+				// container 2
+				"p0d0n0c00", // cpus:0-1
+				"p0d0n0c00", // cpus:0-1
+				"p0d0n0c00", // cpus:0-1
+				"p0d0n0c00", // cpus:0-1
+			},
+		},
 	}
 	for _, tc := range tcases {
 		t.Run(tc.name, func(t *testing.T) {
 			treeA := tree.NewAllocator(cpuTreeAllocatorOptions{
 				topologyBalancing:           tc.allocatorTB,
 				preferSpreadOnPhysicalCores: tc.allocatorPSoPC,
+				preferCloseToDevices:        tc.allocatorPCtD,
+				preferFarFromDevices:        tc.allocatorPFfD,
 			})
+			for _, dev := range append(tc.allocatorPCtD, tc.allocatorPFfD...) {
+				treeA.cacheCloseCpuSets[dev] = []cpuset.CPUSet{
+					cpuset.MustParse(dev[len("/sys/cpus:"):]),
+				}
+			}
 			currentCpus := cpuset.New()
 			freeCpus := tree.Cpus()
 			if len(tc.allocations) > 0 {
@@ -546,6 +610,9 @@ func TestResizeCpus(t *testing.T) {
 				if i < len(tc.expectCurrentNotOnSame) && tc.expectCurrentNotOnSame[i] != "" {
 					verifyNotSame(t, tc.expectCurrentNotOnSame[i], currentCpus, csit)
 				}
+				if i < len(tc.expectCurrentOn) && tc.expectCurrentOn[i] != "" {
+					verifyOn(t, tc.expectCurrentOn[i], currentCpus, csit)
+				}
 				if i < len(tc.expectCurrentNotOn) && tc.expectCurrentNotOn[i] != "" {
 					verifyNotOn(t, tc.expectCurrentNotOn[i], currentCpus, csit)
 				}
@@ -584,7 +651,7 @@ func TestWalk(t *testing.T) {
 	foundLevel := CPUTopologyLevelUndefined
 	rv := tree.DepthFirstWalk(func(tn *cpuTreeNode) error {
 		foundName = tn.name
-		foundLevel = tn.level
+		foundLevel = string(tn.level)
 		return nil
 	})
 	if rv != nil {