From 613df117266e30d64aee903045f880af7aab403e Mon Sep 17 00:00:00 2001 From: Seth Hoenig Date: Fri, 6 Oct 2023 14:35:48 +0000 Subject: [PATCH] core: plumbing to support numa aware scheduling --- api/resources.go | 32 ++ api/resources_test.go | 54 ++ client/allocrunner/taskrunner/task_runner.go | 2 +- client/client.go | 22 +- client/client_test.go | 74 ++- client/fingerprint/cpu.go | 28 +- client/fingerprint/cpu_default_test.go | 21 +- client/fingerprint/memory.go | 6 - client/fingerprint/memory_test.go | 18 +- client/fingerprint/network.go | 5 - client/fingerprint/network_test.go | 16 +- client/fingerprint/storage.go | 4 - client/fingerprint/storage_test.go | 8 - client/fingerprint/structs.go | 1 - client/fingerprint_manager_test.go | 6 +- client/lib/idset/idset.go | 18 + client/lib/numalib/hw/ids.go | 4 +- client/lib/numalib/topology.go | 24 +- client/taskenv/env.go | 4 +- command/agent/job_endpoint.go | 6 + command/agent/job_endpoint_test.go | 17 + command/node_status.go | 18 +- lib/cpuset/cpuset.go | 199 ------- lib/cpuset/cpuset_test.go | 240 --------- nomad/fsm_test.go | 4 +- nomad/job_endpoint.go | 2 + nomad/job_endpoint_hook_numa.go | 13 + nomad/job_endpoint_hook_numa_ce.go | 30 ++ nomad/job_endpoint_hook_numa_ce_test.go | 43 ++ nomad/job_endpoint_hook_vault_ce.go | 1 - nomad/job_endpoint_test.go | 17 +- nomad/mock/node.go | 16 +- nomad/node_endpoint.go | 3 +- nomad/node_endpoint_test.go | 2 +- nomad/plan_apply_test.go | 8 +- nomad/structs/diff.go | 29 + nomad/structs/funcs.go | 11 +- nomad/structs/funcs_test.go | 534 ++++--------------- nomad/structs/node_class_test.go | 2 +- nomad/structs/numa.go | 181 +++++++ nomad/structs/numa_test.go | 172 ++++++ nomad/structs/structs.go | 296 ++++------ nomad/structs/structs_test.go | 88 +-- nomad/structs/testing.go | 34 +- scheduler/benchmarks/benchmarks_test.go | 6 +- scheduler/context_test.go | 77 ++- scheduler/generic_sched_test.go | 51 +- scheduler/numa_ce.go | 33 ++ scheduler/preemption.go | 6 +- 
scheduler/preemption_test.go | 20 +- scheduler/rank.go | 63 ++- scheduler/rank_test.go | 160 +++--- scheduler/scheduler_sysbatch_test.go | 9 +- scheduler/scheduler_system_test.go | 11 +- scheduler/util.go | 2 + scheduler/util_test.go | 21 +- 56 files changed, 1285 insertions(+), 1487 deletions(-) delete mode 100644 lib/cpuset/cpuset.go delete mode 100644 lib/cpuset/cpuset_test.go create mode 100644 nomad/job_endpoint_hook_numa.go create mode 100644 nomad/job_endpoint_hook_numa_ce.go create mode 100644 nomad/job_endpoint_hook_numa_ce_test.go create mode 100644 nomad/structs/numa.go create mode 100644 nomad/structs/numa_test.go create mode 100644 scheduler/numa_ce.go diff --git a/api/resources.go b/api/resources.go index d41a4629363b..a6fe60d20eb3 100644 --- a/api/resources.go +++ b/api/resources.go @@ -17,6 +17,7 @@ type Resources struct { DiskMB *int `mapstructure:"disk" hcl:"disk,optional"` Networks []*NetworkResource `hcl:"network,block"` Devices []*RequestedDevice `hcl:"device,block"` + NUMA *NUMAResource `hcl:"numa,block"` // COMPAT(0.10) // XXX Deprecated. Please do not use. The field will be removed in Nomad @@ -50,6 +51,8 @@ func (r *Resources) Canonicalize() { for _, d := range r.Devices { d.Canonicalize() } + + r.NUMA.Canonicalize() } // DefaultResources is a small resources object that contains the @@ -97,6 +100,35 @@ func (r *Resources) Merge(other *Resources) { if len(other.Devices) != 0 { r.Devices = other.Devices } + if other.NUMA != nil { + r.NUMA = other.NUMA.Copy() + } +} + +// NUMAResource contains the NUMA affinity request for scheduling purposes. +// +// Applies only to Nomad Enterprise. +type NUMAResource struct { + // Affinity must be one of "none", "prefer", "require". 
+ Affinity string `hcl:"affinity,optional"` +} + +func (n *NUMAResource) Copy() *NUMAResource { + if n == nil { + return nil + } + return &NUMAResource{ + Affinity: n.Affinity, + } +} + +func (n *NUMAResource) Canonicalize() { + if n == nil { + return + } + if n.Affinity == "" { + n.Affinity = "none" + } } type Port struct { diff --git a/api/resources_test.go b/api/resources_test.go index 4cc13f97ad9a..cbc55e47b598 100644 --- a/api/resources_test.go +++ b/api/resources_test.go @@ -55,3 +55,57 @@ func TestResources_Canonicalize(t *testing.T) { }) } } + +func TestResources_Merge(t *testing.T) { + testutil.Parallel(t) + + none := &NUMAResource{Affinity: "none"} + prefer := &NUMAResource{Affinity: "prefer"} + + cases := []struct { + name string + resource *Resources + other *Resources + exp *Resources + }{ + { + name: "merge nil numa", + resource: &Resources{NUMA: none}, + other: &Resources{NUMA: nil}, + exp: &Resources{NUMA: none}, + }, + { + name: "merge non-nil numa", + resource: &Resources{NUMA: none}, + other: &Resources{NUMA: prefer}, + exp: &Resources{NUMA: prefer}, + }, + } + + for _, tc := range cases { + tc.resource.Merge(tc.other) + must.Eq(t, tc.exp, tc.resource) + } +} + +func TestNUMAResource_Copy(t *testing.T) { + testutil.Parallel(t) + + r1 := &NUMAResource{Affinity: "none"} + r2 := r1.Copy() + r1.Affinity = "require" + must.Eq(t, "require", r1.Affinity) + must.Eq(t, "none", r2.Affinity) +} + +func TestNUMAResource_Canonicalize(t *testing.T) { + testutil.Parallel(t) + + var n1 *NUMAResource + n1.Canonicalize() + must.Nil(t, n1) + + var n2 = &NUMAResource{Affinity: ""} + n2.Canonicalize() + must.Eq(t, &NUMAResource{Affinity: "none"}, n2) +} diff --git a/client/allocrunner/taskrunner/task_runner.go b/client/allocrunner/taskrunner/task_runner.go index 1747601885a8..bad5a6cd21e8 100644 --- a/client/allocrunner/taskrunner/task_runner.go +++ b/client/allocrunner/taskrunner/task_runner.go @@ -1163,7 +1163,7 @@ func (tr *TaskRunner) buildTaskConfig() 
*drivers.TaskConfig { MemoryLimitBytes: memoryLimit * 1024 * 1024, CPUShares: taskResources.Cpu.CpuShares, CpusetCpus: strings.Join(cpusetCpus, ","), - PercentTicks: float64(taskResources.Cpu.CpuShares) / float64(tr.clientConfig.Node.NodeResources.Cpu.CpuShares), + PercentTicks: float64(taskResources.Cpu.CpuShares) / float64(tr.clientConfig.Node.NodeResources.Processors.Topology.UsableCompute()), }, Ports: &ports, }, diff --git a/client/client.go b/client/client.go index 1f94e7451f6b..69ba1a305c54 100644 --- a/client/client.go +++ b/client/client.go @@ -1505,7 +1505,7 @@ func (c *Client) setupNode() error { node.NodeResources = &structs.NodeResources{} node.NodeResources.MinDynamicPort = newConfig.MinDynamicPort node.NodeResources.MaxDynamicPort = newConfig.MaxDynamicPort - node.NodeResources.Cpu = newConfig.Node.NodeResources.Cpu + node.NodeResources.Processors = newConfig.Node.NodeResources.Processors } if node.ReservedResources == nil { node.ReservedResources = &structs.NodeReservedResources{} @@ -1642,19 +1642,6 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp } } - // COMPAT(0.10): Remove in 0.10 - // update the response networks with the config - // if we still have node changes, merge them - if response.Resources != nil { - response.Resources.Networks = updateNetworks( - response.Resources.Networks, - newConfig) - if !newConfig.Node.Resources.Equal(response.Resources) { - newConfig.Node.Resources.Merge(response.Resources) - nodeHasChanged = true - } - } - // update the response networks with the config // if we still have node changes, merge them if response.NodeResources != nil { @@ -1672,7 +1659,7 @@ func (c *Client) updateNodeFromFingerprint(response *fingerprint.FingerprintResp } // update config with total cpu compute if it was detected - if cpu := int(response.NodeResources.Cpu.CpuShares); cpu > 0 { + if cpu := response.NodeResources.Processors.TotalCompute(); cpu > 0 { newConfig.CpuCompute = cpu } } @@ -3248,7 
+3235,7 @@ func (c *Client) setGaugeForAllocationStats(nodeID string, baseLabels []metrics. // Emit unallocated unallocatedMem := total.Memory.MemoryMB - res.Memory.MemoryMB - allocated.Flattened.Memory.MemoryMB unallocatedDisk := total.Disk.DiskMB - res.Disk.DiskMB - allocated.Shared.DiskMB - unallocatedCpu := total.Cpu.CpuShares - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares + unallocatedCpu := int64(total.Processors.Topology.UsableCompute()) - res.Cpu.CpuShares - allocated.Flattened.Cpu.CpuShares metrics.SetGaugeWithLabels([]string{"client", "unallocated", "memory"}, float32(unallocatedMem), baseLabels) metrics.SetGaugeWithLabels([]string{"client", "unallocated", "disk"}, float32(unallocatedDisk), baseLabels) @@ -3355,8 +3342,7 @@ func (c *Client) getAllocatedResources(selfNode *structs.Node) *structs.Comparab } // Add the resources - // COMPAT(0.11): Just use the allocated resources - allocated.Add(alloc.ComparableResources()) + allocated.Add(alloc.AllocatedResources.Comparable()) // Add the used network if alloc.AllocatedResources != nil { diff --git a/client/client_test.go b/client/client_test.go index 826f9df80169..ee72c6dcd0c3 100644 --- a/client/client_test.go +++ b/client/client_test.go @@ -14,12 +14,6 @@ import ( "time" memdb "github.com/hashicorp/go-memdb" - "github.com/shoenig/test" - "github.com/shoenig/test/must" - "github.com/shoenig/test/wait" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" - "github.com/hashicorp/nomad/ci" "github.com/hashicorp/nomad/client/allocrunner" "github.com/hashicorp/nomad/client/allocrunner/interfaces" @@ -28,6 +22,7 @@ import ( "github.com/hashicorp/nomad/client/fingerprint" regMock "github.com/hashicorp/nomad/client/serviceregistration/mock" cstate "github.com/hashicorp/nomad/client/state" + ctestutil "github.com/hashicorp/nomad/client/testutil" "github.com/hashicorp/nomad/command/agent/consul" "github.com/hashicorp/nomad/helper/pluginutils/catalog" 
"github.com/hashicorp/nomad/helper/pluginutils/singleton" @@ -40,6 +35,11 @@ import ( "github.com/hashicorp/nomad/plugins/device" psstructs "github.com/hashicorp/nomad/plugins/shared/structs" "github.com/hashicorp/nomad/testutil" + "github.com/shoenig/test" + "github.com/shoenig/test/must" + "github.com/shoenig/test/wait" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func testACLServer(t *testing.T, cb func(*nomad.Config)) (*nomad.Server, string, *structs.ACLToken, func()) { @@ -1358,8 +1358,11 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { defer cleanup() client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ + // overrides the detected hardware in TestClient NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 123}, + Processors: structs.NodeProcessorResources{ + Topology: structs.MockBasicTopology(), + }, }, }) @@ -1385,8 +1388,11 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { Disk: conf.Node.NodeResources.Disk, // injected - Cpu: structs.NodeCpuResources{ - CpuShares: 123, + Processors: structs.NodeProcessorResources{ + Topology: structs.MockBasicTopology(), + }, + Cpu: structs.LegacyNodeCpuResources{ + CpuShares: 14_000, // mock has 4 cores * 3500 MHz ReservableCpuCores: conf.Node.NodeResources.Cpu.ReservableCpuCores, TotalCpuCores: conf.Node.NodeResources.Cpu.TotalCpuCores, }, @@ -1399,7 +1405,7 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { }, } - assert.EqualValues(t, expectedResources, conf.Node.NodeResources) + must.Eq(t, expectedResources, conf.Node.NodeResources) // overrides of values @@ -1429,8 +1435,11 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { Disk: conf.Node.NodeResources.Disk, // injected - Cpu: structs.NodeCpuResources{ - CpuShares: 123, + Processors: structs.NodeProcessorResources{ + Topology: structs.MockBasicTopology(), + }, + Cpu: structs.LegacyNodeCpuResources{ + CpuShares: 
14_000, // mock has 4 cores * 3500 MHz ReservableCpuCores: conf.Node.NodeResources.Cpu.ReservableCpuCores, TotalCpuCores: conf.Node.NodeResources.Cpu.TotalCpuCores, }, @@ -1454,10 +1463,8 @@ func TestClient_UpdateNodeFromDevicesAccumulates(t *testing.T) { // TestClient_UpdateNodeFromFingerprintKeepsConfig asserts manually configured // network interfaces take precedence over fingerprinted ones. func TestClient_UpdateNodeFromFingerprintKeepsConfig(t *testing.T) { + ctestutil.RequireLinux(t) ci.Parallel(t) - if runtime.GOOS != "linux" { - t.Skip("assertions assume linux platform") - } // Client without network configured updates to match fingerprint client, cleanup := TestClient(t, nil) @@ -1465,24 +1472,18 @@ func TestClient_UpdateNodeFromFingerprintKeepsConfig(t *testing.T) { client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 123}, Networks: []*structs.NetworkResource{{Mode: "host", Device: "any-interface"}}, }, - Resources: &structs.Resources{ - CPU: 80, - }, }) idx := len(client.config.Node.NodeResources.Networks) - 1 - require.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares) - require.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[idx].Device) - require.Equal(t, 80, client.config.Node.Resources.CPU) + must.Eq(t, "any-interface", client.config.Node.NodeResources.Networks[idx].Device) // lookup an interface. client.Node starts with a hardcoded value, eth0, // and is only updated async through fingerprinter. 
// Let's just lookup network device; anyone will do for this test interfaces, err := net.Interfaces() - require.NoError(t, err) - require.NotEmpty(t, interfaces) + must.NoError(t, err) + must.NotNil(t, interfaces) dev := interfaces[0].Name // Client with network interface configured keeps the config @@ -1498,36 +1499,31 @@ func TestClient_UpdateNodeFromFingerprintKeepsConfig(t *testing.T) { defer cleanup() client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 123}, Networks: []*structs.NetworkResource{ {Mode: "host", Device: "any-interface", MBits: 20}, }, }, }) - require.Equal(t, int64(123), client.config.Node.NodeResources.Cpu.CpuShares) + // only the configured device is kept - require.Equal(t, 2, len(client.config.Node.NodeResources.Networks)) - require.Equal(t, dev, client.config.Node.NodeResources.Networks[0].Device) - require.Equal(t, "bridge", client.config.Node.NodeResources.Networks[1].Mode) + must.Eq(t, 2, len(client.config.Node.NodeResources.Networks)) + must.Eq(t, dev, client.config.Node.NodeResources.Networks[0].Device) + must.Eq(t, "bridge", client.config.Node.NodeResources.Networks[1].Mode) // Network speed is applied to all NetworkResources client.config.NetworkInterface = "" client.config.NetworkSpeed = 100 client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 123}, Networks: []*structs.NetworkResource{ {Mode: "host", Device: "any-interface", MBits: 20}, }, }, - Resources: &structs.Resources{ - CPU: 80, - }, }) - assert.Equal(t, 3, len(client.config.Node.NodeResources.Networks)) - assert.Equal(t, "any-interface", client.config.Node.NodeResources.Networks[2].Device) - assert.Equal(t, 100, client.config.Node.NodeResources.Networks[2].MBits) - assert.Equal(t, 0, client.config.Node.NodeResources.Networks[1].MBits) + must.Eq(t, 3, 
len(client.config.Node.NodeResources.Networks)) + must.Eq(t, "any-interface", client.config.Node.NodeResources.Networks[2].Device) + must.Eq(t, 100, client.config.Node.NodeResources.Networks[2].MBits) + must.Eq(t, 0, client.config.Node.NodeResources.Networks[1].MBits) } // Support multiple IP addresses (ipv4 vs. 6, e.g.) on the configured network interface @@ -1546,13 +1542,11 @@ func Test_UpdateNodeFromFingerprintMultiIP(t *testing.T) { client, cleanup := TestClient(t, func(c *config.Config) { c.NetworkInterface = dev c.Options["fingerprint.denylist"] = "network,cni,bridge" - c.Node.Resources.Networks = c.Node.NodeResources.Networks }) defer cleanup() client.updateNodeFromFingerprint(&fingerprint.FingerprintResponse{ NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 123}, Networks: []*structs.NetworkResource{ {Device: dev, IP: "127.0.0.1"}, {Device: dev, IP: "::1"}, @@ -1566,7 +1560,7 @@ func Test_UpdateNodeFromFingerprintMultiIP(t *testing.T) { {Device: dev, IP: "::1"}, } - require.Equal(t, nets, client.config.Node.NodeResources.Networks) + must.Eq(t, nets, client.config.Node.NodeResources.Networks) } func TestClient_computeAllocatedDeviceStats(t *testing.T) { diff --git a/client/fingerprint/cpu.go b/client/fingerprint/cpu.go index 274514a4064f..650d707833bb 100644 --- a/client/fingerprint/cpu.go +++ b/client/fingerprint/cpu.go @@ -12,7 +12,6 @@ import ( "github.com/hashicorp/nomad/client/lib/idset" "github.com/hashicorp/nomad/client/lib/numalib" "github.com/hashicorp/nomad/client/lib/numalib/hw" - "github.com/hashicorp/nomad/helper" "github.com/hashicorp/nomad/nomad/structs" "github.com/klauspost/cpuid/v2" ) @@ -21,10 +20,11 @@ import ( type CPUFingerprint struct { StaticFingerprinter logger hclog.Logger - top *numalib.Topology - // accumulates result in these resource structs - resources *structs.Resources + // builds this topology + top *numalib.Topology + + // sets the built topology for this resources response nodeResources 
*structs.NodeResources } @@ -32,7 +32,6 @@ type CPUFingerprint struct { func NewCPUFingerprint(logger hclog.Logger) Fingerprint { return &CPUFingerprint{ logger: logger.Named("cpu"), - resources: new(structs.Resources), // COMPAT (to be removed after 0.10) nodeResources: new(structs.NodeResources), } } @@ -148,7 +147,6 @@ func (f *CPUFingerprint) setCoreCount(response *FingerprintResponse) { response.AddAttribute("cpu.numcores", f.cores(total)) f.logger.Debug("detected CPU core count", "cores", total) } - f.nodeResources.Cpu.TotalCpuCores = uint16(total) } func (f *CPUFingerprint) setReservableCores(response *FingerprintResponse) { @@ -157,10 +155,6 @@ func (f *CPUFingerprint) setReservableCores(response *FingerprintResponse) { // topology has already reduced to the intersection of usable cores usable := f.top.UsableCores() response.AddAttribute("cpu.reservablecores", f.cores(usable.Size())) - f.nodeResources.Cpu.ReservableCpuCores = helper.ConvertSlice( - usable.Slice(), func(id hw.CoreID) uint16 { - return uint16(id) - }) default: response.AddAttribute("cpu.reservablecores", "0") } @@ -172,14 +166,6 @@ func (f *CPUFingerprint) setTotalCompute(response *FingerprintResponse) { response.AddAttribute("cpu.totalcompute", f.frequency(totalCompute)) response.AddAttribute("cpu.usablecompute", f.frequency(usableCompute)) - - f.resources.CPU = int(totalCompute) - f.nodeResources.Cpu.CpuShares = int64(totalCompute) -} - -func (f *CPUFingerprint) setResponseResources(response *FingerprintResponse) { - response.Resources = f.resources - response.NodeResources = f.nodeResources } func (f *CPUFingerprint) setNUMA(response *FingerprintResponse) { @@ -197,3 +183,9 @@ func (f *CPUFingerprint) setNUMA(response *FingerprintResponse) { return nil }) } + +func (f *CPUFingerprint) setResponseResources(response *FingerprintResponse) { + f.nodeResources.Processors = structs.NewNodeProcessorResources(f.top) + f.nodeResources.Compatibility() + response.NodeResources = f.nodeResources +} 
diff --git a/client/fingerprint/cpu_default_test.go b/client/fingerprint/cpu_default_test.go index e3c873814fe3..c756305e996e 100644 --- a/client/fingerprint/cpu_default_test.go +++ b/client/fingerprint/cpu_default_test.go @@ -40,10 +40,9 @@ func TestCPUFingerprint_Classic(t *testing.T) { must.MapContainsKey(t, attributes, "cpu.numcores") must.MapContainsKey(t, attributes, "cpu.modelname") must.MapContainsKey(t, attributes, "cpu.totalcompute") - must.Positive(t, response.Resources.CPU) - must.Positive(t, response.NodeResources.Cpu.CpuShares) - must.Positive(t, response.NodeResources.Cpu.SharesPerCore()) - must.SliceNotEmpty(t, response.NodeResources.Cpu.ReservableCpuCores) + must.Positive(t, response.NodeResources.Processors.Topology.UsableCompute()) + must.Positive(t, response.NodeResources.Processors.Topology.NumCores()) + must.NotEmpty(t, response.NodeResources.Processors.Topology.UsableCores()) _, frequencyPresent := attributes["cpu.frequency"] _, performancePresent := attributes["cpu.frequency.performance"] @@ -65,7 +64,7 @@ func TestCPUFingerprint_OverrideCompute(t *testing.T) { cfg := &config.Config{ ReservableCores: []hw.CoreID{0, 1, 2}, } - var originalCPU int + var originalCompute int { request := &FingerprintRequest{Config: cfg, Node: node} @@ -75,13 +74,13 @@ func TestCPUFingerprint_OverrideCompute(t *testing.T) { must.True(t, response.Detected) must.Eq(t, "3", response.Attributes["cpu.reservablecores"], must.Sprint("override of cpu.reservablecores is incorrect")) - must.Positive(t, response.Resources.CPU) - originalCPU = response.Resources.CPU + originalCompute := response.NodeResources.Processors.Topology.UsableCompute() + must.Positive(t, originalCompute) } { // Override it with a setting - cfg.CpuCompute = originalCPU + 123 + cfg.CpuCompute = originalCompute + 123 // Make sure the Fingerprinter applies the override to the node resources request := &FingerprintRequest{Config: cfg, Node: node} @@ -89,8 +88,10 @@ func 
TestCPUFingerprint_OverrideCompute(t *testing.T) { err := f.Fingerprint(request, &response) must.NoError(t, err) - // COMPAT(0.10): Remove in 0.10 - must.Eq(t, cfg.CpuCompute, response.Resources.CPU, must.Sprint("cpu override did not take affect")) + // topology struct + must.Eq(t, hw.MHz(cfg.CpuCompute), response.NodeResources.Processors.Topology.UsableCompute(), must.Sprint("cpu override did not take affect")) + + // legacy struct must.Eq(t, int64(cfg.CpuCompute), response.NodeResources.Cpu.CpuShares, must.Sprint("cpu override did not take affect")) must.Eq(t, strconv.Itoa(cfg.CpuCompute), response.Attributes["cpu.totalcompute"], must.Sprint("cpu override did not take affect")) must.Eq(t, "3", response.Attributes["cpu.reservablecores"], must.Sprint("cpu override did not take affect")) diff --git a/client/fingerprint/memory.go b/client/fingerprint/memory.go index 1a37a5c2b24d..0906916b2de9 100644 --- a/client/fingerprint/memory.go +++ b/client/fingerprint/memory.go @@ -47,12 +47,6 @@ func (f *MemoryFingerprint) Fingerprint(req *FingerprintRequest, resp *Fingerpri resp.AddAttribute("memory.totalbytes", fmt.Sprintf("%d", totalMemory)) memoryMB := totalMemory / bytesInMB - - // COMPAT(0.10): Unused since 0.9. 
- resp.Resources = &structs.Resources{ - MemoryMB: int(memoryMB), - } - resp.NodeResources = &structs.NodeResources{ Memory: structs.NodeMemoryResources{ MemoryMB: memoryMB, diff --git a/client/fingerprint/memory_test.go b/client/fingerprint/memory_test.go index 9994b6301e7e..58a57a2fcb7f 100644 --- a/client/fingerprint/memory_test.go +++ b/client/fingerprint/memory_test.go @@ -10,15 +10,12 @@ import ( "github.com/hashicorp/nomad/client/config" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/nomad/structs" - - "github.com/stretchr/testify/require" + "github.com/shoenig/test/must" ) func TestMemoryFingerprint(t *testing.T) { ci.Parallel(t) - require := require.New(t) - f := NewMemoryFingerprint(testlog.HCLogger(t)) node := &structs.Node{ Attributes: make(map[string]string), @@ -27,13 +24,10 @@ func TestMemoryFingerprint(t *testing.T) { request := &FingerprintRequest{Config: &config.Config{}, Node: node} var response FingerprintResponse err := f.Fingerprint(request, &response) - require.NoError(err) + must.NoError(t, err) assertNodeAttributeContains(t, response.Attributes, "memory.totalbytes") - require.NotNil(response.Resources, "expected response Resources to not be nil") - require.NotZero(response.Resources.MemoryMB, "expected memory to be non-zero") - require.NotNil(response.NodeResources, "expected response NodeResources to not be nil") - require.NotZero(response.NodeResources.Memory.MemoryMB, "expected memory to be non-zero") + must.Positive(t, response.NodeResources.Memory.MemoryMB) } func TestMemoryFingerprint_Override(t *testing.T) { @@ -53,9 +47,5 @@ func TestMemoryFingerprint_Override(t *testing.T) { } assertNodeAttributeContains(t, response.Attributes, "memory.totalbytes") - require := require.New(t) - require.NotNil(response.Resources) - require.EqualValues(response.Resources.MemoryMB, memoryMB) - require.NotNil(response.NodeResources) - require.EqualValues(response.NodeResources.Memory.MemoryMB, memoryMB) + must.Eq(t, 
response.NodeResources.Memory.MemoryMB, int64(memoryMB)) } diff --git a/client/fingerprint/network.go b/client/fingerprint/network.go index d9e9f6cb5c1c..fdcf2f9bc350 100644 --- a/client/fingerprint/network.go +++ b/client/fingerprint/network.go @@ -105,11 +105,6 @@ func (f *NetworkFingerprint) Fingerprint(req *FingerprintRequest, resp *Fingerpr return err } - // COMPAT(0.10): Remove in 0.10 - resp.Resources = &structs.Resources{ - Networks: nwResources, - } - resp.NodeResources = &structs.NodeResources{ Networks: nwResources, } diff --git a/client/fingerprint/network_test.go b/client/fingerprint/network_test.go index 99f29702de17..cf428d097386 100644 --- a/client/fingerprint/network_test.go +++ b/client/fingerprint/network_test.go @@ -222,12 +222,12 @@ func TestNetworkFingerprint_basic(t *testing.T) { t.Fatalf("Bad IP match: %s", ip) } - if response.Resources == nil || len(response.Resources.Networks) == 0 { + if len(response.NodeResources.Networks) == 0 { t.Fatal("Expected to find Network Resources") } // Test at least the first Network Resource - net := response.Resources.Networks[0] + net := response.NodeResources.Networks[0] if net.IP == "" { t.Fatal("Expected Network Resource to not be empty") } @@ -300,12 +300,12 @@ func TestNetworkFingerPrint_default_device(t *testing.T) { t.Fatalf("Bad IP match: %s", ip) } - if response.Resources == nil || len(response.Resources.Networks) == 0 { + if len(response.NodeResources.Networks) == 0 { t.Fatal("Expected to find Network Resources") } // Test at least the first Network Resource - net := response.Resources.Networks[0] + net := response.NodeResources.Networks[0] if net.IP == "" { t.Fatal("Expected Network Resource to not be empty") } @@ -349,12 +349,12 @@ func TestNetworkFingerPrint_LinkLocal_Allowed(t *testing.T) { t.Fatalf("Bad IP match: %s", ip) } - if response.Resources == nil || len(response.Resources.Networks) == 0 { + if len(response.NodeResources.Networks) == 0 { t.Fatal("Expected to find Network Resources") } 
// Test at least the first Network Resource - net := response.Resources.Networks[0] + net := response.NodeResources.Networks[0] if net.IP == "" { t.Fatal("Expected Network Resource to not be empty") } @@ -402,12 +402,12 @@ func TestNetworkFingerPrint_LinkLocal_Allowed_MixedIntf(t *testing.T) { t.Fatalf("Bad IP match: %s", ip) } - if response.Resources == nil || len(response.Resources.Networks) == 0 { + if len(response.NodeResources.Networks) == 0 { t.Fatal("Expected to find Network Resources") } // Test at least the first Network Resource - net := response.Resources.Networks[0] + net := response.NodeResources.Networks[0] if net.IP == "" { t.Fatal("Expected Network Resource to not be empty") } diff --git a/client/fingerprint/storage.go b/client/fingerprint/storage.go index 7ac0c733e979..6492ba0e633b 100644 --- a/client/fingerprint/storage.go +++ b/client/fingerprint/storage.go @@ -60,10 +60,6 @@ func (f *StorageFingerprint) Fingerprint(req *FingerprintRequest, resp *Fingerpr resp.AddAttribute("unique.storage.bytesfree", strconv.FormatUint(free, 10)) // set the disk size for the response - // COMPAT(0.10): Remove in 0.10 - resp.Resources = &structs.Resources{ - DiskMB: int(free / bytesPerMegabyte), - } resp.NodeResources = &structs.NodeResources{ Disk: structs.NodeDiskResources{ DiskMB: int64(free / bytesPerMegabyte), diff --git a/client/fingerprint/storage_test.go b/client/fingerprint/storage_test.go index ac46a318d94e..8f050f25a97a 100644 --- a/client/fingerprint/storage_test.go +++ b/client/fingerprint/storage_test.go @@ -43,14 +43,6 @@ func TestStorageFingerprint(t *testing.T) { t.Fatalf("unique.storage.bytesfree %d is larger than unique.storage.bytestotal %d", free, total) } - // COMPAT(0.10): Remove in 0.10 - if response.Resources == nil { - t.Fatalf("Node Resources was nil") - } - if response.Resources.DiskMB == 0 { - t.Errorf("Expected node.Resources.DiskMB to be non-zero") - } - if response.NodeResources == nil || response.NodeResources.Disk.DiskMB == 0 { 
t.Errorf("Expected node.Resources.DiskMB to be non-zero") } diff --git a/client/fingerprint/structs.go b/client/fingerprint/structs.go index f6d330682c23..ca959e81e814 100644 --- a/client/fingerprint/structs.go +++ b/client/fingerprint/structs.go @@ -21,7 +21,6 @@ type FingerprintRequest struct { type FingerprintResponse struct { Attributes map[string]string Links map[string]string - Resources *structs.Resources // COMPAT(0.10): Remove in 0.10 NodeResources *structs.NodeResources // Detected is a boolean indicating whether the fingerprinter detected diff --git a/client/fingerprint_manager_test.go b/client/fingerprint_manager_test.go index 82d1089d0e28..472b12863d36 100644 --- a/client/fingerprint_manager_test.go +++ b/client/fingerprint_manager_test.go @@ -31,9 +31,9 @@ func TestFingerprintManager_Run_ResourcesFingerprint(t *testing.T) { node := testClient.config.Node - must.Positive(t, node.Resources.CPU) - must.Positive(t, node.Resources.MemoryMB) - must.Positive(t, node.Resources.DiskMB) + must.Positive(t, node.NodeResources.Processors.TotalCompute()) + must.Positive(t, node.NodeResources.Memory.MemoryMB) + must.Positive(t, node.NodeResources.Disk.DiskMB) } func TestFimgerprintManager_Run_InWhitelist(t *testing.T) { diff --git a/client/lib/idset/idset.go b/client/lib/idset/idset.go index d9a009fe1035..2ab613c8fca1 100644 --- a/client/lib/idset/idset.go +++ b/client/lib/idset/idset.go @@ -106,6 +106,11 @@ func From[T, U ID](slice []U) *Set[T] { return result } +func (s *Set[T]) Difference(other *Set[T]) *Set[T] { + diff := s.items.Difference(other.items) + return &Set[T]{items: diff.(*set.Set[T])} +} + // Contains returns whether the Set contains item. func (s *Set[T]) Contains(item T) bool { return s.items.Contains(item) @@ -192,3 +197,16 @@ func (s *Set[T]) Empty() bool { } return s.items.Empty() } + +// InsertSlice is used to bludgen a slice of integers into s. 
+func InsertSlice[T ID, X ~uint16](s *Set[T], items ...X) { + for _, item := range items { + s.Insert(T(item)) + } +} + +// Superset returns true of s is a superset of other. +func (s *Set[T]) Superset(other *Set[T]) bool { + // todo(shoenig) optimize + return s.items.ContainsSlice(other.items.Slice()) +} diff --git a/client/lib/numalib/hw/ids.go b/client/lib/numalib/hw/ids.go index 811df0e4b286..4d3566ef8042 100644 --- a/client/lib/numalib/hw/ids.go +++ b/client/lib/numalib/hw/ids.go @@ -10,7 +10,9 @@ package hw type ( // A NodeID represents a NUMA node. There could be more than // one NUMA node per socket. - NodeID uint8 + // + // Must be an alias because go-msgpack cannot handle the real type. + NodeID = uint8 // A SocketID represents a physicsl CPU socket. SocketID uint8 diff --git a/client/lib/numalib/topology.go b/client/lib/numalib/topology.go index f8dcc64e3cfb..d7efa700b11d 100644 --- a/client/lib/numalib/topology.go +++ b/client/lib/numalib/topology.go @@ -158,6 +158,9 @@ func (st *Topology) String() string { // value is used instead even if it violates the above invariant. func (st *Topology) TotalCompute() hw.MHz { if st.OverrideTotalCompute > 0 { + // TODO(shoenig) Starting in Nomad 1.7 we should warn about setting + // cpu_total_compute override, and suggeset users who think they still + // need this to file a bug so we can understand what is not detectable. return st.OverrideTotalCompute } @@ -173,13 +176,23 @@ func (st *Topology) TotalCompute() hw.MHz { // the TotalCompute of the system. Nomad must subtract off any reserved compute // (reserved.cpu or reserved.cores) from the total hardware compute. func (st *Topology) UsableCompute() hw.MHz { + if st.OverrideTotalCompute > 0 { + // TODO(shoenig) Starting in Nomad 1.7 we should warn about setting + // cpu_total_compute override, and suggeset users who think they still + // need this to file a bug so we can understand what is not detectable. 
+ return st.OverrideTotalCompute + } + var total hw.MHz for _, cpu := range st.Cores { + // only use cores allowable by config if !cpu.Disable { total += cpu.MHz() } } - return total + + // only use compute allowable by config + return total - st.OverrideWitholdCompute } // NumCores returns the number of logical cores detected. This includes both @@ -244,3 +257,12 @@ func (st *Topology) Compute() cpustats.Compute { NumCores: st.NumCores(), } } + +func (st *Topology) Equal(o *Topology) bool { + if st == nil || o == nil { + return st == o + } + // simply iterates each core; the topology never changes for a node once + // it has been created at agent startup + return st.TotalCompute() == o.TotalCompute() +} diff --git a/client/taskenv/env.go b/client/taskenv/env.go index 51d5a887474e..845c7c52e099 100644 --- a/client/taskenv/env.go +++ b/client/taskenv/env.go @@ -12,10 +12,10 @@ import ( "strings" "sync" + "github.com/hashicorp/nomad/client/lib/idset" "github.com/hashicorp/nomad/helper" hargs "github.com/hashicorp/nomad/helper/args" "github.com/hashicorp/nomad/helper/escapingfs" - "github.com/hashicorp/nomad/lib/cpuset" "github.com/hashicorp/nomad/nomad/structs" "github.com/hashicorp/nomad/plugins/drivers" "github.com/zclconf/go-cty/cty" @@ -773,7 +773,7 @@ func (b *Builder) setAlloc(alloc *structs.Allocation) *Builder { // Populate task resources if tr, ok := alloc.AllocatedResources.Tasks[b.taskName]; ok { b.cpuLimit = tr.Cpu.CpuShares - b.cpuCores = cpuset.New(tr.Cpu.ReservedCores...).String() + b.cpuCores = idset.From[uint16](tr.Cpu.ReservedCores).String() b.memLimit = tr.Memory.MemoryMB b.memMaxLimit = tr.Memory.MemoryMaxMB diff --git a/command/agent/job_endpoint.go b/command/agent/job_endpoint.go index 2821020dc9af..a048f210f579 100644 --- a/command/agent/job_endpoint.go +++ b/command/agent/job_endpoint.go @@ -1422,6 +1422,12 @@ func ApiResourcesToStructs(in *api.Resources) *structs.Resources { } } + if in.NUMA != nil { + out.NUMA = &structs.NUMA{ + Affinity: 
in.NUMA.Affinity, + } + } + return out } diff --git a/command/agent/job_endpoint_test.go b/command/agent/job_endpoint_test.go index 364e977a0cd6..06bbaded8c4e 100644 --- a/command/agent/job_endpoint_test.go +++ b/command/agent/job_endpoint_test.go @@ -3732,6 +3732,23 @@ func TestConversion_apiResourcesToStructs(t *testing.T) { MemoryMaxMB: 300, }, }, + { + "with numa", + &api.Resources{ + CPU: pointer.Of(100), + MemoryMB: pointer.Of(200), + NUMA: &api.NUMAResource{ + Affinity: "prefer", + }, + }, + &structs.Resources{ + CPU: 100, + MemoryMB: 200, + NUMA: &structs.NUMA{ + Affinity: "prefer", + }, + }, + }, } for _, c := range cases { diff --git a/command/node_status.go b/command/node_status.go index 84ab80f0da94..df1d9aaceb5b 100644 --- a/command/node_status.go +++ b/command/node_status.go @@ -932,14 +932,12 @@ func getAllocatedResources(client *api.Client, runningAllocs []*api.Allocation, func computeNodeTotalResources(node *api.Node) api.Resources { total := api.Resources{} - r := node.Resources - res := node.Reserved - if res == nil { - res = &api.Resources{} - } - total.CPU = pointer.Of(*r.CPU - *res.CPU) - total.MemoryMB = pointer.Of(*r.MemoryMB - *res.MemoryMB) - total.DiskMB = pointer.Of(*r.DiskMB - *res.DiskMB) + r := node.NodeResources + res := node.ReservedResources + + total.CPU = pointer.Of[int](int(r.Cpu.CpuShares) - int(res.Cpu.CpuShares)) + total.MemoryMB = pointer.Of[int](int(r.Memory.MemoryMB) - int(res.Memory.MemoryMB)) + total.DiskMB = pointer.Of[int](int(r.Disk.DiskMB) - int(res.Disk.DiskMB)) return total } @@ -1001,7 +999,7 @@ func getHostResources(hostStats *api.HostStats, node *api.Node) ([]string, error if physical { resources[1] = fmt.Sprintf("%v/%d MHz|%s/%s|%s/%s", math.Floor(hostStats.CPUTicksConsumed), - *node.Resources.CPU, + node.NodeResources.Cpu.CpuShares, humanize.IBytes(hostStats.Memory.Used), humanize.IBytes(hostStats.Memory.Total), humanize.IBytes(diskUsed), @@ -1012,7 +1010,7 @@ func getHostResources(hostStats *api.HostStats, 
node *api.Node) ([]string, error // since nomad doesn't collect the stats data. resources[1] = fmt.Sprintf("%v/%d MHz|%s/%s|(%s)", math.Floor(hostStats.CPUTicksConsumed), - *node.Resources.CPU, + node.NodeResources.Cpu.CpuShares, humanize.IBytes(hostStats.Memory.Used), humanize.IBytes(hostStats.Memory.Total), storageDevice, diff --git a/lib/cpuset/cpuset.go b/lib/cpuset/cpuset.go deleted file mode 100644 index 732c238043c1..000000000000 --- a/lib/cpuset/cpuset.go +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: BUSL-1.1 - -package cpuset - -import ( - "fmt" - "math" - "reflect" - "sort" - "strconv" - "strings" -) - -// CPUSet is a set like object that provides methods helpful when working with cpus with systems -// such as the Linux cpuset cgroup subsystem. A CPUSet is immutable and can be safely accessed concurrently. -type CPUSet struct { - cpus map[uint16]struct{} -} - -// New initializes a new CPUSet with 0 or more containing cpus -func New(cpus ...uint16) CPUSet { - cpuset := CPUSet{ - cpus: make(map[uint16]struct{}), - } - - for _, v := range cpus { - cpuset.cpus[v] = struct{}{} - } - - return cpuset -} - -// Copy returns a deep copy of CPUSet c. 
-func (c CPUSet) Copy() CPUSet { - cpus := make(map[uint16]struct{}, len(c.cpus)) - for k := range c.cpus { - cpus[k] = struct{}{} - } - return CPUSet{ - cpus: cpus, - } -} - -// String returns the cpuset as a comma delimited set of core values and ranged -func (c CPUSet) String() string { - if c.Size() == 0 { - return "" - } - cores := c.ToSlice() - cpusetStrs := []string{} - cur := [2]uint16{cores[0], cores[0]} - for i := 1; i < len(cores); i++ { - if cores[i] == cur[1]+1 { - cur[1] = cores[i] - continue - } - - if cur[0] == cur[1] { - cpusetStrs = append(cpusetStrs, fmt.Sprintf("%d", cur[0])) - } else { - cpusetStrs = append(cpusetStrs, fmt.Sprintf("%d-%d", cur[0], cur[1])) - } - - // new range - cur = [2]uint16{cores[i], cores[i]} - } - if cur[0] == cur[1] { - cpusetStrs = append(cpusetStrs, fmt.Sprintf("%d", cur[0])) - } else { - cpusetStrs = append(cpusetStrs, fmt.Sprintf("%d-%d", cur[0], cur[1])) - } - - return strings.Join(cpusetStrs, ",") -} - -// Size returns to the number of cpus contained in the CPUSet -func (c CPUSet) Size() int { - return len(c.cpus) -} - -// ToSlice returns a sorted slice of uint16 CPU IDs contained in the CPUSet. -func (c CPUSet) ToSlice() []uint16 { - cpus := []uint16{} - for k := range c.cpus { - cpus = append(cpus, k) - } - sort.Slice(cpus, func(i, j int) bool { return cpus[i] < cpus[j] }) - return cpus -} - -// Union returns a new set that is the union of this CPUSet and the supplied other. -// Ex. [0,1,2,3].Union([2,3,4,5]) = [0,1,2,3,4,5] -func (c CPUSet) Union(other CPUSet) CPUSet { - s := New() - for k := range c.cpus { - s.cpus[k] = struct{}{} - } - for k := range other.cpus { - s.cpus[k] = struct{}{} - } - return s -} - -// Difference returns a new set that is the difference of this CPUSet and the supplied other. 
-// [0,1,2,3].Difference([2,3,4]) = [0,1] -func (c CPUSet) Difference(other CPUSet) CPUSet { - s := New() - for k := range c.cpus { - s.cpus[k] = struct{}{} - } - for k := range other.cpus { - delete(s.cpus, k) - } - return s - -} - -// IsSubsetOf returns true if all cpus of the this CPUSet are present in the other CPUSet. -func (c CPUSet) IsSubsetOf(other CPUSet) bool { - for cpu := range c.cpus { - if _, ok := other.cpus[cpu]; !ok { - return false - } - } - return true -} - -func (c CPUSet) IsSupersetOf(other CPUSet) bool { - for cpu := range other.cpus { - if _, ok := c.cpus[cpu]; !ok { - return false - } - } - return true -} - -// ContainsAny returns true if any cpus in other CPUSet are present -func (c CPUSet) ContainsAny(other CPUSet) bool { - for cpu := range other.cpus { - if _, ok := c.cpus[cpu]; ok { - return true - } - } - return false -} - -// Equal tests the equality of the elements in the CPUSet -func (c CPUSet) Equal(other CPUSet) bool { - return reflect.DeepEqual(c.cpus, other.cpus) -} - -// Parse parses the Linux cpuset format into a CPUSet -// -// Ref: http://man7.org/linux/man-pages/man7/cpuset.7.html#FORMATS -func Parse(s string) (CPUSet, error) { - cpuset := New() - s = strings.TrimSpace(s) - if s == "" { - return cpuset, nil - } - sets := strings.Split(s, ",") - for _, set := range sets { - bounds := strings.Split(set, "-") - if len(bounds) == 1 { - v, err := strconv.Atoi(bounds[0]) - if err != nil { - return New(), err - } - - if v > math.MaxUint16 { - return New(), fmt.Errorf("failed to parse element %s, more than max allowed cores", set) - } - cpuset.cpus[uint16(v)] = struct{}{} - continue - } - if len(bounds) > 2 { - return New(), fmt.Errorf("failed to parse element %s, more than 1 '-' found", set) - } - - lower, err := strconv.Atoi(bounds[0]) - if err != nil { - return New(), err - } - upper, err := strconv.Atoi(bounds[1]) - if err != nil { - return New(), err - } - - for v := lower; v <= upper; v++ { - if v > math.MaxUint16 { - return 
New(), fmt.Errorf("failed to parse element %s, more than max allowed cores", set) - } - cpuset.cpus[uint16(v)] = struct{}{} - } - } - - return cpuset, nil -} diff --git a/lib/cpuset/cpuset_test.go b/lib/cpuset/cpuset_test.go deleted file mode 100644 index 839a3409afc3..000000000000 --- a/lib/cpuset/cpuset_test.go +++ /dev/null @@ -1,240 +0,0 @@ -// Copyright (c) HashiCorp, Inc. -// SPDX-License-Identifier: BUSL-1.1 - -package cpuset - -import ( - "testing" - - "github.com/hashicorp/nomad/ci" - "github.com/stretchr/testify/require" -) - -func TestCPUSet_Size(t *testing.T) { - ci.Parallel(t) - - set := New(0, 1, 2, 3) - require.Equal(t, 4, set.Size()) - require.Equal(t, 0, New().Size()) -} - -func TestCPUSet_ToSlice(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - desc string - in CPUSet - out []uint16 - }{ - { - "empty cpuset", - New(), - []uint16{}, - }, - { - "in order", - New(0, 1, 2, 3, 4, 5, 6, 7), - []uint16{0, 1, 2, 3, 4, 5, 6, 7}, - }, - { - "out of order", - New(3, 1, 2, 0), - []uint16{0, 1, 2, 3}, - }, - } - - for _, c := range cases { - require.Exactly(t, c.out, c.in.ToSlice(), c.desc) - } -} - -func TestCPUSet_Equal(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - a CPUSet - b CPUSet - shouldEqual bool - }{ - {New(), New(), true}, - {New(5), New(5), true}, - {New(1, 2, 3, 4, 5), New(1, 2, 3, 4, 5), true}, - - {New(), New(5), false}, - {New(5), New(), false}, - {New(), New(1, 2, 3, 4, 5), false}, - {New(1, 2, 3, 4, 5), New(), false}, - {New(5), New(1, 2, 3, 4, 5), false}, - {New(1, 2, 3, 4, 5), New(5), false}, - } - - for _, c := range cases { - require.Equal(t, c.shouldEqual, c.a.Equal(c.b)) - } -} - -func TestCPUSet_Union(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - a CPUSet - b CPUSet - expected CPUSet - }{ - {New(), New(), New()}, - - {New(), New(0), New(0)}, - {New(0), New(), New(0)}, - {New(0), New(0), New(0)}, - - {New(), New(0, 1, 2, 3), New(0, 1, 2, 3)}, - {New(0, 1), New(0, 1, 2, 3), New(0, 1, 2, 3)}, - {New(2, 
3), New(4, 5), New(2, 3, 4, 5)}, - {New(3, 4), New(0, 1, 2, 3), New(0, 1, 2, 3, 4)}, - } - - for _, c := range cases { - require.Exactly(t, c.expected.ToSlice(), c.a.Union(c.b).ToSlice()) - } -} - -func TestCPUSet_Difference(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - a CPUSet - b CPUSet - expected CPUSet - }{ - {New(), New(), New()}, - - {New(), New(0), New()}, - {New(0), New(), New(0)}, - {New(0), New(0), New()}, - - {New(0, 1), New(0, 1, 2, 3), New()}, - {New(2, 3), New(4, 5), New(2, 3)}, - {New(3, 4), New(0, 1, 2, 3), New(4)}, - } - - for _, c := range cases { - require.Exactly(t, c.expected.ToSlice(), c.a.Difference(c.b).ToSlice()) - } -} - -func TestCPUSet_IsSubsetOf(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - a CPUSet - b CPUSet - isSubset bool - }{ - {New(0), New(0), true}, - {New(), New(0), true}, - {New(0), New(), false}, - {New(1, 2), New(0, 1, 2, 3), true}, - {New(2, 1), New(0, 1, 2, 3), true}, - {New(3, 4), New(0, 1, 2, 3), false}, - } - - for _, c := range cases { - require.Equal(t, c.isSubset, c.a.IsSubsetOf(c.b)) - } -} - -func TestCPUSet_IsSupersetOf(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - a CPUSet - b CPUSet - isSuperset bool - }{ - {New(0), New(0), true}, - {New(0), New(), true}, - {New(), New(0), false}, - {New(0, 1, 2, 3), New(0), true}, - {New(0, 1, 2, 3), New(2, 3), true}, - {New(0, 1, 2, 3), New(2, 3, 4), false}, - } - - for _, c := range cases { - require.Equal(t, c.isSuperset, c.a.IsSupersetOf(c.b)) - } -} - -func TestCPUSet_ContainsAny(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - a CPUSet - b CPUSet - containsAny bool - }{ - {New(0), New(0), true}, - {New(0), New(), false}, - {New(), New(0), false}, - {New(0, 1, 2, 3), New(0), true}, - {New(0, 1, 2, 3), New(2, 3), true}, - {New(0, 1, 2, 3), New(2, 3, 4), true}, - } - - for _, c := range cases { - require.Equal(t, c.containsAny, c.a.ContainsAny(c.b)) - } -} - -func TestParse(t *testing.T) { - ci.Parallel(t) - - cases := 
[]struct { - cpuset string - expected CPUSet - }{ - {"", New()}, - {"\n", New()}, - {"1", New(1)}, - {"1\n", New(1)}, - {"0,1,2,3", New(0, 1, 2, 3)}, - {"0-3", New(0, 1, 2, 3)}, - {"0,2-3,5", New(0, 2, 3, 5)}, - } - - for _, c := range cases { - result, err := Parse(c.cpuset) - require.NoError(t, err) - require.True(t, result.Equal(c.expected)) - } -} - -func TestCPUSet_String(t *testing.T) { - ci.Parallel(t) - - cases := []struct { - cpuset CPUSet - expected string - }{ - {New(), ""}, - {New(0, 1, 2, 3), "0-3"}, - {New(1, 3), "1,3"}, - {New(0, 2, 3, 5), "0,2-3,5"}, - } - - for _, c := range cases { - require.Equal(t, c.expected, c.cpuset.String()) - } -} - -func TestCPUSet_Copy(t *testing.T) { - ci.Parallel(t) - - original := New(1, 2, 3, 4, 5) - copied := original.Copy() - require.True(t, original.Equal(copied)) - - delete(copied.cpus, 3) - require.False(t, original.Equal(copied)) - require.True(t, original.Equal(New(1, 2, 3, 4, 5))) - require.True(t, copied.Equal(New(1, 2, 4, 5))) -} diff --git a/nomad/fsm_test.go b/nomad/fsm_test.go index 3fec797a1765..0b92a0c390b5 100644 --- a/nomad/fsm_test.go +++ b/nomad/fsm_test.go @@ -2426,9 +2426,7 @@ func TestFSM_SnapshotRestore_Nodes(t *testing.T) { fsm2 := testSnapshotRestore(t, fsm) state2 := fsm2.State() out, _ := state2.NodeByID(nil, node.ID) - if !reflect.DeepEqual(node, out) { - t.Fatalf("bad: \n%#v\n%#v", out, node) - } + must.Eq(t, node, out) } func TestFSM_SnapshotRestore_NodePools(t *testing.T) { diff --git a/nomad/job_endpoint.go b/nomad/job_endpoint.go index 95f45254d7fa..ba8ad0f330e9 100644 --- a/nomad/job_endpoint.go +++ b/nomad/job_endpoint.go @@ -76,6 +76,7 @@ func NewJobEndpoints(s *Server, ctx *RPCContext) *Job { jobImpliedConstraints{}, jobNodePoolMutatingHook{srv: s}, jobImplicitIdentitiesHook{srv: s}, + jobNumaHook{}, }, validators: []jobValidator{ jobConnectHook{}, @@ -86,6 +87,7 @@ func NewJobEndpoints(s *Server, ctx *RPCContext) *Job { jobNodePoolValidatingHook{srv: s}, &jobValidate{srv: s}, 
&memoryOversubscriptionValidate{srv: s}, + jobNumaHook{}, }, } } diff --git a/nomad/job_endpoint_hook_numa.go b/nomad/job_endpoint_hook_numa.go new file mode 100644 index 000000000000..cf8e690ea3e6 --- /dev/null +++ b/nomad/job_endpoint_hook_numa.go @@ -0,0 +1,13 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package nomad + +// jobNumaHook implements a job Validating and Mutating admission controller. +// +// The implementations of Validate and Mutate are in _ce/_ent files. +type jobNumaHook struct{} + +func (jobNumaHook) Name() string { + return "numa" +} diff --git a/nomad/job_endpoint_hook_numa_ce.go b/nomad/job_endpoint_hook_numa_ce.go new file mode 100644 index 000000000000..168409a4c04b --- /dev/null +++ b/nomad/job_endpoint_hook_numa_ce.go @@ -0,0 +1,30 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build !ent + +package nomad + +import ( + "errors" + + "github.com/hashicorp/nomad/nomad/structs" +) + +// Validate ensures job does not contain any task making use of the +// resources.numa block, which is only supported in Nomad Enterprise. +func (jobNumaHook) Validate(job *structs.Job) ([]error, error) { + for _, tg := range job.TaskGroups { + for _, task := range tg.Tasks { + if task.Resources.NUMA.Requested() { + return nil, errors.New("numa scheduling requires Nomad Enterprise") + } + } + } + return nil, nil +} + +// Mutate does nothing. +func (jobNumaHook) Mutate(job *structs.Job) (*structs.Job, []error, error) { + return job, nil, nil +} diff --git a/nomad/job_endpoint_hook_numa_ce_test.go b/nomad/job_endpoint_hook_numa_ce_test.go new file mode 100644 index 000000000000..09f9fa32f7fc --- /dev/null +++ b/nomad/job_endpoint_hook_numa_ce_test.go @@ -0,0 +1,43 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: BUSL-1.1 + +//go:build !ent + +package nomad + +import ( + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/nomad/mock" + "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" +) + +func Test_jobNumaHook_Validate(t *testing.T) { + ci.Parallel(t) + + // ce does not allow numa block + job := mock.Job() + job.TaskGroups[0].Tasks[0].Resources.NUMA = &structs.NUMA{ + Affinity: "require", + } + + hook := jobNumaHook{} + warnings, err := hook.Validate(job) + must.SliceEmpty(t, warnings) + must.EqError(t, err, "numa scheduling requires Nomad Enterprise") +} + +func Test_jobNumaHook_Mutate(t *testing.T) { + ci.Parallel(t) + + // does not get mutated in CE + job := mock.Job() + + hook := jobNumaHook{} + result, warns, err := hook.Mutate(job) + must.NoError(t, err) + must.SliceEmpty(t, warns) + must.Eq(t, job, result) +} diff --git a/nomad/job_endpoint_hook_vault_ce.go b/nomad/job_endpoint_hook_vault_ce.go index 624ad650c206..2bcfa25e9cee 100644 --- a/nomad/job_endpoint_hook_vault_ce.go +++ b/nomad/job_endpoint_hook_vault_ce.go @@ -2,7 +2,6 @@ // SPDX-License-Identifier: BUSL-1.1 //go:build !ent -// +build !ent package nomad diff --git a/nomad/job_endpoint_test.go b/nomad/job_endpoint_test.go index 01608f0508fe..9d5f301e59d8 100644 --- a/nomad/job_endpoint_test.go +++ b/nomad/job_endpoint_test.go @@ -15,6 +15,9 @@ import ( msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" @@ -128,8 +131,18 @@ func TestJobEndpoint_Register_NonOverlapping(t *testing.T) { // Create a mock node with easy to check resources node := mock.Node() - node.Resources = nil // Deprecated in 
0.9 - node.NodeResources.Cpu.CpuShares = 700 + node.NodeResources.Processors = structs.NodeProcessorResources{ + Topology: &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{[]numalib.Cost{10}}, + Cores: []numalib.Core{{ + ID: 0, + Grade: numalib.Performance, + BaseSpeed: 700, + }}, + }, + } + node.NodeResources.Compatibility() must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1, node)) codec := rpcClient(t, s1) diff --git a/nomad/mock/node.go b/nomad/mock/node.go index 918acebfaf4c..d696a01cceb8 100644 --- a/nomad/mock/node.go +++ b/nomad/mock/node.go @@ -34,12 +34,6 @@ func Node() *structs.Node { "consul.version": "1.11.4", }, - // TODO Remove once clientv2 gets merged - Resources: &structs.Resources{ - CPU: 4000, - MemoryMB: 8192, - DiskMB: 100 * 1024, - }, Reserved: &structs.Resources{ CPU: 100, MemoryMB: 256, @@ -55,8 +49,8 @@ func Node() *structs.Node { }, NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4000, + Processors: structs.NodeProcessorResources{ + Topology: structs.MockBasicTopology(), }, Memory: structs.NodeMemoryResources{ MemoryMB: 8192, @@ -114,7 +108,13 @@ func Node() *structs.Node { Status: structs.NodeStatusReady, SchedulingEligibility: structs.NodeSchedulingEligible, } + + // compute and assign node class _ = node.ComputeClass() + + // generate legacy things + node.NodeResources.Compatibility() + return node } diff --git a/nomad/node_endpoint.go b/nomad/node_endpoint.go index d9199ba3d312..87a9c4bad891 100644 --- a/nomad/node_endpoint.go +++ b/nomad/node_endpoint.go @@ -1013,8 +1013,7 @@ func (n *Node) Evaluate(args *structs.NodeEvaluateRequest, reply *structs.NodeUp } // GetNode is used to request information about a specific node -func (n *Node) GetNode(args *structs.NodeSpecificRequest, - reply *structs.SingleNodeResponse) error { +func (n *Node) GetNode(args *structs.NodeSpecificRequest, reply *structs.SingleNodeResponse) error { authErr := 
n.srv.Authenticate(n.ctx, args) if done, err := n.srv.forward("Node.GetNode", args, args, reply); done { diff --git a/nomad/node_endpoint_test.go b/nomad/node_endpoint_test.go index c9b273efd417..de4a2a583448 100644 --- a/nomad/node_endpoint_test.go +++ b/nomad/node_endpoint_test.go @@ -2057,7 +2057,7 @@ func TestClientEndpoint_GetNode(t *testing.T) { node.StatusUpdatedAt = resp2.Node.StatusUpdatedAt node.SecretID = "" node.Events = resp2.Node.Events - require.Equal(t, node, resp2.Node) + must.Eq(t, node, resp2.Node) // assert that the node register event was set correctly if len(resp2.Node.Events) != 1 { diff --git a/nomad/plan_apply_test.go b/nomad/plan_apply_test.go index 254336062cbd..e59a21bd26d5 100644 --- a/nomad/plan_apply_test.go +++ b/nomad/plan_apply_test.go @@ -442,8 +442,10 @@ func TestPlanApply_EvalPlan_Preemption(t *testing.T) { state := testStateStore(t) node := mock.Node() node.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2000, + Cpu: structs.LegacyNodeCpuResources{ + CpuShares: 2000, + TotalCpuCores: 2, + ReservableCpuCores: []uint16{0, 1}, }, Memory: structs.NodeMemoryResources{ MemoryMB: 4192, @@ -459,6 +461,8 @@ func TestPlanApply_EvalPlan_Preemption(t *testing.T) { }, }, } + node.NodeResources.Compatibility() + state.UpsertNode(structs.MsgTypeTestSetup, 1000, node) preemptedAlloc := mock.Alloc() diff --git a/nomad/structs/diff.go b/nomad/structs/diff.go index 98acf91b8341..4c688483a675 100644 --- a/nomad/structs/diff.go +++ b/nomad/structs/diff.go @@ -2201,6 +2201,11 @@ func (r *Resources) Diff(other *Resources, contextual bool) *ObjectDiff { diff.Objects = append(diff.Objects, nDiffs...) 
} + // NUMA resources diff + if nDiff := r.NUMA.Diff(other.NUMA, contextual); nDiff != nil { + diff.Objects = append(diff.Objects, nDiff) + } + return diff } @@ -2380,6 +2385,30 @@ func portDiffs(old, new []Port, dynamic bool, contextual bool) []*ObjectDiff { } +func (r *NUMA) Diff(other *NUMA, contextual bool) *ObjectDiff { + if r.Equal(other) { + return nil + } + + diff := &ObjectDiff{Type: DiffTypeNone, Name: "NUMA"} + var oldPrimitiveFlat, newPrimitiveFlat map[string]string + + if r == nil { + diff.Type = DiffTypeAdded + newPrimitiveFlat = flatmap.Flatten(other, nil, true) + } else if other == nil { + diff.Type = DiffTypeDeleted + oldPrimitiveFlat = flatmap.Flatten(r, nil, true) + } else { + diff.Type = DiffTypeEdited + oldPrimitiveFlat = flatmap.Flatten(r, nil, true) + newPrimitiveFlat = flatmap.Flatten(other, nil, true) + } + diff.Fields = fieldDiffs(oldPrimitiveFlat, newPrimitiveFlat, contextual) + + return diff +} + // Diff returns a diff of two requested devices. If contextual diff is enabled, // non-changed fields will still be returned. 
func (r *RequestedDevice) Diff(other *RequestedDevice, contextual bool) *ObjectDiff { diff --git a/nomad/structs/funcs.go b/nomad/structs/funcs.go index 9ab168d46e98..25333fc800eb 100644 --- a/nomad/structs/funcs.go +++ b/nomad/structs/funcs.go @@ -152,7 +152,7 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi continue } - cr := alloc.ComparableResources() + cr := alloc.AllocatedResources.Comparable() used.Add(cr) // Adding the comparable resource unions reserved core sets, need to check if reserved cores overlap @@ -171,8 +171,8 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi // Check that the node resources (after subtracting reserved) are a // super set of those that are being allocated - available := node.ComparableResources() - available.Subtract(node.ComparableReservedResources()) + available := node.NodeResources.Comparable() + available.Subtract(node.ReservedResources.Comparable()) if superset, dimension := available.Superset(used); !superset { return false, dimension, used, nil } @@ -211,9 +211,8 @@ func AllocsFit(node *Node, allocs []*Allocation, netIdx *NetworkIndex, checkDevi } func computeFreePercentage(node *Node, util *ComparableResources) (freePctCpu, freePctRam float64) { - // COMPAT(0.11): Remove in 0.11 - reserved := node.ComparableReservedResources() - res := node.ComparableResources() + reserved := node.ReservedResources.Comparable() + res := node.NodeResources.Comparable() // Determine the node availability nodeCpu := float64(res.Flattened.Cpu.CpuShares) diff --git a/nomad/structs/funcs_test.go b/nomad/structs/funcs_test.go index 41784e308e25..f132e92ef349 100644 --- a/nomad/structs/funcs_test.go +++ b/nomad/structs/funcs_test.go @@ -10,7 +10,11 @@ import ( "github.com/hashicorp/nomad/acl" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" 
"github.com/hashicorp/nomad/helper/uuid" + "github.com/shoenig/test/must" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -86,208 +90,23 @@ func TestFilterTerminalAllocs(t *testing.T) { } } -// COMPAT(0.11): Remove in 0.11 -func TestAllocsFit_PortsOvercommitted_Old(t *testing.T) { - ci.Parallel(t) - - n := &Node{ - Resources: &Resources{ - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "10.0.0.0/8", - MBits: 100, - }, - }, - }, - } - - a1 := &Allocation{ - Job: &Job{ - TaskGroups: []*TaskGroup{ - { - Name: "web", - EphemeralDisk: DefaultEphemeralDisk(), - }, - }, - }, - TaskResources: map[string]*Resources{ - "web": { - Networks: []*NetworkResource{ - { - Device: "eth0", - IP: "10.0.0.1", - MBits: 50, - ReservedPorts: []Port{{"main", 8000, 80, ""}}, - }, - }, - }, - }, - } - - // Should fit one allocation - fit, dim, _, err := AllocsFit(n, []*Allocation{a1}, nil, false) - if err != nil { - t.Fatalf("err: %v", err) - } - if !fit { - t.Fatalf("Bad: %s", dim) - } - - // Should not fit second allocation - fit, _, _, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false) - if err != nil { - t.Fatalf("err: %v", err) - } - if fit { - t.Fatalf("Bad") - } -} - -// COMPAT(0.11): Remove in 0.11 -func TestAllocsFit_Old(t *testing.T) { - ci.Parallel(t) - - require := require.New(t) - - n := &Node{ - Resources: &Resources{ - CPU: 2000, - MemoryMB: 2048, - DiskMB: 10000, - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "10.0.0.0/8", - MBits: 100, - }, - }, - }, - Reserved: &Resources{ - CPU: 1000, - MemoryMB: 1024, - DiskMB: 5000, - Networks: []*NetworkResource{ - { - Device: "eth0", - IP: "10.0.0.1", - MBits: 50, - ReservedPorts: []Port{{"main", 80, 0, ""}}, - }, - }, - }, - } - - a1 := &Allocation{ - Resources: &Resources{ - CPU: 1000, - MemoryMB: 1024, - DiskMB: 5000, - Networks: []*NetworkResource{ - { - Device: "eth0", - IP: "10.0.0.1", - MBits: 50, - ReservedPorts: []Port{{"main", 8000, 80, ""}}, - }, - }, - }, - } - 
- // Should fit one allocation - fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false) - require.NoError(err) - require.True(fit) - require.EqualValues(1000, used.Flattened.Cpu.CpuShares) - require.EqualValues(1024, used.Flattened.Memory.MemoryMB) - - // Should not fit second allocation - fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false) - require.NoError(err) - require.False(fit) - require.EqualValues(2000, used.Flattened.Cpu.CpuShares) - require.EqualValues(2048, used.Flattened.Memory.MemoryMB) -} - -// COMPAT(0.11): Remove in 0.11 -func TestAllocsFit_TerminalAlloc_Old(t *testing.T) { - ci.Parallel(t) - - require := require.New(t) - - n := &Node{ - Resources: &Resources{ - CPU: 2000, - MemoryMB: 2048, - DiskMB: 10000, - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "10.0.0.0/8", - MBits: 100, - }, - }, - }, - Reserved: &Resources{ - CPU: 1000, - MemoryMB: 1024, - DiskMB: 5000, - Networks: []*NetworkResource{ - { - Device: "eth0", - IP: "10.0.0.1", - MBits: 50, - ReservedPorts: []Port{{"main", 80, 0, ""}}, - }, - }, - }, - } - - a1 := &Allocation{ - Resources: &Resources{ - CPU: 1000, - MemoryMB: 1024, - DiskMB: 5000, - Networks: []*NetworkResource{ - { - Device: "eth0", - IP: "10.0.0.1", - MBits: 50, - ReservedPorts: []Port{{"main", 8000, 0, ""}}, - }, - }, - }, - } - - // Should fit one allocation - fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false) - require.NoError(err) - require.True(fit) - require.EqualValues(1000, used.Flattened.Cpu.CpuShares) - require.EqualValues(1024, used.Flattened.Memory.MemoryMB) - - // Should fit second allocation since it is terminal - a2 := a1.Copy() - a2.DesiredStatus = AllocDesiredStatusStop - a2.ClientStatus = AllocClientStatusComplete - fit, _, used, err = AllocsFit(n, []*Allocation{a1, a2}, nil, false) - require.NoError(err) - require.True(fit) - require.EqualValues(1000, used.Flattened.Cpu.CpuShares) - require.EqualValues(1024, used.Flattened.Memory.MemoryMB) -} - -func 
TestAllocsFit(t *testing.T) { - ci.Parallel(t) - - require := require.New(t) - +func node2k() *Node { n := &Node{ NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 2000, - TotalCpuCores: 2, - ReservableCpuCores: []uint16{0, 1}, + Processors: NodeProcessorResources{ + Topology: &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{[]numalib.Cost{10}}, + Cores: []numalib.Core{{ + ID: 0, + Grade: numalib.Performance, + BaseSpeed: 1000, + }, { + ID: 1, + Grade: numalib.Performance, + BaseSpeed: 1000, + }}, + }, }, Memory: NodeMemoryResources{ MemoryMB: 2048, @@ -329,6 +148,14 @@ func TestAllocsFit(t *testing.T) { }, }, } + n.NodeResources.Compatibility() + return n +} + +func TestAllocsFit(t *testing.T) { + ci.Parallel(t) + + n := node2k() a1 := &Allocation{ AllocatedResources: &AllocatedResources{ @@ -365,17 +192,17 @@ func TestAllocsFit(t *testing.T) { // Should fit one allocation fit, dim, used, err := AllocsFit(n, []*Allocation{a1}, nil, false) - require.NoError(err) - require.True(fit, "failed for dimension %q", dim) - require.EqualValues(1000, used.Flattened.Cpu.CpuShares) - require.EqualValues(1024, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.True(t, fit, must.Sprintf("failed for dimension %q", dim)) + must.Eq(t, 1000, used.Flattened.Cpu.CpuShares) + must.Eq(t, 1024, used.Flattened.Memory.MemoryMB) // Should not fit second allocation fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false) - require.NoError(err) - require.False(fit) - require.EqualValues(2000, used.Flattened.Cpu.CpuShares) - require.EqualValues(2048, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.False(t, fit) + must.Eq(t, 2000, used.Flattened.Cpu.CpuShares) + must.Eq(t, 2048, used.Flattened.Memory.MemoryMB) a2 := &Allocation{ AllocatedResources: &AllocatedResources{ @@ -404,62 +231,26 @@ func TestAllocsFit(t *testing.T) { // Should fit one allocation fit, dim, used, err = AllocsFit(n, 
[]*Allocation{a2}, nil, false) - require.NoError(err) - require.True(fit, "failed for dimension %q", dim) - require.EqualValues(500, used.Flattened.Cpu.CpuShares) - require.EqualValues([]uint16{0}, used.Flattened.Cpu.ReservedCores) - require.EqualValues(512, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.True(t, fit, must.Sprintf("failed for dimension %q", dim)) + must.Eq(t, 500, used.Flattened.Cpu.CpuShares) + must.Eq(t, []uint16{0}, used.Flattened.Cpu.ReservedCores) + must.Eq(t, 512, used.Flattened.Memory.MemoryMB) // Should not fit second allocation fit, dim, used, err = AllocsFit(n, []*Allocation{a2, a2}, nil, false) - require.NoError(err) - require.False(fit) - require.EqualValues("cores", dim) - require.EqualValues(1000, used.Flattened.Cpu.CpuShares) - require.EqualValues([]uint16{0}, used.Flattened.Cpu.ReservedCores) - require.EqualValues(1024, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.False(t, fit) + must.Eq(t, "cores", dim) + must.Eq(t, 1000, used.Flattened.Cpu.CpuShares) + must.Eq(t, []uint16{0}, used.Flattened.Cpu.ReservedCores) + must.Eq(t, 1024, used.Flattened.Memory.MemoryMB) } func TestAllocsFit_TerminalAlloc(t *testing.T) { ci.Parallel(t) - require := require.New(t) - - n := &Node{ - NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 2000, - }, - Memory: NodeMemoryResources{ - MemoryMB: 2048, - }, - Disk: NodeDiskResources{ - DiskMB: 10000, - }, - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "10.0.0.0/8", - IP: "10.0.0.1", - MBits: 100, - }, - }, - }, - ReservedResources: &NodeReservedResources{ - Cpu: NodeReservedCpuResources{ - CpuShares: 1000, - }, - Memory: NodeReservedMemoryResources{ - MemoryMB: 1024, - }, - Disk: NodeReservedDiskResources{ - DiskMB: 5000, - }, - Networks: NodeReservedNetworkResources{ - ReservedHostPorts: "80", - }, - }, - } + n := node2k() a1 := &Allocation{ AllocatedResources: &AllocatedResources{ @@ -489,20 +280,20 @@ func TestAllocsFit_TerminalAlloc(t 
*testing.T) { // Should fit one allocation fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false) - require.NoError(err) - require.True(fit) - require.EqualValues(1000, used.Flattened.Cpu.CpuShares) - require.EqualValues(1024, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.True(t, fit) + must.Eq(t, 1000, used.Flattened.Cpu.CpuShares) + must.Eq(t, 1024, used.Flattened.Memory.MemoryMB) // Should fit second allocation since it is terminal a2 := a1.Copy() a2.DesiredStatus = AllocDesiredStatusStop a2.ClientStatus = AllocClientStatusComplete fit, dim, used, err := AllocsFit(n, []*Allocation{a1, a2}, nil, false) - require.NoError(err) - require.True(fit, dim) - require.EqualValues(1000, used.Flattened.Cpu.CpuShares) - require.EqualValues(1024, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.True(t, fit, must.Sprintf("bad dimension: %q", dim)) + must.Eq(t, 1000, used.Flattened.Cpu.CpuShares) + must.Eq(t, 1024, used.Flattened.Memory.MemoryMB) } // TestAllocsFit_ClientTerminalAlloc asserts that allocs which have a terminal @@ -510,42 +301,7 @@ func TestAllocsFit_TerminalAlloc(t *testing.T) { func TestAllocsFit_ClientTerminalAlloc(t *testing.T) { ci.Parallel(t) - n := &Node{ - ID: "test-node", - NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 2000, - }, - Memory: NodeMemoryResources{ - MemoryMB: 2048, - }, - Disk: NodeDiskResources{ - DiskMB: 10000, - }, - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "10.0.0.0/8", - IP: "10.0.0.1", - MBits: 100, - }, - }, - }, - ReservedResources: &NodeReservedResources{ - Cpu: NodeReservedCpuResources{ - CpuShares: 1000, - }, - Memory: NodeReservedMemoryResources{ - MemoryMB: 1024, - }, - Disk: NodeReservedDiskResources{ - DiskMB: 5000, - }, - Networks: NodeReservedNetworkResources{ - ReservedHostPorts: "80", - }, - }, - } + n := node2k() liveAlloc := &Allocation{ ID: "test-alloc-live", @@ -584,10 +340,10 @@ func TestAllocsFit_ClientTerminalAlloc(t *testing.T) { // 
*Should* fit both allocations since deadAlloc is not running on the // client fit, _, used, err := AllocsFit(n, []*Allocation{liveAlloc, deadAlloc}, nil, false) - require.NoError(t, err) - require.True(t, fit) - require.EqualValues(t, 1000, used.Flattened.Cpu.CpuShares) - require.EqualValues(t, 1024, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.True(t, fit) + must.Eq(t, 1000, used.Flattened.Cpu.CpuShares) + must.Eq(t, 1024, used.Flattened.Memory.MemoryMB) } // TestAllocsFit_ServerTerminalAlloc asserts that allocs which have a terminal @@ -596,42 +352,7 @@ func TestAllocsFit_ClientTerminalAlloc(t *testing.T) { func TestAllocsFit_ServerTerminalAlloc(t *testing.T) { ci.Parallel(t) - n := &Node{ - ID: "test-node", - NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 2000, - }, - Memory: NodeMemoryResources{ - MemoryMB: 2048, - }, - Disk: NodeDiskResources{ - DiskMB: 10000, - }, - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "10.0.0.0/8", - IP: "10.0.0.1", - MBits: 100, - }, - }, - }, - ReservedResources: &NodeReservedResources{ - Cpu: NodeReservedCpuResources{ - CpuShares: 1000, - }, - Memory: NodeReservedMemoryResources{ - MemoryMB: 1024, - }, - Disk: NodeReservedDiskResources{ - DiskMB: 5000, - }, - Networks: NodeReservedNetworkResources{ - ReservedHostPorts: "80", - }, - }, - } + n := node2k() liveAlloc := &Allocation{ ID: "test-alloc-live", @@ -669,10 +390,10 @@ func TestAllocsFit_ServerTerminalAlloc(t *testing.T) { // Should *not* fit both allocations since deadAlloc is still running fit, _, used, err := AllocsFit(n, []*Allocation{liveAlloc, deadAlloc}, nil, false) - require.NoError(t, err) - require.False(t, fit) - require.EqualValues(t, 2000, used.Flattened.Cpu.CpuShares) - require.EqualValues(t, 2048, used.Flattened.Memory.MemoryMB) + must.NoError(t, err) + must.False(t, fit) + must.Eq(t, 2000, used.Flattened.Cpu.CpuShares) + must.Eq(t, 2048, used.Flattened.Memory.MemoryMB) } // Tests that AllocsFit detects 
device collisions @@ -748,16 +469,9 @@ func TestAllocsFit_Devices(t *testing.T) { func TestAllocsFit_MemoryOversubscription(t *testing.T) { ci.Parallel(t) - n := &Node{ - NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 2000, - }, - Memory: NodeMemoryResources{ - MemoryMB: 2048, - }, - }, - } + n := node2k() + n.NodeResources.Memory.MemoryMB = 2048 + n.ReservedResources = nil a1 := &Allocation{ AllocatedResources: &AllocatedResources{ @@ -776,91 +490,28 @@ func TestAllocsFit_MemoryOversubscription(t *testing.T) { } // Should fit one allocation - fit, _, used, err := AllocsFit(n, []*Allocation{a1}, nil, false) - require.NoError(t, err) - require.True(t, fit) - require.EqualValues(t, 100, used.Flattened.Cpu.CpuShares) - require.EqualValues(t, 1000, used.Flattened.Memory.MemoryMB) - require.EqualValues(t, 4000, used.Flattened.Memory.MemoryMaxMB) + fit, dim, used, err := AllocsFit(n, []*Allocation{a1}, nil, false) + must.NoError(t, err) + must.True(t, fit, must.Sprintf("bad dimension: %q", dim)) + must.Eq(t, 100, used.Flattened.Cpu.CpuShares) + must.Eq(t, 1000, used.Flattened.Memory.MemoryMB) + must.Eq(t, 4000, used.Flattened.Memory.MemoryMaxMB) // Should fit second allocation - fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false) - require.NoError(t, err) - require.True(t, fit) - require.EqualValues(t, 200, used.Flattened.Cpu.CpuShares) - require.EqualValues(t, 2000, used.Flattened.Memory.MemoryMB) - require.EqualValues(t, 8000, used.Flattened.Memory.MemoryMaxMB) + fit, dim, used, err = AllocsFit(n, []*Allocation{a1, a1}, nil, false) + must.NoError(t, err) + must.True(t, fit, must.Sprintf("bad dimension: %q", dim)) + must.Eq(t, 200, used.Flattened.Cpu.CpuShares) + must.Eq(t, 2000, used.Flattened.Memory.MemoryMB) + must.Eq(t, 8000, used.Flattened.Memory.MemoryMaxMB) // Should not fit a third allocation - fit, _, used, err = AllocsFit(n, []*Allocation{a1, a1, a1}, nil, false) - require.NoError(t, err) - require.False(t, fit) - 
require.EqualValues(t, 300, used.Flattened.Cpu.CpuShares) - require.EqualValues(t, 3000, used.Flattened.Memory.MemoryMB) - require.EqualValues(t, 12000, used.Flattened.Memory.MemoryMaxMB) -} - -// COMPAT(0.11): Remove in 0.11 -func TestScoreFitBinPack_Old(t *testing.T) { - ci.Parallel(t) - - node := &Node{} - node.Resources = &Resources{ - CPU: 4096, - MemoryMB: 8192, - } - node.Reserved = &Resources{ - CPU: 2048, - MemoryMB: 4096, - } - - // Test a perfect fit - util := &ComparableResources{ - Flattened: AllocatedTaskResources{ - Cpu: AllocatedCpuResources{ - CpuShares: 2048, - }, - Memory: AllocatedMemoryResources{ - MemoryMB: 4096, - }, - }, - } - score := ScoreFitBinPack(node, util) - if score != 18.0 { - t.Fatalf("bad: %v", score) - } - - // Test the worst fit - util = &ComparableResources{ - Flattened: AllocatedTaskResources{ - Cpu: AllocatedCpuResources{ - CpuShares: 0, - }, - Memory: AllocatedMemoryResources{ - MemoryMB: 0, - }, - }, - } - score = ScoreFitBinPack(node, util) - if score != 0.0 { - t.Fatalf("bad: %v", score) - } - - // Test a mid-case scenario - util = &ComparableResources{ - Flattened: AllocatedTaskResources{ - Cpu: AllocatedCpuResources{ - CpuShares: 1024, - }, - Memory: AllocatedMemoryResources{ - MemoryMB: 2048, - }, - }, - } - score = ScoreFitBinPack(node, util) - if score < 10.0 || score > 16.0 { - t.Fatalf("bad: %v", score) - } + fit, dim, used, err = AllocsFit(n, []*Allocation{a1, a1, a1}, nil, false) + must.NoError(t, err) + must.False(t, fit, must.Sprintf("bad dimension: %q", dim)) + must.Eq(t, 300, used.Flattened.Cpu.CpuShares) + must.Eq(t, 3000, used.Flattened.Memory.MemoryMB) + must.Eq(t, 12000, used.Flattened.Memory.MemoryMaxMB) } func TestScoreFitBinPack(t *testing.T) { @@ -868,13 +519,22 @@ func TestScoreFitBinPack(t *testing.T) { node := &Node{} node.NodeResources = &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 4096, + Processors: NodeProcessorResources{ + Topology: &numalib.Topology{ + NodeIDs: 
idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{[]numalib.Cost{10}}, + Cores: []numalib.Core{{ + ID: 0, + Grade: numalib.Performance, + BaseSpeed: 4096, + }}, + }, }, Memory: NodeMemoryResources{ MemoryMB: 8192, }, } + node.NodeResources.Compatibility() node.ReservedResources = &NodeReservedResources{ Cpu: NodeReservedCpuResources{ CpuShares: 2048, diff --git a/nomad/structs/node_class_test.go b/nomad/structs/node_class_test.go index baccaa095449..fe8265dc8406 100644 --- a/nomad/structs/node_class_test.go +++ b/nomad/structs/node_class_test.go @@ -27,7 +27,7 @@ func testNode() *Node { "driver.exec": "1", }, NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ + Cpu: LegacyNodeCpuResources{ CpuShares: 4000, }, Memory: NodeMemoryResources{ diff --git a/nomad/structs/numa.go b/nomad/structs/numa.go new file mode 100644 index 000000000000..027427d482f2 --- /dev/null +++ b/nomad/structs/numa.go @@ -0,0 +1,181 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package structs + +import ( + "errors" + "fmt" + + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" + "github.com/hashicorp/nomad/helper" +) + +const ( + // NoneNUMA indicates no NUMA aware scheduling is requested for the task + NoneNUMA = "none" + + // PreferNUMA indicates nodes with NUMA ideal cores should be used if available + PreferNUMA = "prefer" + + // RequireNUMA indicates a task must be placed on a node with available NUMA ideal cores + RequireNUMA = "require" +) + +type NUMA struct { + // Affinity is the numa affinity scheduling behavior. + // One of "none", "prefer", "require". 
+ Affinity string +} + +func (n *NUMA) Equal(o *NUMA) bool { + if n == nil || o == nil { + return n == o + } + return n.Affinity == o.Affinity +} + +func (n *NUMA) Copy() *NUMA { + if n == nil { + return nil + } + return &NUMA{ + Affinity: n.Affinity, + } +} + +func (n *NUMA) Validate() error { + if n == nil { + return nil + } + switch n.Affinity { + case NoneNUMA, PreferNUMA, RequireNUMA: + return nil + default: + return errors.New("numa affinity must be one of none, prefer, or require") + } +} + +// Requested returns true if the NUMA.Affinity is set to one of "prefer" or +// "require" and will require such CPU cores for scheduling. +func (n *NUMA) Requested() bool { + if n == nil || n.Affinity == NoneNUMA { + return false + } + return true +} + +// LegacyNodeCpuResources is the pre-1.7 CPU resources struct. It remains here +// for compatibility and can be removed in Nomad 1.9+. +// +// Deprecated; use NodeProcessorResources instead. +type LegacyNodeCpuResources struct { + // Deprecated; do not use this value except for compatibility. + CpuShares int64 + + // Deprecated; do not use this value except for compatibility. + TotalCpuCores uint16 + + // Deprecated; do not use this value except for compatibility. + ReservableCpuCores []uint16 +} + +// partial struct serialization / copy / merge sadness means this struct can +// exist with no data, which is a condition we must detect during the upgrade path +func (r LegacyNodeCpuResources) empty() bool { + return r.CpuShares == 0 || r.TotalCpuCores == 0 || len(r.ReservableCpuCores) == 0 +} + +// NodeProcessorResources captures the CPU hardware resources of the Nomad node. +// +// In Nomad enterprise this structure is used to map tasks to NUMA nodes. 
+type NodeProcessorResources struct { + // Topology is here to serve as a reference + Topology *numalib.Topology // do not modify +} + +// partial struct serialization / copy / merge sadness means this struct can +// exist with no data, which is a condition we must detect during the upgrade path +func (r NodeProcessorResources) empty() bool { + return r.Topology == nil || len(r.Topology.Cores) == 0 +} + +func NewNodeProcessorResources(top *numalib.Topology) NodeProcessorResources { + return NodeProcessorResources{ + Topology: top, + } +} + +func (r *NodeProcessorResources) String() string { + if r == nil || r.Topology == nil { + return "(nil)" + } + return fmt.Sprintf("(%d,%d)", r.Topology.NumECores(), r.Topology.NumPCores()) +} + +func (r *NodeProcessorResources) Copy() NodeProcessorResources { + return NodeProcessorResources{ + Topology: r.Topology, + } +} + +func (r *NodeProcessorResources) Merge(o *NodeProcessorResources) { + if o == nil || o.Topology == nil { + return + } + r.Topology = o.Topology +} + +func (r *NodeProcessorResources) Equal(o *NodeProcessorResources) bool { + if r == nil || o == nil { + return r == o + } + return r.Topology.Equal(o.Topology) +} + +func (r *NodeProcessorResources) TotalCompute() int { + if r == nil || r.Topology == nil { + return 0 + } + return int(r.Topology.TotalCompute()) +} + +func topologyFromLegacy(old LegacyNodeCpuResources) *numalib.Topology { + // interpret per-core frequency given total compute and total core count + frequency := hw.MHz(old.CpuShares / (int64(len(old.ReservableCpuCores)))) + + cores := helper.ConvertSlice( + old.ReservableCpuCores, + func(id uint16) numalib.Core { + return numalib.Core{ + ID: hw.CoreID(id), + SocketID: 0, // legacy: assume single socket with id 0 + NodeID: 0, // legacy: assume single numa node with id 0 + Grade: numalib.Performance, + Disable: false, // only usable cores in the source + GuessSpeed: frequency, + } + }, + ) + + withheld := (frequency * hw.MHz(old.TotalCpuCores)) - 
hw.MHz(old.CpuShares) + + return &numalib.Topology{ + // legacy: assume one node with id 0 + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + + // legacy: with one node the distance matrix is 1-D + Distances: numalib.SLIT{{10}}, + + // legacy: a pseudo representation of each actual core profile + Cores: cores, + + // legacy: set since we have the value + OverrideTotalCompute: hw.MHz(old.CpuShares), + + // legacy: set since we can compute the value + OverrideWitholdCompute: withheld, + } +} diff --git a/nomad/structs/numa_test.go b/nomad/structs/numa_test.go new file mode 100644 index 000000000000..2aa823825535 --- /dev/null +++ b/nomad/structs/numa_test.go @@ -0,0 +1,172 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +package structs + +import ( + "errors" + "testing" + + "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" + "github.com/shoenig/test/must" +) + +func TestNUMA_Equal(t *testing.T) { + ci.Parallel(t) + + must.Equal[*NUMA](t, nil, nil) + must.NotEqual[*NUMA](t, nil, new(NUMA)) + + must.StructEqual(t, &NUMA{ + Affinity: "none", + }, []must.Tweak[*NUMA]{{ + Field: "Affinity", + Apply: func(n *NUMA) { n.Affinity = "require" }, + }}) +} + +func TestNUMA_Validate(t *testing.T) { + ci.Parallel(t) + + err := errors.New("numa affinity must be one of none, prefer, or require") + + cases := []struct { + name string + affinity string + exp error + }{ + { + name: "affinity unset", + affinity: "", + exp: err, + }, + { + name: "affinity none", + affinity: "none", + exp: nil, + }, + { + name: "affinity prefer", + affinity: "prefer", + exp: nil, + }, + { + name: "affinity require", + affinity: "require", + exp: nil, + }, + { + name: "affinity invalid", + affinity: "invalid", + exp: err, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + numa := &NUMA{ + tc.affinity, + } + result := 
numa.Validate() + must.Eq(t, tc.exp, result) + }) + } +} + +func TestNUMA_Copy(t *testing.T) { + ci.Parallel(t) + + n := &NUMA{Affinity: "require"} + c := n.Copy() + must.Equal(t, n, c) + + n.Affinity = "prefer" + must.NotEqual(t, n, c) +} + +func makeLegacyCore(id hw.CoreID) numalib.Core { + return numalib.Core{ + SocketID: 0, + NodeID: 0, + ID: id, + Grade: numalib.Performance, + Disable: false, + GuessSpeed: 3200, + } +} + +func TestNUMA_topologyFromLegacy_plain(t *testing.T) { + ci.Parallel(t) + + old := LegacyNodeCpuResources{ + CpuShares: 12800, + TotalCpuCores: 4, + ReservableCpuCores: []uint16{ + 0, 1, 2, 3, + }, + } + + result := topologyFromLegacy(old) + + exp := &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{{10}}, + Cores: []numalib.Core{ + makeLegacyCore(0), + makeLegacyCore(1), + makeLegacyCore(2), + makeLegacyCore(3), + }, + OverrideTotalCompute: 12800, + OverrideWitholdCompute: 0, + } + + // only compares total compute + must.Equal(t, exp, result) + + // check underlying fields + must.Eq(t, exp.NodeIDs, result.NodeIDs) + must.Eq(t, exp.Distances, result.Distances) + must.Eq(t, exp.Cores, result.Cores) + must.Eq(t, exp.OverrideTotalCompute, result.OverrideTotalCompute) + must.Eq(t, exp.OverrideWitholdCompute, result.OverrideWitholdCompute) +} + +func TestNUMA_topologyFromLegacy_reservations(t *testing.T) { + ci.Parallel(t) + + old := LegacyNodeCpuResources{ + CpuShares: 9600, + TotalCpuCores: 4, + ReservableCpuCores: []uint16{ + 1, 2, 3, // core 0 excluded + }, + } + + result := topologyFromLegacy(old) + + exp := &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{{10}}, + Cores: []numalib.Core{ + makeLegacyCore(1), + makeLegacyCore(2), + makeLegacyCore(3), + }, + OverrideTotalCompute: 9600, + OverrideWitholdCompute: 3200, // core 0 excluded + } + + // only compares total compute + must.Equal(t, exp, result) + + // check underlying fields + must.Eq(t, 
exp.NodeIDs, result.NodeIDs) + must.Eq(t, exp.Distances, result.Distances) + must.Eq(t, exp.Cores, result.Cores) + must.Eq(t, exp.OverrideTotalCompute, result.OverrideTotalCompute) + must.Eq(t, exp.OverrideWitholdCompute, result.OverrideWitholdCompute) +} diff --git a/nomad/structs/structs.go b/nomad/structs/structs.go index 332b442f442a..195b689f0002 100644 --- a/nomad/structs/structs.go +++ b/nomad/structs/structs.go @@ -46,7 +46,6 @@ import ( "github.com/hashicorp/nomad/helper/escapingfs" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" - "github.com/hashicorp/nomad/lib/cpuset" "github.com/hashicorp/nomad/lib/kheap" psstructs "github.com/hashicorp/nomad/plugins/shared/structs" "github.com/miekg/dns" @@ -2309,66 +2308,6 @@ func (n *Node) TerminalStatus() bool { } } -// ComparableReservedResources returns the reserved resouces on the node -// handling upgrade paths. Reserved networks must be handled separately. After -// 0.11 calls to this should be replaced with: -// node.ReservedResources.Comparable() -// -// COMPAT(0.11): Remove in 0.11 -func (n *Node) ComparableReservedResources() *ComparableResources { - // See if we can no-op - if n.Reserved == nil && n.ReservedResources == nil { - return nil - } - - // Node already has 0.9+ behavior - if n.ReservedResources != nil { - return n.ReservedResources.Comparable() - } - - // Upgrade path - return &ComparableResources{ - Flattened: AllocatedTaskResources{ - Cpu: AllocatedCpuResources{ - CpuShares: int64(n.Reserved.CPU), - }, - Memory: AllocatedMemoryResources{ - MemoryMB: int64(n.Reserved.MemoryMB), - }, - }, - Shared: AllocatedSharedResources{ - DiskMB: int64(n.Reserved.DiskMB), - }, - } -} - -// ComparableResources returns the resouces on the node -// handling upgrade paths. Networking must be handled separately. 
After 0.11 -// calls to this should be replaced with: node.NodeResources.Comparable() -// -// // COMPAT(0.11): Remove in 0.11 -func (n *Node) ComparableResources() *ComparableResources { - // Node already has 0.9+ behavior - if n.NodeResources != nil { - return n.NodeResources.Comparable() - } - - // Upgrade path - return &ComparableResources{ - Flattened: AllocatedTaskResources{ - Cpu: AllocatedCpuResources{ - CpuShares: int64(n.Resources.CPU), - }, - Memory: AllocatedMemoryResources{ - MemoryMB: int64(n.Resources.MemoryMB), - }, - }, - Shared: AllocatedSharedResources{ - DiskMB: int64(n.Resources.DiskMB), - }, - } -} - func (n *Node) IsInAnyDC(datacenters []string) bool { for _, dc := range datacenters { if glob.Glob(dc, n.Datacenter) { @@ -2476,6 +2415,7 @@ type Resources struct { IOPS int // COMPAT(0.10): Only being used to issue warnings Networks Networks Devices ResourceDevices + NUMA *NUMA } const ( @@ -2592,6 +2532,20 @@ func (r *Resources) Equal(o *Resources) bool { // COMPAT(0.10): Remove in 0.10. type ResourceDevices []*RequestedDevice +// Copy ResourceDevices +// +// COMPAT(0.10): Remove in 0.10. +func (d ResourceDevices) Copy() ResourceDevices { + if d == nil { + return nil + } + c := make(ResourceDevices, len(d)) + for i, device := range d { + c[i] = device.Copy() + } + return c +} + // Equal ResourceDevices as set keyed by Name. 
// // COMPAT(0.10): Remove in 0.10 @@ -2657,22 +2611,17 @@ func (r *Resources) Copy() *Resources { if r == nil { return nil } - newR := new(Resources) - *newR = *r - - // Copy the network objects - newR.Networks = r.Networks.Copy() - - // Copy the devices - if r.Devices != nil { - n := len(r.Devices) - newR.Devices = make([]*RequestedDevice, n) - for i := 0; i < n; i++ { - newR.Devices[i] = r.Devices[i].Copy() - } + return &Resources{ + CPU: r.CPU, + Cores: r.Cores, + MemoryMB: r.MemoryMB, + MemoryMaxMB: r.MemoryMaxMB, + DiskMB: r.DiskMB, + IOPS: r.IOPS, + Networks: r.Networks.Copy(), + Devices: r.Devices.Copy(), + NUMA: r.NUMA.Copy(), } - - return newR } // NetIndex finds the matching net index using device name @@ -3145,10 +3094,15 @@ func (r *RequestedDevice) Validate() error { // NodeResources is used to define the resources available on a client node. type NodeResources struct { - Cpu NodeCpuResources - Memory NodeMemoryResources - Disk NodeDiskResources - Devices []*NodeDeviceResource + // Do not read from this value except for compatibility (i.e. serialization). + // + // Deprecated; use NodeProcessorResources instead. + Cpu LegacyNodeCpuResources + + Processors NodeProcessorResources + Memory NodeMemoryResources + Disk NodeDiskResources + Devices []*NodeDeviceResource // NodeNetworks was added in Nomad 0.12 to support multiple interfaces. // It is the superset of host_networks, fingerprinted networks, and the @@ -3166,6 +3120,30 @@ type NodeResources struct { MaxDynamicPort int } +// Compatibility will translate the LegacyNodeCpuResources into NodeProcessor +// Resources, or the other way around as needed. 
+func (n *NodeResources) Compatibility() { + // Copy values from n.Processors to n.Cpu for compatibility + // + // COMPAT: added in Nomad 1.7; can be removed in 1.9+ + if n.Processors.Topology == nil && !n.Cpu.empty() { + // When we receive a node update from a pre-1.7 client it contains only + // the LegacyNodeCpuResources field, and so we synthesize a pseudo + // NodeProcessorResources field + n.Processors.Topology = topologyFromLegacy(n.Cpu) + } else if !n.Processors.empty() { + // When we receive a node update from a 1.7+ client it contains a + // NodeProcessorResources field, and we populate the LegacyNodeCpuResources + // field using that information. + n.Cpu.CpuShares = int64(n.Processors.TotalCompute()) + n.Cpu.TotalCpuCores = uint16(n.Processors.Topology.UsableCores().Size()) + cores := n.Processors.Topology.UsableCores().Slice() + n.Cpu.ReservableCpuCores = helper.ConvertSlice(cores, func(coreID hw.CoreID) uint16 { + return uint16(coreID) + }) + } +} + func (n *NodeResources) Copy() *NodeResources { if n == nil { return nil @@ -3173,7 +3151,7 @@ func (n *NodeResources) Copy() *NodeResources { newN := new(NodeResources) *newN = *n - newN.Cpu = n.Cpu.Copy() + newN.Processors = n.Processors.Copy() newN.Networks = n.Networks.Copy() if n.NodeNetworks != nil { @@ -3192,6 +3170,9 @@ func (n *NodeResources) Copy() *NodeResources { } } + // apply compatibility fixups + n.Compatibility() + return newN } @@ -3202,11 +3183,16 @@ func (n *NodeResources) Comparable() *ComparableResources { return nil } + usableCores := n.Processors.Topology.UsableCores().Slice() + reservableCores := helper.ConvertSlice(usableCores, func(id hw.CoreID) uint16 { + return uint16(id) + }) + c := &ComparableResources{ Flattened: AllocatedTaskResources{ Cpu: AllocatedCpuResources{ - CpuShares: n.Cpu.CpuShares, - ReservedCores: n.Cpu.ReservableCpuCores, + CpuShares: int64(n.Processors.Topology.UsableCompute()), + ReservedCores: reservableCores, }, Memory: AllocatedMemoryResources{ MemoryMB: 
n.Memory.MemoryMB, @@ -3225,7 +3211,7 @@ func (n *NodeResources) Merge(o *NodeResources) { return } - n.Cpu.Merge(&o.Cpu) + n.Processors.Merge(&o.Processors) n.Memory.Merge(&o.Memory) n.Disk.Merge(&o.Disk) @@ -3246,6 +3232,9 @@ func (n *NodeResources) Merge(o *NodeResources) { } } } + + // apply compatibility fixups + n.Compatibility() } func lookupNetworkByDevice(nets []*NodeNetworkResource, name string) (int, *NodeNetworkResource) { @@ -3266,7 +3255,7 @@ func (n *NodeResources) Equal(o *NodeResources) bool { return false } - if !n.Cpu.Equal(&o.Cpu) { + if !n.Processors.Equal(&o.Processors) { return false } if !n.Memory.Equal(&o.Memory) { @@ -3351,82 +3340,6 @@ func NodeNetworksEquals(n1, n2 []*NodeNetworkResource) bool { } -// NodeCpuResources captures the CPU resources of the node. -type NodeCpuResources struct { - // CpuShares is the CPU shares available. This is calculated by number of - // cores multiplied by the core frequency. - CpuShares int64 - - // TotalCpuCores is the total number of cores on the machine. This includes cores not in - // the agent's cpuset if on a linux platform - TotalCpuCores uint16 - - // ReservableCpuCores is the set of cpus which are available to be reserved on the Node. - // This value is currently only reported on Linux platforms which support cgroups and is - // discovered by inspecting the cpuset of the agent's cgroup. 
- ReservableCpuCores []uint16 -} - -func (n NodeCpuResources) Copy() NodeCpuResources { - newN := n - if n.ReservableCpuCores != nil { - newN.ReservableCpuCores = make([]uint16, len(n.ReservableCpuCores)) - copy(newN.ReservableCpuCores, n.ReservableCpuCores) - } - - return newN -} - -func (n *NodeCpuResources) Merge(o *NodeCpuResources) { - if o == nil { - return - } - - if o.CpuShares != 0 { - n.CpuShares = o.CpuShares - } - - if o.TotalCpuCores != 0 { - n.TotalCpuCores = o.TotalCpuCores - } - - if len(o.ReservableCpuCores) != 0 { - n.ReservableCpuCores = o.ReservableCpuCores - } -} - -func (n *NodeCpuResources) Equal(o *NodeCpuResources) bool { - if o == nil && n == nil { - return true - } else if o == nil { - return false - } else if n == nil { - return false - } - - if n.CpuShares != o.CpuShares { - return false - } - - if n.TotalCpuCores != o.TotalCpuCores { - return false - } - - if len(n.ReservableCpuCores) != len(o.ReservableCpuCores) { - return false - } - for i := range n.ReservableCpuCores { - if n.ReservableCpuCores[i] != o.ReservableCpuCores[i] { - return false - } - } - return true -} - -func (n *NodeCpuResources) SharesPerCore() int64 { - return n.CpuShares / int64(n.TotalCpuCores) -} - // NodeMemoryResources captures the memory resources of the node type NodeMemoryResources struct { // MemoryMB is the total available memory on the node @@ -4103,9 +4016,14 @@ func (a *AllocatedCpuResources) Add(delta *AllocatedCpuResources) { return } + // add cpu bandwidth a.CpuShares += delta.CpuShares - a.ReservedCores = cpuset.New(a.ReservedCores...).Union(cpuset.New(delta.ReservedCores...)).ToSlice() + // add cpu cores + cores := idset.From[uint16](a.ReservedCores) + deltaCores := idset.From[uint16](delta.ReservedCores) + cores.InsertSet(deltaCores) + a.ReservedCores = cores.Slice() } func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { @@ -4113,8 +4031,14 @@ func (a *AllocatedCpuResources) Subtract(delta *AllocatedCpuResources) { return } + 
// remove cpu bandwidth a.CpuShares -= delta.CpuShares - a.ReservedCores = cpuset.New(a.ReservedCores...).Difference(cpuset.New(delta.ReservedCores...)).ToSlice() + + // remove cpu cores + cores := idset.From[uint16](a.ReservedCores) + deltaCores := idset.From[uint16](delta.ReservedCores) + cores.RemoveSet(deltaCores) + a.ReservedCores = cores.Slice() } func (a *AllocatedCpuResources) Max(other *AllocatedCpuResources) { @@ -4280,12 +4204,16 @@ func (c *ComparableResources) Superset(other *ComparableResources) (bool, string return false, "cpu" } - if len(c.Flattened.Cpu.ReservedCores) > 0 && !cpuset.New(c.Flattened.Cpu.ReservedCores...).IsSupersetOf(cpuset.New(other.Flattened.Cpu.ReservedCores...)) { + cores := idset.From[uint16](c.Flattened.Cpu.ReservedCores) + otherCores := idset.From[uint16](other.Flattened.Cpu.ReservedCores) + if len(c.Flattened.Cpu.ReservedCores) > 0 && !cores.Superset(otherCores) { return false, "cores" } + if c.Flattened.Memory.MemoryMB < other.Flattened.Memory.MemoryMB { return false, "memory" } + if c.Shared.DiskMB < other.Shared.DiskMB { return false, "disk" } @@ -11184,46 +11112,6 @@ func (a *Allocation) SetEventDisplayMessages() { setDisplayMsg(a.TaskStates) } -// ComparableResources returns the resources on the allocation -// handling upgrade paths. 
After 0.11 calls to this should be replaced with: -// alloc.AllocatedResources.Comparable() -// -// COMPAT(0.11): Remove in 0.11 -func (a *Allocation) ComparableResources() *ComparableResources { - // Alloc already has 0.9+ behavior - if a.AllocatedResources != nil { - return a.AllocatedResources.Comparable() - } - - var resources *Resources - if a.Resources != nil { - resources = a.Resources - } else if a.TaskResources != nil { - resources = new(Resources) - resources.Add(a.SharedResources) - for _, taskResource := range a.TaskResources { - resources.Add(taskResource) - } - } - - // Upgrade path - return &ComparableResources{ - Flattened: AllocatedTaskResources{ - Cpu: AllocatedCpuResources{ - CpuShares: int64(resources.CPU), - }, - Memory: AllocatedMemoryResources{ - MemoryMB: int64(resources.MemoryMB), - MemoryMaxMB: int64(resources.MemoryMaxMB), - }, - Networks: resources.Networks, - }, - Shared: AllocatedSharedResources{ - DiskMB: int64(resources.DiskMB), - }, - } -} - // LookupTask by name from the Allocation. Returns nil if the Job is not set, the // TaskGroup does not exist, or the task name cannot be found. 
func (a *Allocation) LookupTask(name string) *Task { diff --git a/nomad/structs/structs_test.go b/nomad/structs/structs_test.go index f096650bda4a..6513a8cced2f 100644 --- a/nomad/structs/structs_test.go +++ b/nomad/structs/structs_test.go @@ -15,6 +15,8 @@ import ( "github.com/hashicorp/consul/api" "github.com/hashicorp/go-multierror" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/hashicorp/nomad/helper/pointer" "github.com/hashicorp/nomad/helper/uuid" "github.com/kr/pretty" @@ -6824,7 +6826,6 @@ func TestNode_Canonicalize(t *testing.T) { func TestNode_Copy(t *testing.T) { ci.Parallel(t) - require := require.New(t) node := &Node{ ID: uuid.Generate(), @@ -6838,36 +6839,9 @@ func TestNode_Copy(t *testing.T) { "driver.exec": "1", "driver.mock_driver": "1", }, - Resources: &Resources{ - CPU: 4000, - MemoryMB: 8192, - DiskMB: 100 * 1024, - Networks: []*NetworkResource{ - { - Device: "eth0", - CIDR: "192.168.0.100/32", - MBits: 1000, - }, - }, - }, - Reserved: &Resources{ - CPU: 100, - MemoryMB: 256, - DiskMB: 4 * 1024, - Networks: []*NetworkResource{ - { - Device: "eth0", - IP: "192.168.0.100", - ReservedPorts: []Port{{Label: "ssh", Value: 22}}, - MBits: 1, - }, - }, - }, NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 4000, - TotalCpuCores: 4, - ReservableCpuCores: []uint16{0, 1, 2, 3}, + Processors: NodeProcessorResources{ + Topology: MockBasicTopology(), }, Memory: NodeMemoryResources{ MemoryMB: 8192, @@ -6923,14 +6897,14 @@ func TestNode_Copy(t *testing.T) { node2 := node.Copy() - require.Equal(node.Attributes, node2.Attributes) - require.Equal(node.Resources, node2.Resources) - require.Equal(node.Reserved, node2.Reserved) - require.Equal(node.Links, node2.Links) - require.Equal(node.Meta, node2.Meta) - require.Equal(node.Events, node2.Events) - require.Equal(node.DrainStrategy, node2.DrainStrategy) - require.Equal(node.Drivers, node2.Drivers) + 
must.Eq(t, node.Attributes, node2.Attributes) + must.Eq(t, node.Resources, node2.Resources) + must.Eq(t, node.Reserved, node2.Reserved) + must.Eq(t, node.Links, node2.Links) + must.Eq(t, node.Meta, node2.Meta) + must.Eq(t, node.Events, node2.Events) + must.Eq(t, node.DrainStrategy, node2.DrainStrategy) + must.Eq(t, node.Drivers, node2.Drivers) } func TestNode_GetID(t *testing.T) { @@ -7196,10 +7170,8 @@ func TestNodeResources_Copy(t *testing.T) { ci.Parallel(t) orig := &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: int64(32000), - TotalCpuCores: 32, - ReservableCpuCores: []uint16{1, 2, 3, 9}, + Processors: NodeProcessorResources{ + Topology: MockBasicTopology(), }, Memory: NodeMemoryResources{ MemoryMB: int64(64000), @@ -7228,25 +7200,20 @@ func TestNodeResources_Copy(t *testing.T) { }, } - kopy := orig.Copy() - assert.Equal(t, orig, kopy) + cpy := orig.Copy() + must.Eq(t, orig, cpy) - // Make sure slices aren't shared - kopy.Cpu.ReservableCpuCores[1] = 9000 - assert.NotEqual(t, orig.Cpu.ReservableCpuCores, kopy.Cpu.ReservableCpuCores) - - kopy.NodeNetworks[0].MacAddress = "11:11:11:11:11:11" - kopy.NodeNetworks[0].Addresses[0].Alias = "public" - assert.NotEqual(t, orig.NodeNetworks[0], kopy.NodeNetworks[0]) + cpy.NodeNetworks[0].MacAddress = "11:11:11:11:11:11" + cpy.NodeNetworks[0].Addresses[0].Alias = "public" + must.NotEq(t, orig.NodeNetworks[0], cpy.NodeNetworks[0]) } func TestNodeResources_Merge(t *testing.T) { ci.Parallel(t) res := &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: int64(32000), - TotalCpuCores: 32, + Processors: NodeProcessorResources{ + Topology: MockBasicTopology(), }, Memory: NodeMemoryResources{ MemoryMB: int64(64000), @@ -7258,8 +7225,11 @@ func TestNodeResources_Merge(t *testing.T) { }, } + topo2 := MockBasicTopology() + topo2.NodeIDs = idset.From[hw.NodeID]([]hw.NodeID{0, 1, 2}) + res.Merge(&NodeResources{ - Cpu: NodeCpuResources{ReservableCpuCores: []uint16{0, 1, 2, 3}}, + Processors: NodeProcessorResources{topo2}, Memory: 
NodeMemoryResources{ MemoryMB: int64(100000), }, @@ -7270,11 +7240,9 @@ func TestNodeResources_Merge(t *testing.T) { }, }) - require.Exactly(t, &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: int64(32000), - TotalCpuCores: 32, - ReservableCpuCores: []uint16{0, 1, 2, 3}, + must.Eq(t, &NodeResources{ + Processors: NodeProcessorResources{ + Topology: MockBasicTopology(), }, Memory: NodeMemoryResources{ MemoryMB: int64(100000), diff --git a/nomad/structs/testing.go b/nomad/structs/testing.go index df2db0822c28..8452a4349be7 100644 --- a/nomad/structs/testing.go +++ b/nomad/structs/testing.go @@ -7,6 +7,9 @@ import ( "fmt" "time" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/hashicorp/nomad/helper/uuid" psstructs "github.com/hashicorp/nomad/plugins/shared/structs" ) @@ -23,7 +26,7 @@ func NodeResourcesToAllocatedResources(n *NodeResources) *AllocatedResources { Tasks: map[string]*AllocatedTaskResources{ "web": { Cpu: AllocatedCpuResources{ - CpuShares: n.Cpu.CpuShares, + CpuShares: int64(n.Processors.Topology.UsableCompute()), }, Memory: AllocatedMemoryResources{ MemoryMB: n.Memory.MemoryMB, @@ -36,6 +39,31 @@ func NodeResourcesToAllocatedResources(n *NodeResources) *AllocatedResources { } } +// MockBasicTopology returns a numalib.Topology that looks likes a simple VM; +// - 1 socket, 1 NUMA node +// - 4 cores @ 3500 MHz (14,000 MHz total) +// - no client config overrides +func MockBasicTopology() *numalib.Topology { + cores := make([]numalib.Core, 4) + for i := 0; i < 4; i++ { + cores[i] = numalib.Core{ + SocketID: 0, + NodeID: 0, + ID: hw.CoreID(i), + Grade: numalib.Performance, + Disable: false, + BaseSpeed: 3500, + } + } + return &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{[]numalib.Cost{10}}, + Cores: cores, + OverrideTotalCompute: 0, + OverrideWitholdCompute: 0, + } +} + func MockNode() *Node 
{ node := &Node{ ID: uuid.Generate(), @@ -50,8 +78,8 @@ func MockNode() *Node { "driver.mock_driver": "1", }, NodeResources: &NodeResources{ - Cpu: NodeCpuResources{ - CpuShares: 4000, + Processors: NodeProcessorResources{ + Topology: MockBasicTopology(), }, Memory: NodeMemoryResources{ MemoryMB: 8192, diff --git a/scheduler/benchmarks/benchmarks_test.go b/scheduler/benchmarks/benchmarks_test.go index e3218921e0cc..7ab141037bb8 100644 --- a/scheduler/benchmarks/benchmarks_test.go +++ b/scheduler/benchmarks/benchmarks_test.go @@ -173,13 +173,12 @@ func upsertNodes(h *scheduler.Harness, count, racks int) { node.Datacenter = datacenters[i%2] node.Meta = map[string]string{} node.Meta["rack"] = fmt.Sprintf("r%d", i%racks) - cpuShares := 14000 memoryMB := 32000 diskMB := 100 * 1024 node.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: int64(cpuShares), + Processors: structs.NodeProcessorResources{ + Topology: structs.MockBasicTopology(), }, Memory: structs.NodeMemoryResources{ MemoryMB: int64(memoryMB), @@ -196,6 +195,7 @@ func upsertNodes(h *scheduler.Harness, count, racks int) { }, }, } + node.NodeResources.Compatibility() err := h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node) if err != nil { diff --git a/scheduler/context_test.go b/scheduler/context_test.go index 4df305e96f98..07b13fa9a07f 100644 --- a/scheduler/context_test.go +++ b/scheduler/context_test.go @@ -7,11 +7,15 @@ import ( "testing" "github.com/hashicorp/nomad/ci" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/hashicorp/nomad/helper/testlog" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/state" "github.com/hashicorp/nomad/nomad/structs" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" ) @@ -30,19 +34,63 @@ func testContext(t testing.TB) 
(*state.StateStore, *EvalContext) { return state, ctx } +// cpuResources creates both the legacy and modern structs concerning cpu +// metrics used for resource accounting +// +// only creates a trivial single node, single core system for the sake of +// compatibility with existing tests +func cpuResources(shares int) (structs.LegacyNodeCpuResources, structs.NodeProcessorResources) { + n := &structs.NodeResources{ + Processors: structs.NodeProcessorResources{ + Topology: &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{[]numalib.Cost{10}}, + Cores: []numalib.Core{{ + SocketID: 0, + NodeID: 0, + ID: 0, + Grade: numalib.Performance, + Disable: false, + BaseSpeed: hw.MHz(shares), + }}, + }, + }, + } + + // polyfill the legacy struct + n.Compatibility() + + return n.Cpu, n.Processors +} + +func cpuResourcesFrom(top *numalib.Topology) (structs.LegacyNodeCpuResources, structs.NodeProcessorResources) { + n := &structs.NodeResources{ + Processors: structs.NodeProcessorResources{ + Topology: top, + }, + } + + // polyfill the legacy struct + n.Compatibility() + + return n.Cpu, n.Processors +} + func TestEvalContext_ProposedAlloc(t *testing.T) { ci.Parallel(t) state, ctx := testContext(t) + + legacyCpuResources, processorResources := cpuResources(2048) + nodes := []*RankedNode{ { Node: &structs.Node{ // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -54,9 +102,8 @@ func TestEvalContext_ProposedAlloc(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -163,14 +210,16 @@ func TestEvalContext_ProposedAlloc(t *testing.T) { 
func TestEvalContext_ProposedAlloc_EvictPreempt(t *testing.T) { ci.Parallel(t) state, ctx := testContext(t) + + legacyCpuResources, processorResources := cpuResources(3 * 1024) + nodes := []*RankedNode{ { Node: &structs.Node{ ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 1024 * 3, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 1024 * 3, }, @@ -425,20 +474,20 @@ func TestPortCollisionEvent_Copy(t *testing.T) { // Copy must be equal evCopy := ev.Copy() - require.Equal(t, ev, evCopy) + must.Eq(t, ev, evCopy) // Modifying the copy should not affect the original value evCopy.Reason = "copy" - require.NotEqual(t, ev.Reason, evCopy.Reason) + must.NotEq(t, ev.Reason, evCopy.Reason) evCopy.Node.Attributes["test"] = "true" - require.NotEqual(t, ev.Node, evCopy.Node) + must.NotEq(t, ev.Node, evCopy.Node) evCopy.Allocations = append(evCopy.Allocations, mock.Alloc()) - require.NotEqual(t, ev.Allocations, evCopy.Allocations) + must.NotEq(t, ev.Allocations, evCopy.Allocations) evCopy.NetIndex.AddAllocs(evCopy.Allocations) - require.NotEqual(t, ev.NetIndex, evCopy.NetIndex) + must.NotEq(t, ev.NetIndex, evCopy.NetIndex) } func TestPortCollisionEvent_Sanitize(t *testing.T) { diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index e6769da15b8c..24e3d6f2b075 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -1673,16 +1673,19 @@ func TestServiceSched_Plan_Partial_Progress(t *testing.T) { h := NewHarness(t) - // Create a node + // Create a node of limited resources + legacyCpuResources4000, processorResources4000 := cpuResources(4000) node := mock.Node() - require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) + node.NodeResources.Processors = processorResources4000 + node.NodeResources.Cpu = legacyCpuResources4000 + must.NoError(t, 
h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) // Create a job with a high resource ask so that all the allocations can't // be placed on a single node. job := mock.Job() job.TaskGroups[0].Count = 3 job.TaskGroups[0].Tasks[0].Resources.CPU = 3600 - require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)) + must.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, job)) // Create a mock evaluation to register the job eval := &structs.Evaluation{ @@ -1694,33 +1697,24 @@ func TestServiceSched_Plan_Partial_Progress(t *testing.T) { Status: structs.EvalStatusPending, } - require.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) + must.NoError(t, h.State.UpsertEvals(structs.MsgTypeTestSetup, h.NextIndex(), []*structs.Evaluation{eval})) // Process the evaluation - err := h.Process(NewServiceScheduler, eval) - if err != nil { - t.Fatalf("err: %v", err) - } + must.NoError(t, h.Process(NewServiceScheduler, eval)) // Ensure a single plan - if len(h.Plans) != 1 { - t.Fatalf("bad: %#v", h.Plans) - } + must.SliceLen(t, 1, h.Plans) plan := h.Plans[0] // Ensure the plan doesn't have annotations. - if plan.Annotations != nil { - t.Fatalf("expected no annotations") - } + must.Nil(t, plan.Annotations) // Ensure the plan allocated var planned []*structs.Allocation for _, allocList := range plan.NodeAllocation { planned = append(planned, allocList...) 
} - if len(planned) != 1 { - t.Fatalf("bad: %#v", plan) - } + must.SliceLen(t, 1, planned) // Lookup the allocations by JobID ws := memdb.NewWatchSet() @@ -1728,14 +1722,11 @@ func TestServiceSched_Plan_Partial_Progress(t *testing.T) { require.NoError(t, err) // Ensure only one allocations placed - if len(out) != 1 { - t.Fatalf("bad: %#v", out) - } + must.SliceLen(t, 1, out) + // Ensure 2 queued queued := h.Evals[0].QueuedAllocations["web"] - if queued != 2 { - t.Fatalf("expected: %v, actual: %v", 2, queued) - } + must.Eq(t, 2, queued, must.Sprintf("exp: 2, got: %#v", h.Evals[0].QueuedAllocations)) h.AssertEvalStatus(t, structs.EvalStatusComplete) } @@ -5249,7 +5240,7 @@ func TestGenericSched_AllocFit_Lifecycle(t *testing.T) { testCases := []struct { Name string - NodeCpu int64 + NodeCpu int TaskResources structs.Resources MainTaskCount int InitTaskCount int @@ -5320,8 +5311,11 @@ func TestGenericSched_AllocFit_Lifecycle(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.Name, func(t *testing.T) { h := NewHarness(t) + + legacyCpuResources, processorResources := cpuResources(testCase.NodeCpu) node := mock.Node() - node.NodeResources.Cpu.CpuShares = testCase.NodeCpu + node.NodeResources.Processors = processorResources + node.NodeResources.Cpu = legacyCpuResources require.NoError(t, h.State.UpsertNode(structs.MsgTypeTestSetup, h.NextIndex(), node)) // Create a job with sidecar & init tasks @@ -5956,14 +5950,15 @@ func TestServiceSched_Preemption(t *testing.T) { require := require.New(t) h := NewHarness(t) + legacyCpuResources, processorResources := cpuResources(1000) + // Create a node node := mock.Node() node.Resources = nil node.ReservedResources = nil node.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 1000, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, diff --git a/scheduler/numa_ce.go b/scheduler/numa_ce.go new file mode 100644 index 
000000000000..2957f24f0382 --- /dev/null +++ b/scheduler/numa_ce.go @@ -0,0 +1,33 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: BUSL-1.1 + +//go:build !ent + +package scheduler + +import ( + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" + "github.com/hashicorp/nomad/helper" + "github.com/hashicorp/nomad/nomad/structs" +) + +type coreSelector struct { + topology *numalib.Topology + availableCores *idset.Set[hw.CoreID] +} + +// Select returns a set of CoreIDs that satisfy the requested core reservations, +// as well as the amount of CPU bandwidth represented by those specific cores. +// +// NUMA preference is available in ent only. +func (cs *coreSelector) Select(ask *structs.Resources) ([]uint16, hw.MHz) { + cores := cs.availableCores.Slice()[0:ask.Cores] + mhz := hw.MHz(0) + for _, core := range cores { + mhz += cs.topology.Cores[core].MHz() + } + ids := helper.ConvertSlice(cores, func(id hw.CoreID) uint16 { return uint16(id) }) + return ids, mhz +} diff --git a/scheduler/preemption.go b/scheduler/preemption.go index ec366b23501e..709f5d770264 100644 --- a/scheduler/preemption.go +++ b/scheduler/preemption.go @@ -136,10 +136,10 @@ func NewPreemptor(jobPriority int, ctx Context, jobID *structs.NamespacedID) *Pr // SetNode sets the node func (p *Preemptor) SetNode(node *structs.Node) { - nodeRemainingResources := node.ComparableResources() + nodeRemainingResources := node.NodeResources.Comparable() // Subtract the reserved resources of the node - if c := node.ComparableReservedResources(); c != nil { + if c := node.ReservedResources.Comparable(); c != nil { nodeRemainingResources.Subtract(c) } p.nodeRemainingResources = nodeRemainingResources @@ -161,7 +161,7 @@ func (p *Preemptor) SetCandidates(allocs []*structs.Allocation) { if tg != nil && tg.Migrate != nil { maxParallel = tg.Migrate.MaxParallel } - p.allocDetails[alloc.ID] = 
&allocInfo{maxParallel: maxParallel, resources: alloc.ComparableResources()} + p.allocDetails[alloc.ID] = &allocInfo{maxParallel: maxParallel, resources: alloc.AllocatedResources.Comparable()} p.currentAllocs = append(p.currentAllocs, alloc) } } diff --git a/scheduler/preemption_test.go b/scheduler/preemption_test.go index df8d043e0fd3..265df4fb94ff 100644 --- a/scheduler/preemption_test.go +++ b/scheduler/preemption_test.go @@ -177,10 +177,11 @@ func TestPreemption(t *testing.T) { deviceIDs = append(deviceIDs, "dev"+strconv.Itoa(i)) } + legacyCpuResources, processorResources := cpuResources(4000) + defaultNodeResources := &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4000, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 8192, }, @@ -459,10 +460,10 @@ func TestPreemption(t *testing.T) { }, nodeReservedCapacity: reservedNodeResources, // This test sets up a node with two NICs + nodeCapacity: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4000, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 8192, }, @@ -1402,12 +1403,13 @@ func TestPreemptionMultiple(t *testing.T) { // All low priority allocs should preempted to accomodate the high priority job h := NewHarness(t) + legacyCpuResources, processorResources := cpuResources(4000) + // node with 4 GPUs node := mock.Node() node.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4000, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 8192, }, diff --git a/scheduler/rank.go b/scheduler/rank.go index 68a38a8e42af..782b02d632f0 100644 --- a/scheduler/rank.go +++ b/scheduler/rank.go @@ -7,8 +7,8 @@ import ( "fmt" "math" - "github.com/hashicorp/nomad/lib/cpuset" - + "github.com/hashicorp/nomad/client/lib/idset" + 
"github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/hashicorp/nomad/nomad/structs" ) @@ -55,8 +55,10 @@ func (r *RankedNode) ProposedAllocs(ctx Context) ([]*structs.Allocation, error) return p, nil } -func (r *RankedNode) SetTaskResources(task *structs.Task, - resource *structs.AllocatedTaskResources) { +func (r *RankedNode) SetTaskResources( + task *structs.Task, + resource *structs.AllocatedTaskResources, +) { if r.TaskResources == nil { r.TaskResources = make(map[string]*structs.AllocatedTaskResources) r.TaskLifecycles = make(map[string]*structs.TaskLifecycleConfig) @@ -467,36 +469,49 @@ OUTER: } } - // Check if we need to allocate any reserved cores - if task.Resources.Cores > 0 { - // set of reservable CPUs for the node - nodeCPUSet := cpuset.New(option.Node.NodeResources.Cpu.ReservableCpuCores...) - // set of all reserved CPUs on the node - allocatedCPUSet := cpuset.New() - for _, alloc := range proposed { - allocatedCPUSet = allocatedCPUSet.Union(cpuset.New(alloc.ComparableResources().Flattened.Cpu.ReservedCores...)) + // Handle CPU core reservations + if wantedCores := task.Resources.Cores; wantedCores > 0 { + // set of cores on this node allowable for use by nomad + nodeCores := option.Node.NodeResources.Processors.Topology.UsableCores() + + // set of consumed cores on this node + consumedCores := idset.Empty[hw.CoreID]() + for _, alloc := range proposed { // proposed is existing + proposal + allocCores := alloc.AllocatedResources.Comparable().Flattened.Cpu.ReservedCores + idset.InsertSlice(consumedCores, allocCores...) } - // add any cores that were reserved for other tasks + // add cores reserved for other tasks for _, tr := range total.Tasks { - allocatedCPUSet = allocatedCPUSet.Union(cpuset.New(tr.Cpu.ReservedCores...)) + taskCores := tr.Cpu.ReservedCores + idset.InsertSlice(consumedCores, taskCores...) 
} - // set of CPUs not yet reserved on the node - availableCPUSet := nodeCPUSet.Difference(allocatedCPUSet) + // usable cores not yet reserved on this node + availableCores := nodeCores.Difference(consumedCores) - // If not enough cores are available mark the node as exhausted - if availableCPUSet.Size() < task.Resources.Cores { - // TODO preemption + // mark the node as exhausted if not enough cores available + if availableCores.Size() < wantedCores { iter.ctx.Metrics().ExhaustedNode(option.Node, "cores") continue OUTER } - // Set the task's reserved cores - taskResources.Cpu.ReservedCores = availableCPUSet.ToSlice()[0:task.Resources.Cores] - // Total CPU usage on the node is still tracked by CPUShares. Even though the task will have the entire - // core reserved, we still track overall usage by cpu shares. - taskResources.Cpu.CpuShares = option.Node.NodeResources.Cpu.SharesPerCore() * int64(task.Resources.Cores) + // set the task's reserved cores + cores, bandwidth := (&coreSelector{ + topology: option.Node.NodeResources.Processors.Topology, + availableCores: availableCores, + }).Select(task.Resources) + + // mark the node as exhausted if not enough cores available given + // the NUMA preference + if cores == nil { + iter.ctx.Metrics().ExhaustedNode(option.Node, "numa-cores") + continue OUTER + } + + // set the cores and bandwidth consumed by the task + taskResources.Cpu.ReservedCores = cores + taskResources.Cpu.CpuShares = int64(bandwidth) } // Store the task resource diff --git a/scheduler/rank_test.go b/scheduler/rank_test.go index 9118da48b2d1..efb0a8a2fdbe 100644 --- a/scheduler/rank_test.go +++ b/scheduler/rank_test.go @@ -7,6 +7,9 @@ import ( "sort" "testing" + "github.com/hashicorp/nomad/client/lib/idset" + "github.com/hashicorp/nomad/client/lib/numalib" + "github.com/hashicorp/nomad/client/lib/numalib/hw" "github.com/hashicorp/nomad/helper/uuid" "github.com/hashicorp/nomad/nomad/mock" "github.com/hashicorp/nomad/nomad/structs" @@ -34,16 +37,22 @@ func 
TestFeasibleRankIterator(t *testing.T) { } } +var ( + legacyCpuResources1024, processorResources1024 = cpuResources(1024) + legacyCpuResources2048, processorResources2048 = cpuResources(2048) + legacyCpuResources4096, processorResources4096 = cpuResources(4096) +) + func TestBinPackIterator_NoExistingAlloc(t *testing.T) { _, ctx := testContext(t) + nodes := []*RankedNode{ { Node: &structs.Node{ // Perfect fit NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -62,9 +71,8 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) { Node: &structs.Node{ // Overloaded NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 1024, - }, + Processors: processorResources1024, + Cpu: legacyCpuResources1024, Memory: structs.NodeMemoryResources{ MemoryMB: 1024, }, @@ -83,9 +91,8 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) { Node: &structs.Node{ // 50% fit NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -142,15 +149,19 @@ func TestBinPackIterator_NoExistingAlloc(t *testing.T) { // resources. 
func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) { _, ctx := testContext(t) + + legacyCpuResources900, processorResources900 := cpuResources(900) + legacyCpuResources1100, processorResources1100 := cpuResources(1100) + legacyCpuResources2000, processorResources2000 := cpuResources(2000) + nodes := []*RankedNode{ { // Best fit Node: &structs.Node{ Name: "no-reserved", NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 1100, - }, + Processors: processorResources1100, + Cpu: legacyCpuResources1100, Memory: structs.NodeMemoryResources{ MemoryMB: 1100, }, @@ -162,9 +173,8 @@ func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) { Node: &structs.Node{ Name: "reserved", NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2000, - }, + Processors: processorResources2000, + Cpu: legacyCpuResources2000, Memory: structs.NodeMemoryResources{ MemoryMB: 2000, }, @@ -184,9 +194,8 @@ func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) { Node: &structs.Node{ Name: "reserved2", NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2000, - }, + Processors: processorResources2000, + Cpu: legacyCpuResources2000, Memory: structs.NodeMemoryResources{ MemoryMB: 2000, }, @@ -205,9 +214,8 @@ func TestBinPackIterator_NoExistingAlloc_MixedReserve(t *testing.T) { Node: &structs.Node{ Name: "overloaded", NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 900, - }, + Processors: processorResources900, + Cpu: legacyCpuResources900, Memory: structs.NodeMemoryResources{ MemoryMB: 900, }, @@ -263,9 +271,8 @@ func TestBinPackIterator_Network_Success(t *testing.T) { Node: &structs.Node{ // Perfect fit NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ 
-295,9 +302,8 @@ func TestBinPackIterator_Network_Success(t *testing.T) { Node: &structs.Node{ // 50% fit NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -392,9 +398,8 @@ func TestBinPackIterator_Network_Failure(t *testing.T) { Node: &structs.Node{ // 50% fit NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -520,9 +525,8 @@ func TestBinPackIterator_Network_NoCollision_Node(t *testing.T) { }, }, NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -615,9 +619,8 @@ func TestBinPackIterator_Network_NodeError(t *testing.T) { }, }, NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -703,9 +706,8 @@ func TestBinPackIterator_Network_PortCollision_Alloc(t *testing.T) { Node: &structs.Node{ ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -830,9 +832,8 @@ func TestBinPackIterator_Network_Interpolation_Success(t *testing.T) { "some_network": "public", }, NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -877,9 +878,8 @@ func 
TestBinPackIterator_Network_Interpolation_Success(t *testing.T) { "some_network": "second", }, NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -984,9 +984,8 @@ func TestBinPackIterator_Host_Network_Interpolation_Absent_Value(t *testing.T) { "some_network": "public", }, NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -1086,9 +1085,8 @@ func TestBinPackIterator_Host_Network_Interpolation_Interface_Not_Exists(t *test "some_network": "absent", }, NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 4096, - }, + Processors: processorResources4096, + Cpu: legacyCpuResources4096, Memory: structs.NodeMemoryResources{ MemoryMB: 4096, }, @@ -1184,9 +1182,8 @@ func TestBinPackIterator_PlannedAlloc(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -1198,9 +1195,8 @@ func TestBinPackIterator_PlannedAlloc(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -1281,17 +1277,30 @@ func TestBinPackIterator_PlannedAlloc(t *testing.T) { func TestBinPackIterator_ReservedCores(t *testing.T) { state, ctx := testContext(t) + + topology := &numalib.Topology{ + NodeIDs: idset.From[hw.NodeID]([]hw.NodeID{0}), + Distances: numalib.SLIT{[]numalib.Cost{10}}, + Cores: 
[]numalib.Core{{ + ID: 0, + Grade: numalib.Performance, + BaseSpeed: 1024, + }, { + ID: 1, + Grade: numalib.Performance, + BaseSpeed: 1024, + }}, + } + legacyCpuResources, processorResources := cpuResourcesFrom(topology) + nodes := []*RankedNode{ { Node: &structs.Node{ // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - TotalCpuCores: 2, - ReservableCpuCores: []uint16{0, 1}, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -1303,11 +1312,8 @@ func TestBinPackIterator_ReservedCores(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - TotalCpuCores: 2, - ReservableCpuCores: []uint16{0, 1}, - }, + Processors: processorResources, + Cpu: legacyCpuResources, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -1404,9 +1410,8 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -1418,9 +1423,8 @@ func TestBinPackIterator_ExistingAlloc(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -1520,9 +1524,8 @@ func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, @@ -1534,9 +1537,8 @@ 
func TestBinPackIterator_ExistingAlloc_PlannedEvict(t *testing.T) { // Perfect fit ID: uuid.Generate(), NodeResources: &structs.NodeResources{ - Cpu: structs.NodeCpuResources{ - CpuShares: 2048, - }, + Processors: processorResources2048, + Cpu: legacyCpuResources2048, Memory: structs.NodeMemoryResources{ MemoryMB: 2048, }, diff --git a/scheduler/scheduler_sysbatch_test.go b/scheduler/scheduler_sysbatch_test.go index f1918d3bd48a..7c14fa10f47e 100644 --- a/scheduler/scheduler_sysbatch_test.go +++ b/scheduler/scheduler_sysbatch_test.go @@ -1432,6 +1432,8 @@ func TestSysBatch_Preemption(t *testing.T) { h := NewHarness(t) + legacyCpuResources, processorResources := cpuResources(3072) + // Create nodes nodes := make([]*structs.Node, 0) for i := 0; i < 2; i++ { @@ -1448,9 +1450,10 @@ func TestSysBatch_Preemption(t *testing.T) { }}, } node.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 3072}, - Memory: structs.NodeMemoryResources{MemoryMB: 5034}, - Disk: structs.NodeDiskResources{DiskMB: 20 * 1024}, + Processors: processorResources, + Cpu: legacyCpuResources, + Memory: structs.NodeMemoryResources{MemoryMB: 5034}, + Disk: structs.NodeDiskResources{DiskMB: 20 * 1024}, Networks: []*structs.NetworkResource{{ Device: "eth0", CIDR: "192.168.0.100/32", diff --git a/scheduler/scheduler_system_test.go b/scheduler/scheduler_system_test.go index e3d3ff748144..62ddb780a70b 100644 --- a/scheduler/scheduler_system_test.go +++ b/scheduler/scheduler_system_test.go @@ -259,7 +259,7 @@ func TestSystemSched_ExhaustResources(t *testing.T) { // Create a service job which consumes most of the system resources svcJob := mock.Job() svcJob.TaskGroups[0].Count = 1 - svcJob.TaskGroups[0].Tasks[0].Resources.CPU = 3600 + svcJob.TaskGroups[0].Tasks[0].Resources.CPU = 13500 // mock.Node() has 14k require.NoError(t, h.State.UpsertJob(structs.MsgTypeTestSetup, h.NextIndex(), nil, svcJob)) // Create a mock evaluation to register the job @@ -1857,6 +1857,8 @@ func 
TestSystemSched_Preemption(t *testing.T) { h := NewHarness(t) + legacyCpuResources, processorResources := cpuResources(3072) + // Create nodes nodes := make([]*structs.Node, 0) for i := 0; i < 2; i++ { @@ -1873,9 +1875,10 @@ func TestSystemSched_Preemption(t *testing.T) { }}, } node.NodeResources = &structs.NodeResources{ - Cpu: structs.NodeCpuResources{CpuShares: 3072}, - Memory: structs.NodeMemoryResources{MemoryMB: 5034}, - Disk: structs.NodeDiskResources{DiskMB: 20 * 1024}, + Processors: processorResources, + Cpu: legacyCpuResources, + Memory: structs.NodeMemoryResources{MemoryMB: 5034}, + Disk: structs.NodeDiskResources{DiskMB: 20 * 1024}, Networks: []*structs.NetworkResource{{ Device: "eth0", CIDR: "192.168.0.100/32", diff --git a/scheduler/util.go b/scheduler/util.go index 6f84ce5f5755..0dade6571efc 100644 --- a/scheduler/util.go +++ b/scheduler/util.go @@ -360,6 +360,8 @@ func nonNetworkResourcesUpdated(a, b *structs.Resources) comparison { return difference("task memory max", a.MemoryMaxMB, b.MemoryMaxMB) case !a.Devices.Equal(&b.Devices): return difference("task devices", a.Devices, b.Devices) + case !a.NUMA.Equal(b.NUMA): + return difference("numa", a.NUMA, b.NUMA) } return same } diff --git a/scheduler/util_test.go b/scheduler/util_test.go index 1378f20308b7..a7049333845b 100644 --- a/scheduler/util_test.go +++ b/scheduler/util_test.go @@ -914,7 +914,7 @@ func TestInplaceUpdate_NoMatch(t *testing.T) { // Create a new task group that requires too much resources. 
tg := &structs.TaskGroup{} *tg = *job.TaskGroups[0] - resource := &structs.Resources{CPU: 9999} + resource := &structs.Resources{CPU: 99999} tg.Tasks[0].Resources = resource updates := []allocTuple{{Alloc: alloc, TaskGroup: tg}} @@ -1190,6 +1190,25 @@ func TestTasksUpdated_Identity(t *testing.T) { must.True(t, tasksUpdated(j1, j2, name).modified) } +func TestTasksUpdated_NUMA(t *testing.T) { + ci.Parallel(t) + + j1 := mock.Job() + name := j1.TaskGroups[0].Name + + j1.TaskGroups[0].Tasks[0].Resources.NUMA = &structs.NUMA{ + Affinity: "none", + } + + j2 := j1.Copy() + + must.False(t, tasksUpdated(j1, j2, name).modified) + + j2.TaskGroups[0].Tasks[0].Resources.NUMA.Affinity = "require" + + must.True(t, tasksUpdated(j1, j2, name).modified) +} + func TestTaskGroupConstraints(t *testing.T) { ci.Parallel(t)