Integrate with goresctrl #711

Merged: 3 commits, Sep 29, 2021

Changes from all commits
2 changes: 1 addition & 1 deletion Makefile
@@ -73,7 +73,7 @@ UI_ASSETS := $(shell for i in pkg/cri/resource-manager/visualizer/*; do \

# Right now we don't depend on libexec/%.o on purpose so make sure the file
# is always up-to-date when elf/avx512.c is changed.
GEN_TARGETS := pkg/avx/programbytes_gendata.go pkg/sysfs/sst_types_amd64.go pkg/sysfs/sst_types_priv.go
GEN_TARGETS := pkg/avx/programbytes_gendata.go

# Determine binary version and buildid, and versions for rpm, deb, and tar packages.
BUILD_VERSION := $(shell scripts/build/get-buildid --version --shell=no)
28 changes: 15 additions & 13 deletions go.mod
@@ -5,29 +5,31 @@ go 1.14
require (
contrib.go.opencensus.io/exporter/jaeger v0.1.0
contrib.go.opencensus.io/exporter/prometheus v0.1.1-0.20191218042359-6151c48ac7fa
github.com/apache/thrift v0.13.0 // indirect
github.com/cilium/ebpf v0.0.0-20200702112145-1c8d4c9ef775
github.com/evanphx/json-patch v4.9.0+incompatible
github.com/golang/protobuf v1.4.3
github.com/google/go-cmp v0.4.0
github.com/hashicorp/go-multierror v1.0.0
github.com/evanphx/json-patch v4.11.0+incompatible
github.com/golang/protobuf v1.5.2
github.com/google/go-cmp v0.5.6
github.com/hashicorp/go-multierror v1.1.1
github.com/intel/cri-resource-manager/pkg/topology v0.0.0
github.com/intel/goresctrl v0.0.0-20210216084728-8e0ae5411bf4
github.com/intel/goresctrl v0.2.0
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.8.0
github.com/prometheus/client_golang v1.11.0
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.14.0
github.com/prometheus/common v0.30.0
github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect
github.com/shurcooL/vfsgen v0.0.0-20181202132449-6a9ea43bcacd
go.opencensus.io v0.22.2
golang.org/x/net v0.0.0-20200707034311-ab3426394381
golang.org/x/sys v0.0.0-20201015000850-e3ed0017c211
go.opencensus.io v0.22.4
go.uber.org/zap v1.13.0 // indirect
golang.org/x/net v0.0.0-20210525063256-abc453219eb5
golang.org/x/sys v0.0.0-20210903071746-97244b99971b
golang.org/x/time v0.0.0-20191024005414-555d28b269f0
google.golang.org/grpc v1.27.0
google.golang.org/grpc v1.31.0
k8s.io/api v0.19.4
k8s.io/apimachinery v0.19.4
k8s.io/apimachinery v0.22.1
k8s.io/client-go v0.19.4
k8s.io/cri-api v0.0.0
k8s.io/klog/v2 v2.4.0
k8s.io/klog/v2 v2.9.0
k8s.io/kubernetes v1.19.4
sigs.k8s.io/yaml v1.2.0
)
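For reference, the goresctrl bump above provides the packages the rest of this PR switches to. A minimal sketch (not part of the change) of pulling in the new `idset` helper from goresctrl v0.2.0:

```go
package main

import (
	"fmt"

	idset "github.com/intel/goresctrl/pkg/utils"
)

func main() {
	// Build an ID set for a few CPU/node IDs and list its members,
	// mirroring how this PR replaces sysfs.IDSet with goresctrl's idset.
	cpus := idset.NewIDSet(0, 2, 3)
	fmt.Println(cpus.Members()) // member order is not guaranteed
}
```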
499 changes: 254 additions & 245 deletions go.sum

Large diffs are not rendered by default.

62 changes: 32 additions & 30 deletions pkg/cpuallocator/allocator.go
@@ -23,6 +23,8 @@ import (
logger "github.com/intel/cri-resource-manager/pkg/log"
"github.com/intel/cri-resource-manager/pkg/sysfs"
"github.com/intel/cri-resource-manager/pkg/utils"
"github.com/intel/goresctrl/pkg/sst"
idset "github.com/intel/goresctrl/pkg/utils"
)

// AllocFlag represents CPU allocation preferences.
@@ -80,17 +82,17 @@ type cpuAllocator struct {

// topologyCache caches topology lookups
type topologyCache struct {
pkg map[sysfs.ID]cpuset.CPUSet
node map[sysfs.ID]cpuset.CPUSet
core map[sysfs.ID]cpuset.CPUSet
pkg map[idset.ID]cpuset.CPUSet
node map[idset.ID]cpuset.CPUSet
core map[idset.ID]cpuset.CPUSet

cpuPriorities cpuPriorities // CPU priority mapping
}

type cpuPriorities [NumCPUPriorities]cpuset.CPUSet

// IDFilter helps filtering Ids.
type IDFilter func(sysfs.ID) bool
type IDFilter func(idset.ID) bool

// IDSorter helps sorting Ids.
type IDSorter func(int, int) bool
@@ -110,8 +112,8 @@ func NewCPUAllocator(sys sysfs.System) CPUAllocator {
}

// Pick packages, nodes or CPUs by filtering according to a function.
func pickIds(idSlice []sysfs.ID, f IDFilter) []sysfs.ID {
ids := make([]sysfs.ID, len(idSlice))
func pickIds(idSlice []idset.ID, f IDFilter) []idset.ID {
ids := make([]idset.ID, len(idSlice))

idx := 0
for _, id := range idSlice {
@@ -144,7 +146,7 @@ func (a *allocatorHelper) takeIdlePackages() {

// pick idle packages
pkgs := pickIds(a.sys.PackageIDs(),
func(id sysfs.ID) bool {
func(id idset.ID) bool {
cset := a.topology.pkg[id].Difference(offline)
return cset.Intersection(a.from).Equals(cset)
})
@@ -185,7 +187,7 @@ func (a *allocatorHelper) takeIdleCores() {

// pick (first id for all) idle cores
cores := pickIds(a.sys.CPUIDs(),
func(id sysfs.ID) bool {
func(id idset.ID) bool {
cset := a.topology.core[id].Difference(offline)
if cset.IsEmpty() {
return false
@@ -227,7 +229,7 @@ func (a *allocatorHelper) takeIdleThreads() {

// pick all threads with free capacity
cores := pickIds(a.sys.CPUIDs(),
func(id sysfs.ID) bool {
func(id idset.ID) bool {
return a.from.Difference(offline).Contains(int(id))
})

@@ -385,9 +387,9 @@ func (ca *cpuAllocator) ReleaseCpus(from *cpuset.CPUSet, cnt int, prefer CPUPrio

func newTopologyCache(sys sysfs.System) topologyCache {
c := topologyCache{
pkg: make(map[sysfs.ID]cpuset.CPUSet),
node: make(map[sysfs.ID]cpuset.CPUSet),
core: make(map[sysfs.ID]cpuset.CPUSet)}
pkg: make(map[idset.ID]cpuset.CPUSet),
node: make(map[idset.ID]cpuset.CPUSet),
core: make(map[idset.ID]cpuset.CPUSet)}
if sys != nil {
for _, id := range sys.PackageIDs() {
c.pkg[id] = sys.Package(id).CPUSet()
@@ -421,20 +423,20 @@ func (c *topologyCache) discoverCPUPriorities(sys sysfs.System) {

for p, cpus := range cpuPriorities {
source := map[bool]string{true: "sst", false: "cpufreq"}[sstActive]
cset := sysfs.NewIDSet(cpus...).CPUSet()
cset := sysfs.CPUSetFromIDSet(idset.NewIDSet(cpus...))
log.Debug("package #%d (%s): %d %s priority cpus (%v)", id, source, len(cpus), CPUPriority(p), cset)
prio[p] = prio[p].Union(cset)
}
}
}

func (c *topologyCache) discoverSstCPUPriority(sys sysfs.System, pkgID sysfs.ID) ([NumCPUPriorities][]sysfs.ID, bool) {
func (c *topologyCache) discoverSstCPUPriority(sys sysfs.System, pkgID idset.ID) ([NumCPUPriorities][]idset.ID, bool) {
active := false

pkg := sys.Package(pkgID)
sst := pkg.SstInfo()
cpuIDs := c.pkg[pkgID].ToSlice()
prios := make(map[sysfs.ID]CPUPriority, len(cpuIDs))
prios := make(map[idset.ID]CPUPriority, len(cpuIDs))

// Determine SST-based priority. Based on experimentation there is some
// hierarchy between the SST features. Without trying to be too smart
@@ -448,7 +450,7 @@ func (c *topologyCache) discoverSstCPUPriority(sys sysfs.System, pkgID sysfs.ID)
log.Debug("package #%d: using SST-TF based CPU prioritization", pkgID)
// We only look at the CLOS id as SST-TF (seems to) follows ordered CLOS priority
for _, i := range cpuIDs {
id := sysfs.ID(i)
id := idset.ID(i)
p := PriorityLow
// First two CLOSes are prioritized by SST
if sys.CPU(id).SstClos() < 2 {
@@ -463,7 +465,7 @@ func (c *topologyCache) discoverSstCPUPriority(sys sysfs.System, pkgID sysfs.ID)

active = false
for _, i := range cpuIDs {
id := sysfs.ID(i)
id := idset.ID(i)
clos := sys.CPU(id).SstClos()
p := closPrio[clos]
if p != PriorityNormal {
@@ -476,7 +478,7 @@ func (c *topologyCache) discoverSstCPUPriority(sys sysfs.System, pkgID sysfs.ID)
if !active && sst.BFEnabled {
log.Debug("package #%d: using SST-BF based CPU prioritization", pkgID)
for _, i := range cpuIDs {
id := sysfs.ID(i)
id := idset.ID(i)
p := PriorityLow
if sst.BFCores.Has(id) {
p = PriorityHigh
@@ -486,15 +488,15 @@ func (c *topologyCache) discoverSstCPUPriority(sys sysfs.System, pkgID sysfs.ID)
active = true
}

var ret [NumCPUPriorities][]sysfs.ID
var ret [NumCPUPriorities][]idset.ID

for cpu, prio := range prios {
ret[prio] = append(ret[prio], cpu)
}
return ret, active
}

func (c *topologyCache) sstClosPriority(sys sysfs.System, pkgID sysfs.ID) map[int]CPUPriority {
func (c *topologyCache) sstClosPriority(sys sysfs.System, pkgID idset.ID) map[int]CPUPriority {
sortedKeys := func(m map[int]int) []int {
keys := make([]int, 0, len(m))
for k := range m {
@@ -505,21 +507,21 @@ func (c *topologyCache) sstClosPriority(sys sysfs.System, pkgID sysfs.ID) map[in
}

pkg := sys.Package(pkgID)
sst := pkg.SstInfo()
sstinfo := pkg.SstInfo()

// Get a list of unique CLOS proportional priority values
closPps := make(map[int]int)
closIds := make(map[int]int)
for _, cpuID := range c.pkg[pkgID].ToSlice() {
clos := sys.CPU(sysfs.ID(cpuID)).SstClos()
pp := sst.ClosInfo[clos].ProportionalPriority
clos := sys.CPU(idset.ID(cpuID)).SstClos()
pp := sstinfo.ClosInfo[clos].ProportionalPriority
closPps[pp] = clos
closIds[clos] = 0 // 0 is a dummy value here
}

// Form a list of (active) CLOS ids in sorted order
var closSorted []int
if sst.CPPriority == sysfs.Ordered {
if sstinfo.CPPriority == sst.Ordered {
// In ordered mode the priority is simply the CLOS id
closSorted = sortedKeys(closIds)
log.Debug("package #%d, ordered SST-CP priority with CLOS ids %v", pkgID, closSorted)
@@ -548,15 +550,15 @@ func (c *topologyCache) sstClosPriority(sys sysfs.System, pkgID sysfs.ID) map[in
return closPriority
}

func (c *topologyCache) discoverCpufreqPriority(sys sysfs.System, pkgID sysfs.ID) [NumCPUPriorities][]sysfs.ID {
var prios [NumCPUPriorities][]sysfs.ID
func (c *topologyCache) discoverCpufreqPriority(sys sysfs.System, pkgID idset.ID) [NumCPUPriorities][]idset.ID {
var prios [NumCPUPriorities][]idset.ID

// Group cpus by base frequency and energy performance profile
freqs := map[uint64][]sysfs.ID{}
epps := map[sysfs.EPP][]sysfs.ID{}
freqs := map[uint64][]idset.ID{}
epps := map[sysfs.EPP][]idset.ID{}
cpuIDs := c.pkg[pkgID].ToSlice()
for _, num := range cpuIDs {
id := sysfs.ID(num)
id := idset.ID(num)
cpu := sys.CPU(id)
bf := cpu.BaseFrequency()
freqs[bf] = append(freqs[bf], id)
@@ -584,7 +586,7 @@ func (c *topologyCache) discoverCpufreqPriority(sys sysfs.System, pkgID sysfs.ID

// Finally, determine priority of each CPU
for _, num := range cpuIDs {
id := sysfs.ID(num)
id := idset.ID(num)
cpu := sys.CPU(id)
p := PriorityNormal

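The allocator changes above are mostly a mechanical type switch from sysfs.ID/sysfs.IDSet to goresctrl's idset package, plus the new sysfs.CPUSetFromIDSet helper for the IDSet-to-cpuset conversion. A minimal sketch of that conversion pattern, assuming the cpuset import path allocator.go already uses:

```go
package example

import (
	"github.com/intel/cri-resource-manager/pkg/sysfs"
	idset "github.com/intel/goresctrl/pkg/utils"
	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset" // assumed cpuset package used elsewhere in allocator.go
)

// cpusToCPUSet shows the conversion used in discoverCPUPriorities above:
// wrap the IDs in a goresctrl IDSet, then convert via the sysfs helper.
func cpusToCPUSet(cpus []idset.ID) cpuset.CPUSet {
	return sysfs.CPUSetFromIDSet(idset.NewIDSet(cpus...))
}
```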
6 changes: 3 additions & 3 deletions pkg/cri/resource-manager/cache/cache.go
@@ -33,8 +33,8 @@ import (
"github.com/intel/cri-resource-manager/pkg/cri/resource-manager/config"
"github.com/intel/cri-resource-manager/pkg/cri/resource-manager/kubernetes"
logger "github.com/intel/cri-resource-manager/pkg/log"
system "github.com/intel/cri-resource-manager/pkg/sysfs"
"github.com/intel/cri-resource-manager/pkg/topology"
idset "github.com/intel/goresctrl/pkg/utils"
)

const (
@@ -481,8 +481,8 @@ type Device struct {

// PageMigrate contains the policy/preferences for container page migration.
type PageMigrate struct {
SourceNodes system.IDSet // idle memory pages on these NUMA nodes
TargetNodes system.IDSet // should be migrated to these NUMA nodes
SourceNodes idset.IDSet // idle memory pages on these NUMA nodes
TargetNodes idset.IDSet // should be migrated to these NUMA nodes
}

// Clone creates a copy of the page migration policy/preferences.
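With this change PageMigrate carries goresctrl ID sets instead of sysfs.IDSet. A minimal sketch of populating such a policy; the node IDs are illustrative only:

```go
package example

import (
	"github.com/intel/cri-resource-manager/pkg/cri/resource-manager/cache"
	idset "github.com/intel/goresctrl/pkg/utils"
)

// examplePageMigrate builds a page-migration preference using the new
// idset-based node sets; the node IDs below are made up for illustration.
func examplePageMigrate() *cache.PageMigrate {
	return &cache.PageMigrate{
		SourceNodes: idset.NewIDSet(0, 1), // scan these DRAM nodes for idle pages
		TargetNodes: idset.NewIDSet(2, 3), // demote those pages to these PMEM nodes
	}
}
```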
18 changes: 9 additions & 9 deletions pkg/cri/resource-manager/control/page-migrate/demoter.go
@@ -27,7 +27,7 @@ import (

"github.com/intel/cri-resource-manager/pkg/cgroups"
"github.com/intel/cri-resource-manager/pkg/config"
system "github.com/intel/cri-resource-manager/pkg/sysfs"
idset "github.com/intel/goresctrl/pkg/utils"
)

// Support dynamic pushing of unused pages from DRAM to PMEM.
@@ -83,7 +83,7 @@ type pagePool struct {

type demotion struct {
pagePool pagePool
targetNodes system.IDSet
targetNodes idset.IDSet
}

func copyPagePool(p pagePool) pagePool {
@@ -137,7 +137,7 @@ func (d *demoter) Reconfigure() {
d.start()
}

func (d *demoter) updateDemoter(cid string, p pagePool, targetNodes system.IDSet) {
func (d *demoter) updateDemoter(cid string, p pagePool, targetNodes idset.IDSet) {
channel, found := d.containerDemoters[cid]
if !found {
channel := make(chan interface{})
@@ -306,7 +306,7 @@ func (d *demoter) scanPages() {
d.stopUnusedDemoters(d.migration.containers)
}

func (d *demoter) getPagesForContainer(c *container, sourceNodes system.IDSet) (pagePool, error) {
func (d *demoter) getPagesForContainer(c *container, sourceNodes idset.IDSet) (pagePool, error) {
pool := pagePool{
pages: make(map[int][]page, 0),
longestRange: 0,
@@ -463,15 +463,15 @@ func (d *demoter) getPagesForContainer(c *container, sourceNodes system.IDSet) (
return pool, nil
}

func pickClosestPMEMNode(currentNode system.ID, targetNodes system.IDSet) system.ID {
func pickClosestPMEMNode(currentNode idset.ID, targetNodes idset.IDSet) idset.ID {
// TODO: analyze the topology information (and possibly the amount of free memory) and choose the "best"
// PMEM node to demote the page to. The array targetNodes already contains only the subset of PMEM nodes
// available in this topology subtree. Right now just pick a random controller.
nodes := targetNodes.Members()
return nodes[rand.Intn(len(nodes))]
}

func (d *demoter) movePagesForPid(p []page, count uint, pid int, targetNodes system.IDSet) (uint, error) {
func (d *demoter) movePagesForPid(p []page, count uint, pid int, targetNodes idset.IDSet) (uint, error) {
// We move at max count pages, but there might not be that much.
nPages := count
if uint(len(p)) < count {
@@ -507,10 +507,10 @@ func (d *demoter) movePagesForPid(p []page, count uint, pid int, targetNodes sys
continue
}
// log.Debug("page 0x%08X: old status %d", pages[i], pageStatus)
if !targetNodes.Has(system.ID(pageStatus)) {
if !targetNodes.Has(idset.ID(pageStatus)) {
// In case of many PMEM controllers choose the one that is the closest.
dramPages = append(dramPages, pages[i])
nodes = append(nodes, int(pickClosestPMEMNode(system.ID(pageStatus), targetNodes)))
nodes = append(nodes, int(pickClosestPMEMNode(idset.ID(pageStatus), targetNodes)))
} // else no need to move.
}

@@ -521,7 +521,7 @@ func (d *demoter) movePagesForPid(p []page, count uint, pid int, targetNodes sys
return nPages, err
}

func (d *demoter) movePages(p pagePool, count uint, targetNodes system.IDSet) error {
func (d *demoter) movePages(p pagePool, count uint, targetNodes idset.IDSet) error {
// Select pid for moving the pages so that the process with the largest number
// of non-dirty pages gets the pages moved first.
processedPids := make(map[int]bool, 0)
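The demoter changes are the same type switch; the per-page logic is unchanged. As a rough sketch of the decision movePagesForPid makes for each page (demotionTarget and pickTarget are hypothetical stand-ins, the latter for pickClosestPMEMNode):

```go
package example

import idset "github.com/intel/goresctrl/pkg/utils"

// demotionTarget sketches the per-page decision in movePagesForPid: a page
// already on one of the PMEM target nodes stays put; otherwise a target
// node is picked for it (currently at random, per the TODO above).
func demotionTarget(current idset.ID, targetNodes idset.IDSet,
	pickTarget func(idset.ID, idset.IDSet) idset.ID) (idset.ID, bool) {
	if targetNodes.Has(current) {
		return current, false // no move needed
	}
	return pickTarget(current, targetNodes), true
}
```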