From 045a43d184735d67d302c55a8d04b13ed928efcc Mon Sep 17 00:00:00 2001 From: Francesco Romani Date: Wed, 10 Apr 2024 11:58:48 +0200 Subject: [PATCH] nrt: logr: add support for logr Move to the logr.Logger interface everywhere, instead of the global `klog` instance. This enable named logger, presetting values for simple and automatic consistency, enables pluggable loggers and comes for free since we already depend on the logr package and klog has a native logr integration. In addition, add minimal support to make it easy to replace the logr reference, to help integrators of this code. The default is still (and will still be) klog for backward compatibility and ecosystem integration. Signed-off-by: Francesco Romani --- .../cache/discardreserved.go | 12 ++- .../cache/discardreserved_test.go | 3 +- .../cache/foreign_pods.go | 17 +-- .../cache/foreign_pods_test.go | 3 +- pkg/noderesourcetopology/cache/overreserve.go | 102 ++++++++++-------- .../cache/overreserve_test.go | 39 +++---- pkg/noderesourcetopology/cache/passthrough.go | 10 +- .../cache/passthrough_test.go | 3 +- pkg/noderesourcetopology/cache/store.go | 36 ++++--- pkg/noderesourcetopology/cache/store_test.go | 19 ++-- pkg/noderesourcetopology/config.go | 15 ++- pkg/noderesourcetopology/config_test.go | 5 +- pkg/noderesourcetopology/filter.go | 77 ++++++------- pkg/noderesourcetopology/filter_test.go | 7 +- pkg/noderesourcetopology/least_numa.go | 47 ++++---- pkg/noderesourcetopology/least_numa_test.go | 5 +- pkg/noderesourcetopology/logging.go | 49 --------- pkg/noderesourcetopology/logging/logging.go | 77 +++++++++++++ pkg/noderesourcetopology/plugin.go | 16 ++- pkg/noderesourcetopology/pluginhelpers.go | 50 +++++---- .../pluginhelpers_test.go | 3 +- .../podprovider/podprovider.go | 22 ++-- pkg/noderesourcetopology/postbind.go | 5 + pkg/noderesourcetopology/reserve.go | 9 ++ pkg/noderesourcetopology/score.go | 51 +++++---- pkg/noderesourcetopology/score_test.go | 7 +- .../stringify/stringify.go | 27 ++++- .../stringify/stringify_test.go | 2 +- 28 files changed, 412 insertions(+), 306 deletions(-) delete mode 100644 pkg/noderesourcetopology/logging.go create mode 100644 pkg/noderesourcetopology/logging/logging.go diff --git a/pkg/noderesourcetopology/cache/discardreserved.go b/pkg/noderesourcetopology/cache/discardreserved.go index 0807d3ba3..5ba908877 100644 --- a/pkg/noderesourcetopology/cache/discardreserved.go +++ b/pkg/noderesourcetopology/cache/discardreserved.go @@ -20,6 +20,7 @@ import ( "context" "sync" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" corev1 "k8s.io/api/core/v1" @@ -27,6 +28,7 @@ import ( "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" ) // DiscardReserved is intended to solve similiar problem as Overreserve Cache, @@ -45,12 +47,14 @@ type DiscardReserved struct { rMutex sync.RWMutex reservationMap map[string]map[types.UID]bool // Key is NodeName, value is Pod UID : reserved status client ctrlclient.Client + lh logr.Logger } -func NewDiscardReserved(client ctrlclient.Client) Interface { +func NewDiscardReserved(lh logr.Logger, client ctrlclient.Client) Interface { return &DiscardReserved{ client: client, reservationMap: make(map[string]map[types.UID]bool), + lh: lh, } } @@ -74,7 +78,7 @@ func (pt *DiscardReserved) NodeMaybeOverReserved(nodeName string, pod *corev1.Po func (pt *DiscardReserved) NodeHasForeignPods(nodeName string, pod 
*corev1.Pod) {} func (pt *DiscardReserved) ReserveNodeResources(nodeName string, pod *corev1.Pod) { - klog.V(5).InfoS("nrtcache NRT Reserve", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName) + pt.lh.V(5).Info("NRT Reserve", "logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) pt.rMutex.Lock() defer pt.rMutex.Unlock() @@ -85,14 +89,14 @@ func (pt *DiscardReserved) ReserveNodeResources(nodeName string, pod *corev1.Pod } func (pt *DiscardReserved) UnreserveNodeResources(nodeName string, pod *corev1.Pod) { - klog.V(5).InfoS("nrtcache NRT Unreserve", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName) + pt.lh.V(5).Info("NRT Unreserve", "logID", klog.KObj(pod), "podUID", pod.GetUID(), "node", nodeName) pt.removeReservationForNode(nodeName, pod) } // PostBind is invoked to cleanup reservationMap func (pt *DiscardReserved) PostBind(nodeName string, pod *corev1.Pod) { - klog.V(5).InfoS("nrtcache NRT PostBind", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName) + pt.lh.V(5).Info("NRT PostBind", "logID", klog.KObj(pod), "podUID", pod.GetUID(), "node", nodeName) pt.removeReservationForNode(nodeName, pod) } diff --git a/pkg/noderesourcetopology/cache/discardreserved_test.go b/pkg/noderesourcetopology/cache/discardreserved_test.go index 3eac547e5..df383443d 100644 --- a/pkg/noderesourcetopology/cache/discardreserved_test.go +++ b/pkg/noderesourcetopology/cache/discardreserved_test.go @@ -26,6 +26,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -50,7 +51,7 @@ func TestDiscardReservedNodesGetCachedNRTCopy(t *testing.T) { checkGetCachedNRTCopy( t, func(client ctrlclient.Client, _ podlisterv1.PodLister) (Interface, error) { - return NewDiscardReserved(client), nil + return NewDiscardReserved(klog.Background(), client), nil }, testCases..., ) diff --git a/pkg/noderesourcetopology/cache/foreign_pods.go b/pkg/noderesourcetopology/cache/foreign_pods.go index 68956e7ed..b5cf7fab3 100644 --- a/pkg/noderesourcetopology/cache/foreign_pods.go +++ b/pkg/noderesourcetopology/cache/foreign_pods.go @@ -17,11 +17,14 @@ limitations under the License. 
package cache import ( + "fmt" + + "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" k8scache "k8s.io/client-go/tools/cache" - "k8s.io/klog/v2" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/resourcerequests" ) @@ -35,11 +38,11 @@ var ( onlyExclusiveResources = false ) -func SetupForeignPodsDetector(schedProfileName string, podInformer k8scache.SharedInformer, cc Interface) { +func SetupForeignPodsDetector(lh logr.Logger, schedProfileName string, podInformer k8scache.SharedInformer, cc Interface) { foreignCache := func(obj interface{}) { pod, ok := obj.(*corev1.Pod) if !ok { - klog.V(3).InfoS("nrtcache: foreign: unsupported object %T", obj) + lh.V(3).Info("unsupported object", "kind", fmt.Sprintf("%T", obj)) return } if !IsForeignPod(pod) { @@ -47,7 +50,7 @@ func SetupForeignPodsDetector(schedProfileName string, podInformer k8scache.Shar } cc.NodeHasForeignPods(pod.Spec.NodeName, pod) - klog.V(6).InfoS("nrtcache: has foreign pods", "logID", klog.KObj(pod), "node", pod.Spec.NodeName, "podUID", pod.UID) + lh.V(6).Info("detected foreign pods", "logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", pod.Spec.NodeName) } podInformer.AddEventHandler(k8scache.ResourceEventHandlerFuncs{ @@ -67,11 +70,11 @@ func TrackAllForeignPods() { onlyExclusiveResources = false } -func RegisterSchedulerProfileName(schedProfileName string) { - klog.InfoS("nrtcache: setting up foreign pod detection", "profile", schedProfileName) +func RegisterSchedulerProfileName(lh logr.Logger, schedProfileName string) { + lh.Info("setting up detection", "profile", schedProfileName) schedProfileNames.Insert(schedProfileName) - klog.V(5).InfoS("nrtcache: registered scheduler profiles", "names", schedProfileNames.List()) + lh.V(5).Info("registered scheduler profiles", "names", schedProfileNames.List()) } func IsForeignPod(pod *corev1.Pod) bool { diff --git a/pkg/noderesourcetopology/cache/foreign_pods_test.go b/pkg/noderesourcetopology/cache/foreign_pods_test.go index a0333b377..f4338e388 100644 --- a/pkg/noderesourcetopology/cache/foreign_pods_test.go +++ b/pkg/noderesourcetopology/cache/foreign_pods_test.go @@ -22,6 +22,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" ) func TestIsForeignPod(t *testing.T) { @@ -195,7 +196,7 @@ func TestIsForeignPod(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, profileName := range tt.profileNames { - RegisterSchedulerProfileName(profileName) + RegisterSchedulerProfileName(klog.Background(), profileName) } defer CleanRegisteredSchedulerProfileNames() diff --git a/pkg/noderesourcetopology/cache/overreserve.go b/pkg/noderesourcetopology/cache/overreserve.go index 3c373e9f2..37283fda0 100644 --- a/pkg/noderesourcetopology/cache/overreserve.go +++ b/pkg/noderesourcetopology/cache/overreserve.go @@ -21,8 +21,8 @@ import ( "errors" "fmt" "sync" - "time" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" "github.com/k8stopologyawareschedwg/podfingerprint" @@ -30,17 +30,18 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" podlisterv1 "k8s.io/client-go/listers/core/v1" - "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" + 
"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/podprovider" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/resourcerequests" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify" ) type OverReserve struct { + lh logr.Logger client ctrlclient.Client lock sync.Mutex nrts *nrtStore @@ -54,12 +55,12 @@ type OverReserve struct { isPodRelevant podprovider.PodFilterFunc } -func NewOverReserve(cfg *apiconfig.NodeResourceTopologyCache, client ctrlclient.Client, podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc) (*OverReserve, error) { +func NewOverReserve(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache, client ctrlclient.Client, podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc) (*OverReserve, error) { if client == nil || podLister == nil { - return nil, fmt.Errorf("nrtcache: received nil references") + return nil, fmt.Errorf("received nil references") } - resyncMethod := getCacheResyncMethod(cfg) + resyncMethod := getCacheResyncMethod(lh, cfg) nrtObjs := &topologyv1alpha2.NodeResourceTopologyList{} // TODO: we should pass-in a context in the future @@ -67,10 +68,11 @@ func NewOverReserve(cfg *apiconfig.NodeResourceTopologyCache, client ctrlclient. return nil, err } - klog.V(3).InfoS("nrtcache: initializing", "objects", len(nrtObjs.Items), "method", resyncMethod) + lh.V(3).Info("initializing", "noderesourcetopologies", len(nrtObjs.Items), "method", resyncMethod) obj := &OverReserve{ + lh: lh, client: client, - nrts: newNrtStore(nrtObjs.Items), + nrts: newNrtStore(lh, nrtObjs.Items), assumedResources: make(map[string]*resourceStore), nodesMaybeOverreserved: newCounter(), nodesWithForeignPods: newCounter(), @@ -97,10 +99,13 @@ func (ov *OverReserve) GetCachedNRTCopy(ctx context.Context, nodeName string, po return nrt, true } - klog.V(6).InfoS("nrtcache NRT", "logID", klog.KObj(pod), "vanilla", stringify.NodeResourceTopologyResources(nrt)) - nodeAssumedResources.UpdateNRT(klog.KObj(pod).String(), nrt) + logID := logging.PodLogID(pod) + lh := ov.lh.WithValues("logID", logID, "podUID", pod.GetUID(), "node", nodeName) - klog.V(5).InfoS("nrtcache NRT", "logID", klog.KObj(pod), "updated", stringify.NodeResourceTopologyResources(nrt)) + lh.V(6).Info("NRT", "fromcache", stringify.NodeResourceTopologyResources(nrt)) + nodeAssumedResources.UpdateNRT(logID, nrt) + + lh.V(5).Info("NRT", "withassumed", stringify.NodeResourceTopologyResources(nrt)) return nrt, true } @@ -108,49 +113,52 @@ func (ov *OverReserve) NodeMaybeOverReserved(nodeName string, pod *corev1.Pod) { ov.lock.Lock() defer ov.lock.Unlock() val := ov.nodesMaybeOverreserved.Incr(nodeName) - klog.V(4).InfoS("nrtcache: mark discarded", "logID", klog.KObj(pod), "node", nodeName, "count", val) + ov.lh.V(4).Info("mark discarded", "node", nodeName, "count", val) } func (ov *OverReserve) NodeHasForeignPods(nodeName string, pod *corev1.Pod) { + lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) ov.lock.Lock() defer ov.lock.Unlock() if !ov.nrts.Contains(nodeName) { - klog.V(5).InfoS("nrtcache: ignoring foreign pods", "logID", klog.KObj(pod), "node", nodeName, "nrtinfo", "missing") + lh.V(5).Info("ignoring foreign pods", "nrtinfo", "missing") return } val := ov.nodesWithForeignPods.Incr(nodeName) - klog.V(4).InfoS("nrtcache: marked with foreign pods", "logID", klog.KObj(pod), "node", nodeName, "count", val) + lh.V(4).Info("marked with foreign pods", "count", val) } 
func (ov *OverReserve) ReserveNodeResources(nodeName string, pod *corev1.Pod) { + lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) ov.lock.Lock() defer ov.lock.Unlock() nodeAssumedResources, ok := ov.assumedResources[nodeName] if !ok { - nodeAssumedResources = newResourceStore() + nodeAssumedResources = newResourceStore(ov.lh) ov.assumedResources[nodeName] = nodeAssumedResources } nodeAssumedResources.AddPod(pod) - klog.V(5).InfoS("nrtcache post reserve", "logID", klog.KObj(pod), "node", nodeName, "assumedResources", nodeAssumedResources.String()) + lh.V(5).Info("post reserve", "assumedResources", nodeAssumedResources.String()) ov.nodesMaybeOverreserved.Delete(nodeName) - klog.V(6).InfoS("nrtcache: reset discard counter", "logID", klog.KObj(pod), "node", nodeName) + lh.V(6).Info("reset discard counter") } func (ov *OverReserve) UnreserveNodeResources(nodeName string, pod *corev1.Pod) { + lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) ov.lock.Lock() defer ov.lock.Unlock() nodeAssumedResources, ok := ov.assumedResources[nodeName] if !ok { // this should not happen, so we're vocal about it // we don't return error because not much to do to recover anyway - klog.V(3).InfoS("nrtcache: no resources tracked", "logID", klog.KObj(pod), "node", nodeName) + lh.V(3).Info("no resources tracked") return } nodeAssumedResources.DeletePod(pod) - klog.V(5).InfoS("nrtcache post release", "logID", klog.KObj(pod), "node", nodeName, "assumedResources", nodeAssumedResources.String()) + lh.V(5).Info("post release", "assumedResources", nodeAssumedResources.String()) } // NodesMaybeOverReserved returns a slice of all the node names which have been discarded previously, @@ -160,7 +168,7 @@ func (ov *OverReserve) UnreserveNodeResources(nodeName string, pod *corev1.Pod) // 2. it was pessimistically overallocated, so the node is a candidate for resync // This function enables the caller to know the slice of nodes should be considered for resync, // avoiding the need to rescan the full node list. -func (ov *OverReserve) NodesMaybeOverReserved(logID string) []string { +func (ov *OverReserve) NodesMaybeOverReserved(lh logr.Logger) []string { ov.lock.Lock() defer ov.lock.Unlock() // this is intentionally aggressive. We don't yet make any attempt to find out if the @@ -177,7 +185,7 @@ func (ov *OverReserve) NodesMaybeOverReserved(logID string) []string { } if nodes.Len() > 0 { - klog.V(4).InfoS("nrtcache: found dirty nodes", "logID", logID, "foreign", foreignCount, "discarded", nodes.Len()-foreignCount, "total", nodes.Len()) + lh.V(4).Info("found dirty nodes", "foreign", foreignCount, "discarded", nodes.Len()-foreignCount, "total", nodes.Len()) } return nodes.Keys() } @@ -192,77 +200,81 @@ func (ov *OverReserve) NodesMaybeOverReserved(logID string) []string { // too aggressive resync attempts, so to more, likely unnecessary, computation work on the scheduler side. func (ov *OverReserve) Resync() { // we are not working with a specific pod, so we need a unique key to track this flow - logID := logIDFromTime() + lh := ov.lh.WithValues("logID", logging.TimeLogID(), "flow", logging.FlowCacheSync) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) - nodeNames := ov.NodesMaybeOverReserved(logID) + nodeNames := ov.NodesMaybeOverReserved(lh) // avoid as much as we can unnecessary work and logs. 
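
Aside on the Resync changes above (the Resync hunk continues right after this sketch): the flow-marker pattern — a synthetic logID plus a `flow` key and paired begin/end messages — can be shown in isolation as below. The literal values are assumptions; only the shape matters.

package main

import (
	"fmt"
	"time"

	"k8s.io/klog/v2"
)

func resyncFlow() {
	// Same idea as the removed logIDFromTime helper: a unique-ish id for a
	// flow that is not tied to a specific pod.
	logID := fmt.Sprintf("resync%v", time.Now().UnixMilli())
	lh := klog.Background().WithValues("logID", logID, "flow", "cachesync")

	lh.V(4).Info("begin")
	defer lh.V(4).Info("end")

	// ... per-node work would go here, deriving lh.WithValues("node", nodeName)
	// for each candidate node exactly as the hunk below does ...
}

func main() {
	resyncFlow()
}
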
if len(nodeNames) == 0 { - klog.V(6).InfoS("nrtcache: resync: no dirty nodes detected") + lh.V(6).Info("no dirty nodes detected") return } // node -> pod identifier (namespace, name) - nodeToObjsMap, err := makeNodeToPodDataMap(ov.podLister, ov.isPodRelevant, logID) + nodeToObjsMap, err := makeNodeToPodDataMap(lh, ov.podLister, ov.isPodRelevant) if err != nil { - klog.ErrorS(err, "cannot find the mapping between running pods and nodes") + lh.Error(err, "cannot find the mapping between running pods and nodes") return } - klog.V(6).InfoS("nrtcache: resync NodeTopology cache starting", "logID", logID) - defer klog.V(6).InfoS("nrtcache: resync NodeTopology cache complete", "logID", logID) + lh.V(6).Info("resync NodeTopology cache starting") + defer lh.V(6).Info("resync NodeTopology cache complete") var nrtUpdates []*topologyv1alpha2.NodeResourceTopology for _, nodeName := range nodeNames { + lh = lh.WithValues("node", nodeName) + nrtCandidate := &topologyv1alpha2.NodeResourceTopology{} if err := ov.client.Get(context.Background(), types.NamespacedName{Name: nodeName}, nrtCandidate); err != nil { - klog.V(3).InfoS("nrtcache: failed to get NodeTopology", "logID", logID, "node", nodeName, "error", err) + lh.V(3).Info("failed to get NodeTopology", "error", err) continue } if nrtCandidate == nil { - klog.V(3).InfoS("nrtcache: missing NodeTopology", "logID", logID, "node", nodeName) + lh.V(3).Info("missing NodeTopology") continue } objs, ok := nodeToObjsMap[nodeName] if !ok { // this really should never happen - klog.V(3).InfoS("nrtcache: cannot find any pod for node", "logID", logID, "node", nodeName) + lh.V(3).Info("cannot find any pod for node") continue } pfpExpected, onlyExclRes := podFingerprintForNodeTopology(nrtCandidate, ov.resyncMethod) if pfpExpected == "" { - klog.V(3).InfoS("nrtcache: missing NodeTopology podset fingerprint data", "logID", logID, "node", nodeName) + lh.V(3).Info("missing NodeTopology podset fingerprint data") continue } - klog.V(6).InfoS("nrtcache: trying to resync NodeTopology", "logID", logID, "node", nodeName, "fingerprint", pfpExpected, "onlyExclusiveResources", onlyExclRes) + lh.V(6).Info("trying to sync NodeTopology", "fingerprint", pfpExpected, "onlyExclusiveResources", onlyExclRes) - err = checkPodFingerprintForNode(logID, objs, nodeName, pfpExpected, onlyExclRes) + err = checkPodFingerprintForNode(lh, objs, nodeName, pfpExpected, onlyExclRes) if errors.Is(err, podfingerprint.ErrSignatureMismatch) { // can happen, not critical - klog.V(5).InfoS("nrtcache: NodeTopology podset fingerprint mismatch", "logID", logID, "node", nodeName) + lh.V(5).Info("NodeTopology podset fingerprint mismatch") continue } if err != nil { // should never happen, let's be vocal - klog.V(3).ErrorS(err, "nrtcache: checking NodeTopology podset fingerprint", "logID", logID, "node", nodeName) + lh.V(3).Error(err, "checking NodeTopology podset fingerprint") continue } - klog.V(4).InfoS("nrtcache: overriding cached info", "logID", logID, "node", nodeName) + lh.V(4).Info("overriding cached info") nrtUpdates = append(nrtUpdates, nrtCandidate) } - ov.FlushNodes(logID, nrtUpdates...) + ov.FlushNodes(lh, nrtUpdates...) } // FlushNodes drops all the cached information about a given node, resetting its state clean. 
-func (ov *OverReserve) FlushNodes(logID string, nrts ...*topologyv1alpha2.NodeResourceTopology) { +func (ov *OverReserve) FlushNodes(lh logr.Logger, nrts ...*topologyv1alpha2.NodeResourceTopology) { ov.lock.Lock() defer ov.lock.Unlock() for _, nrt := range nrts { - klog.V(4).InfoS("nrtcache: flushing", "logID", logID, "node", nrt.Name) + lh.V(4).Info("flushing", "node", nrt.Name) ov.nrts.Update(nrt) delete(ov.assumedResources, nrt.Name) ov.nodesMaybeOverreserved.Delete(nrt.Name) @@ -275,14 +287,14 @@ func (ov *OverReserve) Store() *nrtStore { return ov.nrts } -func makeNodeToPodDataMap(podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc, logID string) (map[string][]podData, error) { +func makeNodeToPodDataMap(lh logr.Logger, podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc) (map[string][]podData, error) { nodeToObjsMap := make(map[string][]podData) pods, err := podLister.List(labels.Everything()) if err != nil { return nodeToObjsMap, err } for _, pod := range pods { - if !isPodRelevant(pod, logID) { + if !isPodRelevant(lh, pod) { continue } nodeObjs := nodeToObjsMap[pod.Spec.NodeName] @@ -296,17 +308,13 @@ func makeNodeToPodDataMap(podLister podlisterv1.PodLister, isPodRelevant podprov return nodeToObjsMap, nil } -func logIDFromTime() string { - return fmt.Sprintf("resync%v", time.Now().UnixMilli()) -} - -func getCacheResyncMethod(cfg *apiconfig.NodeResourceTopologyCache) apiconfig.CacheResyncMethod { +func getCacheResyncMethod(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache) apiconfig.CacheResyncMethod { var resyncMethod apiconfig.CacheResyncMethod if cfg != nil && cfg.ResyncMethod != nil { resyncMethod = *cfg.ResyncMethod } else { // explicitly set to nil? resyncMethod = apiconfig.CacheResyncAutodetect - klog.InfoS("cache resync method missing", "fallback", resyncMethod) + lh.Info("cache resync method missing", "fallback", resyncMethod) } return resyncMethod } diff --git a/pkg/noderesourcetopology/cache/overreserve_test.go b/pkg/noderesourcetopology/cache/overreserve_test.go index ec48e2bae..55ffa2348 100644 --- a/pkg/noderesourcetopology/cache/overreserve_test.go +++ b/pkg/noderesourcetopology/cache/overreserve_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -89,7 +90,7 @@ func TestGetCacheResyncMethod(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.description, func(t *testing.T) { - got := getCacheResyncMethod(testCase.cfg) + got := getCacheResyncMethod(klog.Background(), testCase.cfg) if got != testCase.expected { t.Errorf("cache resync method got %v expected %v", got, testCase.expected) } @@ -104,12 +105,12 @@ func TestInitEmptyLister(t *testing.T) { fakePodLister := &fakePodLister{} - _, err = NewOverReserve(nil, nil, fakePodLister, podprovider.IsPodRelevantAlways) + _, err = NewOverReserve(klog.Background(), nil, nil, fakePodLister, podprovider.IsPodRelevantAlways) if err == nil { t.Fatalf("accepted nil lister") } - _, err = NewOverReserve(nil, fakeClient, nil, podprovider.IsPodRelevantAlways) + _, err = NewOverReserve(klog.Background(), nil, fakeClient, nil, podprovider.IsPodRelevantAlways) if err == nil { t.Fatalf("accepted nil indexer") } @@ -124,7 +125,7 @@ func TestNodesMaybeOverReservedCount(t *testing.T) { fakePodLister := &fakePodLister{} nrtCache := mustOverReserve(t, fakeClient, fakePodLister) - 
dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes from pristine cache: %v", dirtyNodes) } @@ -149,7 +150,7 @@ func TestDirtyNodesMarkDiscarded(t *testing.T) { nrtCache.ReserveNodeResources(nodeName, &corev1.Pod{}) } - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes from pristine cache: %v", dirtyNodes) } @@ -158,7 +159,7 @@ func TestDirtyNodesMarkDiscarded(t *testing.T) { nrtCache.NodeMaybeOverReserved(nodeName, &corev1.Pod{}) } - dirtyNodes = nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes = nrtCache.NodesMaybeOverReserved(klog.Background()) sort.Strings(dirtyNodes) if !reflect.DeepEqual(dirtyNodes, expectedNodes) { @@ -185,7 +186,7 @@ func TestDirtyNodesUnmarkedOnReserve(t *testing.T) { nrtCache.ReserveNodeResources(nodeName, &corev1.Pod{}) } - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes from pristine cache: %v", dirtyNodes) } @@ -201,7 +202,7 @@ func TestDirtyNodesUnmarkedOnReserve(t *testing.T) { "node-1", } - dirtyNodes = nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes = nrtCache.NodesMaybeOverReserved(klog.Background()) if !reflect.DeepEqual(dirtyNodes, expectedNodes) { t.Errorf("got=%v expected=%v", dirtyNodes, expectedNodes) @@ -228,7 +229,7 @@ func TestOverreserveGetCachedNRTCopy(t *testing.T) { checkGetCachedNRTCopy( t, func(client ctrlclient.Client, podLister podlisterv1.PodLister) (Interface, error) { - return NewOverReserve(nil, client, podLister, podprovider.IsPodRelevantAlways) + return NewOverReserve(klog.Background(), nil, client, podLister, podprovider.IsPodRelevantAlways) }, testCases..., ) @@ -403,8 +404,6 @@ func TestFlush(t *testing.T) { }, } - logID := "testFlush" - nrtCache.ReserveNodeResources("node1", testPod) nrtCache.NodeMaybeOverReserved("node1", testPod) @@ -433,9 +432,11 @@ func TestFlush(t *testing.T) { }, } - nrtCache.FlushNodes(logID, expectedNodeTopology.DeepCopy()) + lh := klog.Background() + + nrtCache.FlushNodes(lh, expectedNodeTopology.DeepCopy()) - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(lh) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes after flush: %v", dirtyNodes) } @@ -517,7 +518,7 @@ func TestResyncNoPodFingerprint(t *testing.T) { nrtCache.Resync() - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 1 || dirtyNodes[0] != "node1" { t.Errorf("cleaned nodes after resyncing with bad data: %v", dirtyNodes) @@ -611,7 +612,7 @@ func TestResyncMatchFingerprint(t *testing.T) { nrtCache.Resync() - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) > 0 { t.Errorf("node still dirty after resyncing with good data: %v", dirtyNodes) } @@ -641,7 +642,7 @@ func TestUnknownNodeWithForeignPods(t *testing.T) { nrtCache.NodeHasForeignPods("node-bogus", &corev1.Pod{}) - names := nrtCache.NodesMaybeOverReserved("testing") + names := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(names) != 0 { t.Errorf("non-existent node has foreign pods!") } @@ -714,7 +715,7 @@ func TestNodeWithForeignPods(t *testing.T) { target := "node2" 
nrtCache.NodeHasForeignPods(target, &corev1.Pod{}) - names := nrtCache.NodesMaybeOverReserved("testing") + names := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(names) != 1 || names[0] != target { t.Errorf("unexpected dirty nodes: %v", names) } @@ -726,7 +727,7 @@ func TestNodeWithForeignPods(t *testing.T) { } func mustOverReserve(t *testing.T, client ctrlclient.Client, podLister podlisterv1.PodLister) *OverReserve { - obj, err := NewOverReserve(nil, client, podLister, podprovider.IsPodRelevantAlways) + obj, err := NewOverReserve(klog.Background(), nil, client, podLister, podprovider.IsPodRelevantAlways) if err != nil { t.Fatalf("unexpected error creating cache: %v", err) } @@ -960,7 +961,7 @@ func TestMakeNodeToPodDataMap(t *testing.T) { pods: tcase.pods, err: tcase.err, } - got, err := makeNodeToPodDataMap(podLister, tcase.isPodRelevant, tcase.description) + got, err := makeNodeToPodDataMap(klog.Background(), podLister, tcase.isPodRelevant) if err != tcase.expectedErr { t.Errorf("error mismatch: got %v expected %v", err, tcase.expectedErr) } diff --git a/pkg/noderesourcetopology/cache/passthrough.go b/pkg/noderesourcetopology/cache/passthrough.go index 54e3e5ba9..ac2107613 100644 --- a/pkg/noderesourcetopology/cache/passthrough.go +++ b/pkg/noderesourcetopology/cache/passthrough.go @@ -19,30 +19,32 @@ package cache import ( "context" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) type Passthrough struct { client ctrlclient.Client + lh logr.Logger } -func NewPassthrough(client ctrlclient.Client) Interface { +func NewPassthrough(lh logr.Logger, client ctrlclient.Client) Interface { return Passthrough{ client: client, + lh: lh, } } func (pt Passthrough) GetCachedNRTCopy(ctx context.Context, nodeName string, _ *corev1.Pod) (*topologyv1alpha2.NodeResourceTopology, bool) { - klog.V(5).InfoS("Lister for nodeResTopoPlugin") + pt.lh.V(5).Info("lister for NRT plugin") nrt := &topologyv1alpha2.NodeResourceTopology{} if err := pt.client.Get(ctx, types.NamespacedName{Name: nodeName}, nrt); err != nil { - klog.V(5).ErrorS(err, "Cannot get NodeTopologies from NodeResourceTopologyLister") + pt.lh.V(5).Error(err, "cannot get nrts from lister") return nil, true } return nrt, true diff --git a/pkg/noderesourcetopology/cache/passthrough_test.go b/pkg/noderesourcetopology/cache/passthrough_test.go index 075486d64..03552c07e 100644 --- a/pkg/noderesourcetopology/cache/passthrough_test.go +++ b/pkg/noderesourcetopology/cache/passthrough_test.go @@ -21,6 +21,7 @@ import ( topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -44,7 +45,7 @@ func TestPassthroughGetCachedNRTCopy(t *testing.T) { checkGetCachedNRTCopy( t, func(client ctrlclient.Client, _ podlisterv1.PodLister) (Interface, error) { - return NewPassthrough(client), nil + return NewPassthrough(klog.Background(), client), nil }, testCases..., ) diff --git a/pkg/noderesourcetopology/cache/store.go b/pkg/noderesourcetopology/cache/store.go index 8ff78b321..cbfb986ec 100644 --- a/pkg/noderesourcetopology/cache/store.go +++ b/pkg/noderesourcetopology/cache/store.go @@ -21,8 +21,8 @@ import ( corev1 "k8s.io/api/core/v1" 
"k8s.io/apimachinery/pkg/api/resource" - "k8s.io/klog/v2" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" topologyv1alpha2attr "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2/helper/attribute" "github.com/k8stopologyawareschedwg/podfingerprint" @@ -36,17 +36,19 @@ import ( // data is intentionally copied each time it enters and exists the store. E.g, no pointer sharing. type nrtStore struct { data map[string]*topologyv1alpha2.NodeResourceTopology + lh logr.Logger } // newNrtStore creates a new nrtStore and initializes it with copies of the provided Node Resource Topology data. -func newNrtStore(nrts []topologyv1alpha2.NodeResourceTopology) *nrtStore { +func newNrtStore(lh logr.Logger, nrts []topologyv1alpha2.NodeResourceTopology) *nrtStore { data := make(map[string]*topologyv1alpha2.NodeResourceTopology, len(nrts)) for _, nrt := range nrts { data[nrt.Name] = nrt.DeepCopy() } - klog.V(6).InfoS("nrtcache: initialized nrtStore", "objects", len(data)) + lh.V(6).Info("initialized nrtStore", "objects", len(data)) return &nrtStore{ data: data, + lh: lh, } } @@ -60,7 +62,7 @@ func (nrs nrtStore) Contains(nodeName string) bool { func (nrs *nrtStore) GetNRTCopyByNodeName(nodeName string) *topologyv1alpha2.NodeResourceTopology { obj, ok := nrs.data[nodeName] if !ok { - klog.V(3).InfoS("nrtcache: missing cached NodeTopology", "node", nodeName) + nrs.lh.V(3).Info("missing cached NodeTopology", "node", nodeName) return nil } return obj.DeepCopy() @@ -69,18 +71,20 @@ func (nrs *nrtStore) GetNRTCopyByNodeName(nodeName string) *topologyv1alpha2.Nod // Update adds or replace the Node Resource Topology associated to a node. Always do a copy. func (nrs *nrtStore) Update(nrt *topologyv1alpha2.NodeResourceTopology) { nrs.data[nrt.Name] = nrt.DeepCopy() - klog.V(5).InfoS("nrtcache: updated cached NodeTopology", "node", nrt.Name) + nrs.lh.V(5).Info("updated cached NodeTopology", "node", nrt.Name) } // resourceStore maps the resource requested by pod by pod namespaed name. It is not thread safe and needs to be protected by a lock. type resourceStore struct { // key: namespace + "/" name data map[string]corev1.ResourceList + lh logr.Logger } -func newResourceStore() *resourceStore { +func newResourceStore(lh logr.Logger) *resourceStore { return &resourceStore{ data: make(map[string]corev1.ResourceList), + lh: lh, } } @@ -94,27 +98,27 @@ func (rs *resourceStore) String() string { // AddPod returns true if updating existing pod, false if adding for the first time func (rs *resourceStore) AddPod(pod *corev1.Pod) bool { - key := pod.Namespace + "/" + pod.Name // this is also a valid logID + key := pod.Namespace + "/" + pod.Name _, ok := rs.data[key] if ok { // should not happen, so we log with a low level - klog.V(4).InfoS("updating existing entry", "key", key) + rs.lh.V(4).Info("updating existing entry", "key", key) } resData := util.GetPodEffectiveRequest(pod) - klog.V(5).InfoS("nrtcache: resourcestore ADD", stringify.ResourceListToLoggable(key, resData)...) + rs.lh.V(5).Info("resourcestore ADD", stringify.ResourceListToLoggable(resData)...) 
rs.data[key] = resData return ok } // DeletePod returns true if deleted an existing pod, false otherwise func (rs *resourceStore) DeletePod(pod *corev1.Pod) bool { - key := pod.Namespace + "/" + pod.Name // this is also a valid logID + key := pod.Namespace + "/" + pod.Name _, ok := rs.data[key] if ok { // should not happen, so we log with a low level - klog.V(4).InfoS("removing missing entry", "key", key) + rs.lh.V(4).Info("removing missing entry", "key", key) } - klog.V(5).InfoS("nrtcache: resourcestore DEL", stringify.ResourceListToLoggable(key, rs.data[key])...) + rs.lh.V(5).Info("resourcestore DEL", stringify.ResourceListToLoggable(rs.data[key])...) delete(rs.data, key) return ok } @@ -142,7 +146,7 @@ func (rs *resourceStore) UpdateNRT(logID string, nrt *topologyv1alpha2.NodeResou if zr.Available.Cmp(qty) < 0 { // this should happen rarely, and it is likely caused by // a bug elsewhere. - klog.V(3).InfoS("nrtcache: cannot decrement resource", "logID", logID, "zone", zr.Name, "node", nrt.Name, "available", zr.Available, "requestor", key, "quantity", qty) + rs.lh.V(3).Info("cannot decrement resource", "logID", logID, "zone", zr.Name, "node", nrt.Name, "available", zr.Available, "requestor", key, "quantity", qty.String()) zr.Available = resource.Quantity{} continue } @@ -224,7 +228,7 @@ type podData struct { // checkPodFingerprintForNode verifies if the given pods fingeprint (usually from NRT update) matches the // computed one using the stored data about pods running on nodes. Returns nil on success, or an error // describing the failure -func checkPodFingerprintForNode(logID string, objs []podData, nodeName, pfpExpected string, onlyExclRes bool) error { +func checkPodFingerprintForNode(lh logr.Logger, objs []podData, nodeName, pfpExpected string, onlyExclRes bool) error { st := podfingerprint.MakeStatus(nodeName) pfp := podfingerprint.NewTracingFingerprint(len(objs), &st) for _, obj := range objs { @@ -235,8 +239,8 @@ func checkPodFingerprintForNode(logID string, objs []podData, nodeName, pfpExpec } pfpComputed := pfp.Sign() - klog.V(5).InfoS("nrtcache: podset fingerprint check", "logID", logID, "node", nodeName, "expected", pfpExpected, "computed", pfpComputed, "onlyExclusiveResources", onlyExclRes) - klog.V(6).InfoS("nrtcache: podset fingerprint debug", "logID", logID, "node", nodeName, "status", st.Repr()) + lh.V(5).Info("podset fingerprint check", "expected", pfpExpected, "computed", pfpComputed, "onlyExclusiveResources", onlyExclRes) + lh.V(6).Info("podset fingerprint debug", "status", st.Repr()) err := pfp.Check(pfpExpected) podfingerprint.MarkCompleted(st) diff --git a/pkg/noderesourcetopology/cache/store_test.go b/pkg/noderesourcetopology/cache/store_test.go index 30871d450..c7daa08f4 100644 --- a/pkg/noderesourcetopology/cache/store_test.go +++ b/pkg/noderesourcetopology/cache/store_test.go @@ -29,6 +29,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" "github.com/k8stopologyawareschedwg/podfingerprint" @@ -202,7 +203,7 @@ func TestNRTStoreGet(t *testing.T) { }, }, } - ns := newNrtStore(nrts) + ns := newNrtStore(klog.Background(), nrts) obj := ns.GetNRTCopyByNodeName("node-0") obj.TopologyPolicies[0] = "single-numa-node" @@ -238,7 +239,7 @@ func TestNRTStoreUpdate(t *testing.T) { }, }, } - ns := newNrtStore(nrts) + ns := newNrtStore(klog.Background(), nrts) nrt3 := &topologyv1alpha2.NodeResourceTopology{ ObjectMeta: 
metav1.ObjectMeta{ @@ -258,14 +259,14 @@ func TestNRTStoreUpdate(t *testing.T) { } func TestNRTStoreGetMissing(t *testing.T) { - ns := newNrtStore(nil) + ns := newNrtStore(klog.Background(), nil) if ns.GetNRTCopyByNodeName("node-missing") != nil { t.Errorf("missing node returned non-nil data") } } func TestNRTStoreContains(t *testing.T) { - ns := newNrtStore(nil) + ns := newNrtStore(klog.Background(), nil) if ns.Contains("node-0") { t.Errorf("unexpected node found") } @@ -288,7 +289,7 @@ func TestNRTStoreContains(t *testing.T) { }, }, } - ns = newNrtStore(nrts) + ns = newNrtStore(klog.Background(), nrts) if !ns.Contains("node-0") { t.Errorf("missing node") } @@ -409,7 +410,7 @@ func TestResourceStoreAddPod(t *testing.T) { }, } - rs := newResourceStore() + rs := newResourceStore(klog.Background()) existed := rs.AddPod(&pod) if existed { t.Fatalf("replaced a pod into a empty resourceStore") @@ -441,7 +442,7 @@ func TestResourceStoreDeletePod(t *testing.T) { }, } - rs := newResourceStore() + rs := newResourceStore(klog.Background()) existed := rs.DeletePod(&pod) if existed { t.Fatalf("deleted a pod into a empty resourceStore") @@ -508,7 +509,7 @@ func TestResourceStoreUpdate(t *testing.T) { }, } - rs := newResourceStore() + rs := newResourceStore(klog.Background()) existed := rs.AddPod(&pod) if existed { t.Fatalf("replacing a pod into a empty resourceStore") @@ -581,7 +582,7 @@ func TestCheckPodFingerprintForNode(t *testing.T) { for _, tcase := range tcases { t.Run(tcase.description, func(t *testing.T) { - gotErr := checkPodFingerprintForNode("testing", tcase.objs, "test-node", tcase.pfp, tcase.onlyExclRes) + gotErr := checkPodFingerprintForNode(klog.Background(), tcase.objs, "test-node", tcase.pfp, tcase.onlyExclRes) if !errors.Is(gotErr, tcase.expectedErr) { t.Errorf("got error %v expected %v", gotErr, tcase.expectedErr) } diff --git a/pkg/noderesourcetopology/config.go b/pkg/noderesourcetopology/config.go index 9b58d34bf..3cd18df56 100644 --- a/pkg/noderesourcetopology/config.go +++ b/pkg/noderesourcetopology/config.go @@ -17,9 +17,9 @@ limitations under the License. package noderesourcetopology import ( - "k8s.io/klog/v2" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" ) @@ -57,10 +57,10 @@ func makeTopologyManagerConfigDefaults() TopologyManagerConfig { } } -func topologyManagerConfigFromNodeResourceTopology(nodeTopology *topologyv1alpha2.NodeResourceTopology) TopologyManagerConfig { +func topologyManagerConfigFromNodeResourceTopology(lh logr.Logger, nodeTopology *topologyv1alpha2.NodeResourceTopology) TopologyManagerConfig { conf := makeTopologyManagerConfigDefaults() // Backward compatibility (v1alpha2 and previous). Deprecated, will be removed when the NRT API moves to v1beta1. 
- updateTopologyManagerConfigFromTopologyPolicies(&conf, nodeTopology.Name, nodeTopology.TopologyPolicies) + updateTopologyManagerConfigFromTopologyPolicies(lh, &conf, nodeTopology.Name, nodeTopology.TopologyPolicies) // preferred new configuration source (v1alpha2 and onwards) updateTopologyManagerConfigFromAttributes(&conf, nodeTopology.Attributes) return conf @@ -80,18 +80,17 @@ func updateTopologyManagerConfigFromAttributes(conf *TopologyManagerConfig, attr } } -func updateTopologyManagerConfigFromTopologyPolicies(conf *TopologyManagerConfig, nodeName string, topologyPolicies []string) { +func updateTopologyManagerConfigFromTopologyPolicies(lh logr.Logger, conf *TopologyManagerConfig, nodeName string, topologyPolicies []string) { if len(topologyPolicies) == 0 { - klog.V(3).InfoS("Cannot determine policy", "node", nodeName) return } if len(topologyPolicies) > 1 { - klog.V(4).InfoS("Ignoring extra policies", "node", nodeName, "policies count", len(topologyPolicies)-1) + lh.V(4).Info("ignoring extra policies", "node", nodeName, "policies count", len(topologyPolicies)-1) } policyName := topologyv1alpha2.TopologyManagerPolicy(topologyPolicies[0]) - klog.Warning("The `topologyPolicies` field is deprecated and will be removed with the NRT API v1beta1.") - klog.Warning("The `topologyPolicies` field is deprecated, please use top-level Attributes field instead.") + lh.Info("the `topologyPolicies` field is deprecated and will be removed with the NRT API v1beta1.") + lh.Info("the `topologyPolicies` field is deprecated, please use top-level Attributes field instead.") switch policyName { case topologyv1alpha2.SingleNUMANodePodLevel: diff --git a/pkg/noderesourcetopology/config_test.go b/pkg/noderesourcetopology/config_test.go index a84ae7027..954086698 100644 --- a/pkg/noderesourcetopology/config_test.go +++ b/pkg/noderesourcetopology/config_test.go @@ -20,6 +20,7 @@ import ( "reflect" "testing" + "k8s.io/klog/v2" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" @@ -299,7 +300,7 @@ func TestConfigFromPolicies(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := TopologyManagerConfig{} - updateTopologyManagerConfigFromTopologyPolicies(&got, "", tt.policies) + updateTopologyManagerConfigFromTopologyPolicies(klog.Background(), &got, "", tt.policies) if !reflect.DeepEqual(got, tt.expected) { t.Errorf("conf got=%+#v expected=%+#v", got, tt.expected) } @@ -402,7 +403,7 @@ func TestConfigFromNRT(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := topologyManagerConfigFromNodeResourceTopology(&tt.nrt) + got := topologyManagerConfigFromNodeResourceTopology(klog.Background(), &tt.nrt) if !reflect.DeepEqual(got, tt.expected) { t.Errorf("conf got=%+#v expected=%+#v", got, tt.expected) } diff --git a/pkg/noderesourcetopology/filter.go b/pkg/noderesourcetopology/filter.go index 61cde522e..1052608d4 100644 --- a/pkg/noderesourcetopology/filter.go +++ b/pkg/noderesourcetopology/filter.go @@ -18,18 +18,18 @@ package noderesourcetopology import ( "context" - "fmt" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/klog/v2" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" bm "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/scheduler/framework" + 
"github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/resourcerequests" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify" "sigs.k8s.io/scheduler-plugins/pkg/util" @@ -41,66 +41,63 @@ const highestNUMAID = 8 type PolicyHandler func(pod *v1.Pod, zoneMap topologyv1alpha2.ZoneList) *framework.Status -func singleNUMAContainerLevelHandler(pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { - klog.V(5).InfoS("Single NUMA node handler") +func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { + lh.V(5).Info("container level single NUMA node handler") // prepare NUMANodes list from zoneMap - nodes := createNUMANodeList(zones) + nodes := createNUMANodeList(lh, zones) qos := v1qos.GetPodQOS(pod) // Node() != nil already verified in Filter(), which is the only public entry point - logNumaNodes("container handler NUMA resources", nodeInfo.Node().Name, nodes) + logNumaNodes(lh, "container handler NUMA resources", nodeInfo.Node().Name, nodes) // the init containers are running SERIALLY and BEFORE the normal containers. // https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#understanding-init-containers // therefore, we don't need to accumulate their resources together for _, initContainer := range pod.Spec.InitContainers { - logID := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, initContainer.Name) - klog.V(6).InfoS("target resources", stringify.ResourceListToLoggable(logID, initContainer.Resources.Requests)...) + lh.V(6).Info("init container desired resources", stringify.ResourceListToLoggable(initContainer.Resources.Requests)...) - _, match := resourcesAvailableInAnyNUMANodes(logID, nodes, initContainer.Resources.Requests, qos, nodeInfo) + _, match := resourcesAvailableInAnyNUMANodes(lh, nodes, initContainer.Resources.Requests, qos, nodeInfo) if !match { // we can't align init container, so definitely we can't align a pod - klog.V(2).InfoS("cannot align container", "name", initContainer.Name, "kind", "init") + lh.V(2).Info("cannot align container", "name", initContainer.Name, "kind", "init") return framework.NewStatus(framework.Unschedulable, "cannot align init container") } } for _, container := range pod.Spec.Containers { - logID := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Name) - klog.V(6).InfoS("target resources", stringify.ResourceListToLoggable(logID, container.Resources.Requests)...) + // TODO: add containerName + lh.V(6).Info("app container resources", stringify.ResourceListToLoggable(container.Resources.Requests)...) - numaID, match := resourcesAvailableInAnyNUMANodes(logID, nodes, container.Resources.Requests, qos, nodeInfo) + numaID, match := resourcesAvailableInAnyNUMANodes(lh, nodes, container.Resources.Requests, qos, nodeInfo) if !match { // we can't align container, so definitely we can't align a pod - klog.V(2).InfoS("cannot align container", "name", container.Name, "kind", "app") + lh.V(2).Info("cannot align container", "name", container.Name, "kind", "app") return framework.NewStatus(framework.Unschedulable, "cannot align container") } // subtract the resources requested by the container from the given NUMA. 
// this is necessary, so we won't allocate the same resources for the upcoming containers - subtractFromNUMA(nodes, numaID, container) + subtractFromNUMA(lh, nodes, numaID, container) } return nil } // resourcesAvailableInAnyNUMANodes checks for sufficient resource and return the NUMAID that would be selected by Kubelet. // this function requires NUMANodeList with properly populated NUMANode, NUMAID should be in range 0-63 -func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, resources v1.ResourceList, qos v1.PodQOSClass, nodeInfo *framework.NodeInfo) (int, bool) { +func resourcesAvailableInAnyNUMANodes(lh logr.Logger, numaNodes NUMANodeList, resources v1.ResourceList, qos v1.PodQOSClass, nodeInfo *framework.NodeInfo) (int, bool) { numaID := highestNUMAID bitmask := bm.NewEmptyBitMask() // set all bits, each bit is a NUMA node, if resources couldn't be aligned // on the NUMA node, bit should be unset bitmask.Fill() - // Node() != nil already verified in Filter(), which is the only public entry point - nodeName := nodeInfo.Node().Name nodeResources := util.ResourceList(nodeInfo.Allocatable) for resource, quantity := range resources { if quantity.IsZero() { // why bother? everything's fine from the perspective of this resource - klog.V(4).InfoS("ignoring zero-qty resource request", "logID", logID, "node", nodeName, "resource", resource) + lh.V(4).Info("ignoring zero-qty resource request", "resource", resource) continue } @@ -108,7 +105,7 @@ func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, reso // some resources may not expose NUMA affinity (device plugins, extended resources), but all resources // must be reported at node level; thus, if they are not present at node level, we can safely assume // we don't have the resource at all. 
- klog.V(5).InfoS("early verdict: cannot meet request", "logID", logID, "node", nodeName, "resource", resource, "suitable", "false") + lh.V(5).Info("early verdict: cannot meet request", "resource", resource, "suitable", "false") return numaID, false } @@ -128,19 +125,19 @@ func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, reso } resourceBitmask.Add(numaNode.NUMAID) - klog.V(6).InfoS("feasible", "logID", logID, "node", nodeName, "NUMA", numaNode.NUMAID, "resource", resource) + lh.V(6).Info("feasible", "numaCell", numaNode.NUMAID, "resource", resource) } // non-native resources or ephemeral-storage may not expose NUMA affinity, // but since they are available at node level, this is fine if !hasNUMAAffinity && (!v1helper.IsNativeResource(resource) || resource == v1.ResourceEphemeralStorage) { - klog.V(6).InfoS("resource available at node level (no NUMA affinity)", "logID", logID, "node", nodeName, "resource", resource) + lh.V(6).Info("resource available at node level (no NUMA affinity)", "resource", resource) continue } bitmask.And(resourceBitmask) if bitmask.IsEmpty() { - klog.V(5).InfoS("early verdict", "logID", logID, "node", nodeName, "resource", resource, "suitable", "false") + lh.V(5).Info("early verdict", "resource", resource, "suitable", "false") return numaID, false } } @@ -152,7 +149,7 @@ func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, reso // at least one NUMA node is available ret := !bitmask.IsEmpty() - klog.V(5).InfoS("final verdict", "logID", logID, "node", nodeName, "suitable", ret) + lh.V(5).Info("final verdict", "suitable", ret) return numaID, ret } @@ -175,20 +172,19 @@ func isResourceSetSuitable(qos v1.PodQOSClass, resource v1.ResourceName, quantit return numaQuantity.Cmp(quantity) >= 0 } -func singleNUMAPodLevelHandler(pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { - klog.V(5).InfoS("Pod Level Resource handler") +func singleNUMAPodLevelHandler(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { + lh.V(5).Info("pod level single NUMA node handler") resources := util.GetPodEffectiveRequest(pod) - logID := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) - nodes := createNUMANodeList(zones) + nodes := createNUMANodeList(lh, zones) // Node() != nil already verified in Filter(), which is the only public entry point - logNumaNodes("pod handler NUMA resources", nodeInfo.Node().Name, nodes) - klog.V(6).InfoS("target resources", stringify.ResourceListToLoggable(logID, resources)...) + logNumaNodes(lh, "pod handler NUMA resources", nodeInfo.Node().Name, nodes) + lh.V(6).Info("pod desired resources", stringify.ResourceListToLoggable(resources)...) 
- if _, match := resourcesAvailableInAnyNUMANodes(logID, createNUMANodeList(zones), resources, v1qos.GetPodQOS(pod), nodeInfo); !match { - klog.V(2).InfoS("cannot align pod", "name", pod.Name) + if _, match := resourcesAvailableInAnyNUMANodes(lh, createNUMANodeList(lh, zones), resources, v1qos.GetPodQOS(pod), nodeInfo); !match { + lh.V(2).Info("cannot align pod", "name", pod.Name) return framework.NewStatus(framework.Unschedulable, "cannot align pod") } return nil @@ -204,22 +200,27 @@ func (tm *TopologyMatch) Filter(ctx context.Context, cycleState *framework.Cycle } nodeName := nodeInfo.Node().Name + + lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowFilter) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) + nodeTopology, ok := tm.nrtCache.GetCachedNRTCopy(ctx, nodeName, pod) if !ok { - klog.V(2).InfoS("invalid topology data", "node", nodeName) + lh.V(2).Info("invalid topology data") return framework.NewStatus(framework.Unschedulable, "invalid node topology data") } if nodeTopology == nil { return nil } - klog.V(5).InfoS("Found NodeResourceTopology", "nodeTopology", klog.KObj(nodeTopology)) + lh.V(5).Info("found nrt data", "object", stringify.NodeResourceTopologyResources(nodeTopology)) - handler := filterHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(nodeTopology)) + handler := filterHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(lh, nodeTopology)) if handler == nil { return nil } - status := handler(pod, nodeTopology.Zones, nodeInfo) + status := handler(lh, pod, nodeTopology.Zones, nodeInfo) if status != nil { tm.nrtCache.NodeMaybeOverReserved(nodeName, pod) } @@ -227,7 +228,7 @@ func (tm *TopologyMatch) Filter(ctx context.Context, cycleState *framework.Cycle } // subtractFromNUMA finds the correct NUMA ID's resources and subtract them from `nodes`. 
-func subtractFromNUMA(nodes NUMANodeList, numaID int, container v1.Container) { +func subtractFromNUMA(lh logr.Logger, nodes NUMANodeList, numaID int, container v1.Container) { for i := 0; i < len(nodes); i++ { if nodes[i].NUMAID != numaID { continue @@ -241,7 +242,7 @@ func subtractFromNUMA(nodes NUMANodeList, numaID int, container v1.Container) { // when resourcesAvailableInAnyNUMANodes function is passed // but let's log here if such unlikely case will occur if nodeResQuan.Sign() == -1 { - klog.V(4).InfoS("resource quantity should not be a negative value", "resource", resName, "quantity", nodeResQuan.String()) + lh.V(4).Info("resource quantity should not be a negative value", "resource", resName, "quantity", nodeResQuan.String()) } nRes[resName] = nodeResQuan } diff --git a/pkg/noderesourcetopology/filter_test.go b/pkg/noderesourcetopology/filter_test.go index db20bf82c..4a19c6c6a 100644 --- a/pkg/noderesourcetopology/filter_test.go +++ b/pkg/noderesourcetopology/filter_test.go @@ -27,6 +27,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" nrtcache "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/cache" @@ -683,7 +684,7 @@ func TestNodeResourceTopology(t *testing.T) { } tm := TopologyMatch{ - nrtCache: nrtcache.NewPassthrough(fakeClient), + nrtCache: nrtcache.NewPassthrough(klog.Background(), fakeClient), } for _, tt := range tests { @@ -902,7 +903,7 @@ func TestNodeResourceTopologyMultiContainerPodScope(t *testing.T) { } tm := TopologyMatch{ - nrtCache: nrtcache.NewPassthrough(fakeClient), + nrtCache: nrtcache.NewPassthrough(klog.Background(), fakeClient), } nodeInfo := framework.NewNodeInfo() @@ -1164,7 +1165,7 @@ func TestNodeResourceTopologyMultiContainerContainerScope(t *testing.T) { } tm := TopologyMatch{ - nrtCache: nrtcache.NewPassthrough(fakeClient), + nrtCache: nrtcache.NewPassthrough(klog.Background(), fakeClient), } nodeInfo := framework.NewNodeInfo() diff --git a/pkg/noderesourcetopology/least_numa.go b/pkg/noderesourcetopology/least_numa.go index 9cb269b7f..cf14195ae 100644 --- a/pkg/noderesourcetopology/least_numa.go +++ b/pkg/noderesourcetopology/least_numa.go @@ -17,14 +17,12 @@ limitations under the License. 
package noderesourcetopology import ( - "fmt" - v1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/scheduler/framework" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" "gonum.org/v1/gonum/stat/combin" @@ -36,8 +34,8 @@ const ( maxDistanceValue = 255 ) -func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { - nodes := createNUMANodeList(zones) +func leastNUMAContainerScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { + nodes := createNUMANodeList(lh, zones) qos := v1qos.GetPodQOS(pod) maxNUMANodesCount := 0 @@ -49,12 +47,11 @@ func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) if onlyNonNUMAResources(nodes, container.Resources.Requests) { continue } - identifier := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Name) - numaNodes, isMinAvgDistance := numaNodesRequired(identifier, qos, nodes, container.Resources.Requests) + numaNodes, isMinAvgDistance := numaNodesRequired(lh, qos, nodes, container.Resources.Requests) // container's resources can't fit onto node, return MinNodeScore for whole pod if numaNodes == nil { // score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes - klog.Warningf("cannot calculate how many NUMA nodes are required for: %s", identifier) + lh.Info("cannot calculate how many NUMA nodes are required", "container", container.Name) return framework.MinNodeScore, nil } @@ -78,23 +75,21 @@ func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) return normalizeScore(maxNUMANodesCount, allContainersMinAvgDistance), nil } -func leastNUMAPodScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { - nodes := createNUMANodeList(zones) +func leastNUMAPodScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { + nodes := createNUMANodeList(lh, zones) qos := v1qos.GetPodQOS(pod) - identifier := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) - resources := util.GetPodEffectiveRequest(pod) // if a pod requests only non NUMA resources return max score if onlyNonNUMAResources(nodes, resources) { return framework.MaxNodeScore, nil } - numaNodes, isMinAvgDistance := numaNodesRequired(identifier, qos, nodes, resources) + numaNodes, isMinAvgDistance := numaNodesRequired(lh, qos, nodes, resources) // pod's resources can't fit onto node, return MinNodeScore if numaNodes == nil { // score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes - klog.Warningf("cannot calculate how many NUMA nodes are required for: %s", identifier) + lh.Info("cannot calculate how many NUMA nodes are required") return framework.MinNodeScore, nil } @@ -112,12 +107,12 @@ func normalizeScore(numaNodesCount int, isMinAvgDistance bool) int64 { return score } -func minAvgDistanceInCombinations(numaNodes NUMANodeList, numaNodesCombination [][]int) float32 { +func minAvgDistanceInCombinations(lh logr.Logger, numaNodes NUMANodeList, numaNodesCombination [][]int) float32 { // max distance for NUMA node var minDistance float32 = maxDistanceValue for _, combination := range numaNodesCombination { - avgDistance := nodesAvgDistance(numaNodes, combination...) 
+ avgDistance := nodesAvgDistance(lh, numaNodes, combination...) if avgDistance < minDistance { minDistance = avgDistance } @@ -126,7 +121,7 @@ func minAvgDistanceInCombinations(numaNodes NUMANodeList, numaNodesCombination [ return minDistance } -func nodesAvgDistance(numaNodes NUMANodeList, nodes ...int) float32 { +func nodesAvgDistance(lh logr.Logger, numaNodes NUMANodeList, nodes ...int) float32 { if len(nodes) == 0 { return maxDistanceValue } @@ -140,7 +135,7 @@ func nodesAvgDistance(numaNodes NUMANodeList, nodes ...int) float32 { cost, ok := numaNodes[node1].Costs[numaNodes[node2].NUMAID] // we couldn't read Costs assign maxDistanceValue if !ok { - klog.Warningf("cannot retrieve Costs information for node ID %d", numaNodes[node1].NUMAID) + lh.Info("cannot retrieve Costs information", "nodeID", numaNodes[node1].NUMAID) cost = maxDistanceValue } accu += cost @@ -169,10 +164,10 @@ func combineResources(numaNodes NUMANodeList, combination []int) v1.ResourceList // numaNodesRequired returns bitmask with minimal NUMA nodes required to run given resources // or nil when resources can't be fitted onto the worker node // second value returned is a boolean indicating if bitmask is optimal from distance perspective -func numaNodesRequired(identifier string, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList) (bitmask.BitMask, bool) { +func numaNodesRequired(lh logr.Logger, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList) (bitmask.BitMask, bool) { for bitmaskLen := 1; bitmaskLen <= len(numaNodes); bitmaskLen++ { numaNodesCombination := combin.Combinations(len(numaNodes), bitmaskLen) - suitableCombination, isMinDistance := findSuitableCombination(identifier, qos, numaNodes, resources, numaNodesCombination) + suitableCombination, isMinDistance := findSuitableCombination(lh, qos, numaNodes, resources, numaNodesCombination) // we have found suitable combination for given bitmaskLen if suitableCombination != nil { bm := bitmask.NewEmptyBitMask() @@ -189,8 +184,8 @@ func numaNodesRequired(identifier string, qos v1.PodQOSClass, numaNodes NUMANode // findSuitableCombination returns combination from numaNodesCombination that can fit resources, otherwise return nil // second value returned is a boolean indicating if returned combination is optimal from distance perspective // this function will always return combination that provides minimal average distance between nodes in combination -func findSuitableCombination(identifier string, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList, numaNodesCombination [][]int) ([]int, bool) { - minAvgDistance := minAvgDistanceInCombinations(numaNodes, numaNodesCombination) +func findSuitableCombination(lh logr.Logger, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList, numaNodesCombination [][]int) ([]int, bool) { + minAvgDistance := minAvgDistanceInCombinations(lh, numaNodes, numaNodesCombination) var ( minDistanceCombination []int // init as max distance @@ -201,10 +196,10 @@ func findSuitableCombination(identifier string, qos v1.PodQOSClass, numaNodes NU continue } combinationResources := combineResources(numaNodes, combination) - resourcesFit := checkResourcesFit(identifier, qos, resources, combinationResources) + resourcesFit := checkResourcesFit(lh, qos, resources, combinationResources) if resourcesFit { - distance := nodesAvgDistance(numaNodes, combination...) + distance := nodesAvgDistance(lh, numaNodes, combination...) 
if distance == minAvgDistance { // return early if we can fit resources into combination and provide minDistance return combination, true @@ -220,10 +215,10 @@ func findSuitableCombination(identifier string, qos v1.PodQOSClass, numaNodes NU return minDistanceCombination, false } -func checkResourcesFit(identifier string, qos v1.PodQOSClass, resources v1.ResourceList, combinationResources v1.ResourceList) bool { +func checkResourcesFit(lh logr.Logger, qos v1.PodQOSClass, resources v1.ResourceList, combinationResources v1.ResourceList) bool { for resource, quantity := range resources { if quantity.IsZero() { - klog.V(4).InfoS("ignoring zero-qty resource request", "identifier", identifier, "resource", resource) + lh.V(4).Info("ignoring zero-qty resource request", "resource", resource) continue } if combinationQuantity := combinationResources[resource]; !isResourceSetSuitable(qos, resource, quantity, combinationQuantity) { diff --git a/pkg/noderesourcetopology/least_numa_test.go b/pkg/noderesourcetopology/least_numa_test.go index 86876b801..f847b397a 100644 --- a/pkg/noderesourcetopology/least_numa_test.go +++ b/pkg/noderesourcetopology/least_numa_test.go @@ -23,6 +23,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" ) @@ -683,7 +684,7 @@ func TestNUMANodesRequired(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - bm, isMinDistance := numaNodesRequired("test", v1.PodQOSGuaranteed, tc.numaNodes, tc.podResources) + bm, isMinDistance := numaNodesRequired(klog.Background(), v1.PodQOSGuaranteed, tc.numaNodes, tc.podResources) if bm != nil && !bm.IsEqual(tc.expectedBitmask) { t.Errorf("wrong bitmask expected: %d got: %d", tc.expectedBitmask, bm) @@ -901,7 +902,7 @@ func TestMinDistance(t *testing.T) { } for _, tc := range tcases { t.Run(tc.description, func(t *testing.T) { - distance := minAvgDistanceInCombinations(tc.numaNodes, tc.combinations) + distance := minAvgDistanceInCombinations(klog.Background(), tc.numaNodes, tc.combinations) if distance != tc.expected { t.Errorf("Expected distance to be %f not %f", tc.expected, distance) } diff --git a/pkg/noderesourcetopology/logging.go b/pkg/noderesourcetopology/logging.go deleted file mode 100644 index d5842dc68..000000000 --- a/pkg/noderesourcetopology/logging.go +++ /dev/null @@ -1,49 +0,0 @@ -/* -Copyright 2021 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package noderesourcetopology - -import ( - "encoding/json" - "fmt" - - "k8s.io/klog/v2" - - topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" - - "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify" -) - -func logNumaNodes(desc, nodeName string, nodes NUMANodeList) { - for _, numaNode := range nodes { - numaLogKey := fmt.Sprintf("%s/node-%d", nodeName, numaNode.NUMAID) - klog.V(6).InfoS(desc, stringify.ResourceListToLoggable(numaLogKey, numaNode.Resources)...) - } -} - -func logNRT(desc string, nrtObj *topologyv1alpha2.NodeResourceTopology) { - if !klog.V(6).Enabled() { - // avoid the expensive marshal operation - return - } - - ntrJson, err := json.MarshalIndent(nrtObj, "", " ") - if err != nil { - klog.V(6).ErrorS(err, "failed to marshal noderesourcetopology object") - return - } - klog.V(6).Info(desc, "noderesourcetopology", string(ntrJson)) -} diff --git a/pkg/noderesourcetopology/logging/logging.go b/pkg/noderesourcetopology/logging/logging.go new file mode 100644 index 000000000..14b151339 --- /dev/null +++ b/pkg/noderesourcetopology/logging/logging.go @@ -0,0 +1,77 @@ +/* +Copyright 2021 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package logging + +import ( + "fmt" + "time" + + "github.com/go-logr/logr" + + corev1 "k8s.io/api/core/v1" +) + +// before to replace with FromContext(), at least in filter and score, +// we would need a way to inject a logger instance (preferably a +// per-plugin logger instance) when we create the Scheduler +// (with app.NewSchedulerCommand) + +// well-known structured log keys +const ( + KeyLogID string = "logID" + KeyPodUID string = "podUID" + KeyNode string = "node" + KeyFlow string = "flow" +) + +const ( + FlowBegin string = "begin" + FlowEnd string = "end" +) + +const ( + FlowCacheSync string = "cachesync" + FlowFilter string = "filter" + FlowPostBind string = "postbind" + FlowReserve string = "reserve" + FlowUnreserve string = "unreserve" + FlowScore string = "score" +) + +var logh logr.Logger + +func SetLogger(lh logr.Logger) { + logh = lh +} + +func Log() logr.Logger { + return logh +} + +func PodLogID(pod *corev1.Pod) string { + if pod == nil { + return "" + } + if pod.Namespace == "" { + return pod.Name + } + return pod.Namespace + "/" + pod.Name +} + +func TimeLogID() string { + return fmt.Sprintf("uts/%v", time.Now().UnixMilli()) +} diff --git a/pkg/noderesourcetopology/plugin.go b/pkg/noderesourcetopology/plugin.go index 153a4fccd..7ad6619d0 100644 --- a/pkg/noderesourcetopology/plugin.go +++ b/pkg/noderesourcetopology/plugin.go @@ -31,7 +31,9 @@ import ( apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" "sigs.k8s.io/scheduler-plugins/apis/config/validation" nrtcache "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/cache" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" + "github.com/go-logr/logr" topologyapi "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology" topologyv1alpha2 
"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" ) @@ -94,8 +96,8 @@ func subtractFromNUMAs(resources v1.ResourceList, numaNodes NUMANodeList, nodes } } -type filterFn func(pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status -type scoringFn func(*v1.Pod, topologyv1alpha2.ZoneList) (int64, *framework.Status) +type filterFn func(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status +type scoringFn func(logr.Logger, *v1.Pod, topologyv1alpha2.ZoneList) (int64, *framework.Status) // TopologyMatch plugin which run simplified version of TopologyManager's admit handler type TopologyMatch struct { @@ -118,7 +120,11 @@ func (tm *TopologyMatch) Name() string { // New initializes a new plugin and returns it. func New(_ context.Context, args runtime.Object, handle framework.Handle) (framework.Plugin, error) { - klog.V(5).InfoS("Creating new TopologyMatch plugin") + // we do this later to make sure klog is initialized. We don't need this anyway before this point + lh := klog.Background() + logging.SetLogger(lh) + + lh.V(5).Info("creating new noderesourcetopology plugin") tcfg, ok := args.(*apiconfig.NodeResourceTopologyMatchArgs) if !ok { return nil, fmt.Errorf("want args to be of type NodeResourceTopologyMatchArgs, got %T", args) @@ -128,9 +134,9 @@ func New(_ context.Context, args runtime.Object, handle framework.Handle) (frame return nil, err } - nrtCache, err := initNodeTopologyInformer(tcfg, handle) + nrtCache, err := initNodeTopologyInformer(lh, tcfg, handle) if err != nil { - klog.ErrorS(err, "Cannot create clientset for NodeTopologyResource", "kubeConfig", handle.KubeConfig()) + lh.Error(err, "cannot create clientset for NodeTopologyResource", "kubeConfig", handle.KubeConfig()) return nil, err } diff --git a/pkg/noderesourcetopology/pluginhelpers.go b/pkg/noderesourcetopology/pluginhelpers.go index 832c95433..54b95e390 100644 --- a/pkg/noderesourcetopology/pluginhelpers.go +++ b/pkg/noderesourcetopology/pluginhelpers.go @@ -22,9 +22,9 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/wait" k8scache "k8s.io/client-go/tools/cache" - "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2/helper/numanode" @@ -40,64 +40,64 @@ const ( maxNUMAId = 64 ) -func initNodeTopologyInformer(tcfg *apiconfig.NodeResourceTopologyMatchArgs, handle framework.Handle) (nrtcache.Interface, error) { +func initNodeTopologyInformer(lh logr.Logger, tcfg *apiconfig.NodeResourceTopologyMatchArgs, handle framework.Handle) (nrtcache.Interface, error) { client, err := ctrlclient.New(handle.KubeConfig(), ctrlclient.Options{Scheme: scheme}) if err != nil { - klog.ErrorS(err, "Cannot create client for NodeTopologyResource", "kubeConfig", handle.KubeConfig()) + lh.Error(err, "cannot create client for NodeTopologyResource", "kubeConfig", handle.KubeConfig()) return nil, err } if tcfg.DiscardReservedNodes { - return nrtcache.NewDiscardReserved(client), nil + return nrtcache.NewDiscardReserved(lh.WithName("nrtcache"), client), nil } if tcfg.CacheResyncPeriodSeconds <= 0 { - return nrtcache.NewPassthrough(client), nil + return nrtcache.NewPassthrough(lh.WithName("nrtcache"), client), nil } - podSharedInformer, podLister, isPodRelevant := 
podprovider.NewFromHandle(handle, tcfg.Cache) + podSharedInformer, podLister, isPodRelevant := podprovider.NewFromHandle(lh, handle, tcfg.Cache) - nrtCache, err := nrtcache.NewOverReserve(tcfg.Cache, client, podLister, isPodRelevant) + nrtCache, err := nrtcache.NewOverReserve(lh.WithName("nrtcache"), tcfg.Cache, client, podLister, isPodRelevant) if err != nil { return nil, err } - initNodeTopologyForeignPodsDetection(tcfg.Cache, handle, podSharedInformer, nrtCache) + initNodeTopologyForeignPodsDetection(lh, tcfg.Cache, handle, podSharedInformer, nrtCache) resyncPeriod := time.Duration(tcfg.CacheResyncPeriodSeconds) * time.Second go wait.Forever(nrtCache.Resync, resyncPeriod) - klog.V(3).InfoS("enable NodeTopology cache (needs the Reserve plugin)", "resyncPeriod", resyncPeriod) + lh.V(3).Info("enable NodeTopology cache (needs the Reserve plugin)", "resyncPeriod", resyncPeriod) return nrtCache, nil } -func initNodeTopologyForeignPodsDetection(cfg *apiconfig.NodeResourceTopologyCache, handle framework.Handle, podSharedInformer k8scache.SharedInformer, nrtCache *nrtcache.OverReserve) { - foreignPodsDetect := getForeignPodsDetectMode(cfg) +func initNodeTopologyForeignPodsDetection(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache, handle framework.Handle, podSharedInformer k8scache.SharedInformer, nrtCache *nrtcache.OverReserve) { + foreignPodsDetect := getForeignPodsDetectMode(lh, cfg) if foreignPodsDetect == apiconfig.ForeignPodsDetectNone { - klog.InfoS("foreign pods detection disabled by configuration") + lh.Info("foreign pods detection disabled by configuration") return } fwk, ok := handle.(framework.Framework) if !ok { - klog.Warningf("cannot determine the scheduler profile names - no foreign pod detection enabled") + lh.Info("cannot determine the scheduler profile names - no foreign pod detection enabled") return } profileName := fwk.ProfileName() - klog.InfoS("setting up foreign pods detection", "name", profileName, "mode", foreignPodsDetect) + lh.Info("setting up foreign pods detection", "name", profileName, "mode", foreignPodsDetect) if foreignPodsDetect == apiconfig.ForeignPodsDetectOnlyExclusiveResources { nrtcache.TrackOnlyForeignPodsWithExclusiveResources() } else { nrtcache.TrackAllForeignPods() } - nrtcache.RegisterSchedulerProfileName(profileName) - nrtcache.SetupForeignPodsDetector(profileName, podSharedInformer, nrtCache) + nrtcache.RegisterSchedulerProfileName(lh.WithName("foreignpods"), profileName) + nrtcache.SetupForeignPodsDetector(lh.WithName("foreignpods"), profileName, podSharedInformer, nrtCache) } -func createNUMANodeList(zones topologyv1alpha2.ZoneList) NUMANodeList { +func createNUMANodeList(lh logr.Logger, zones topologyv1alpha2.ZoneList) NUMANodeList { numaIDToZoneIDx := make([]int, maxNUMAId) nodes := NUMANodeList{} // filter non Node zones and create idToIdx lookup array @@ -108,14 +108,15 @@ func createNUMANodeList(zones topologyv1alpha2.ZoneList) NUMANodeList { numaID, err := numanode.NameToID(zone.Name) if err != nil || numaID > maxNUMAId { - klog.Error(err) + lh.Error(err, "error getting the numaID", "zone", zone.Name, "numaID", numaID) continue } numaIDToZoneIDx[numaID] = i resources := extractResources(zone) - klog.V(6).InfoS("extracted NUMA resources", stringify.ResourceListToLoggable(zone.Name, resources)...) + numaItems := []interface{}{"numaCell", numaID} + lh.V(6).Info("extracted NUMA resources", stringify.ResourceListToLoggableWithValues(numaItems, resources)...) 
nodes = append(nodes, NUMANode{NUMAID: numaID, Resources: resources}) } @@ -166,13 +167,20 @@ func onlyNonNUMAResources(numaNodes NUMANodeList, resources corev1.ResourceList) return true } -func getForeignPodsDetectMode(cfg *apiconfig.NodeResourceTopologyCache) apiconfig.ForeignPodsDetectMode { +func getForeignPodsDetectMode(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache) apiconfig.ForeignPodsDetectMode { var foreignPodsDetect apiconfig.ForeignPodsDetectMode if cfg != nil && cfg.ForeignPodsDetect != nil { foreignPodsDetect = *cfg.ForeignPodsDetect } else { // explicitly set to nil? foreignPodsDetect = apiconfig.ForeignPodsDetectAll - klog.InfoS("foreign pods detection value missing", "fallback", foreignPodsDetect) + lh.Info("foreign pods detection value missing", "fallback", foreignPodsDetect) } return foreignPodsDetect } + +func logNumaNodes(lh logr.Logger, desc, nodeName string, nodes NUMANodeList) { + for _, numaNode := range nodes { + numaItems := []interface{}{"numaCell", numaNode.NUMAID} + lh.V(6).Info(desc, stringify.ResourceListToLoggableWithValues(numaItems, numaNode.Resources)...) + } +} diff --git a/pkg/noderesourcetopology/pluginhelpers_test.go b/pkg/noderesourcetopology/pluginhelpers_test.go index 8138fdebd..0ae260b35 100644 --- a/pkg/noderesourcetopology/pluginhelpers_test.go +++ b/pkg/noderesourcetopology/pluginhelpers_test.go @@ -21,6 +21,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/klog/v2" apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" ) @@ -148,7 +149,7 @@ func TestGetForeignPodsDetectMode(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.description, func(t *testing.T) { - got := getForeignPodsDetectMode(testCase.cfg) + got := getForeignPodsDetectMode(klog.Background(), testCase.cfg) if got != testCase.expected { t.Errorf("foreign pods detect mode got %v expected %v", got, testCase.expected) } diff --git a/pkg/noderesourcetopology/podprovider/podprovider.go b/pkg/noderesourcetopology/podprovider/podprovider.go index 7aa6ec208..2fa2f7bb4 100644 --- a/pkg/noderesourcetopology/podprovider/podprovider.go +++ b/pkg/noderesourcetopology/podprovider/podprovider.go @@ -19,21 +19,21 @@ package podprovider import ( "context" + "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" coreinformers "k8s.io/client-go/informers/core/v1" podlisterv1 "k8s.io/client-go/listers/core/v1" "k8s.io/client-go/tools/cache" k8scache "k8s.io/client-go/tools/cache" - "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" ) -type PodFilterFunc func(pod *corev1.Pod, logID string) bool +type PodFilterFunc func(lh logr.Logger, pod *corev1.Pod) bool -func NewFromHandle(handle framework.Handle, cacheConf *apiconfig.NodeResourceTopologyCache) (k8scache.SharedIndexInformer, podlisterv1.PodLister, PodFilterFunc) { +func NewFromHandle(lh logr.Logger, handle framework.Handle, cacheConf *apiconfig.NodeResourceTopologyCache) (k8scache.SharedIndexInformer, podlisterv1.PodLister, PodFilterFunc) { dedicated := wantsDedicatedInformer(cacheConf) if !dedicated { podHandle := handle.SharedInformerFactory().Core().V1().Pods() // shortcut @@ -43,38 +43,38 @@ func NewFromHandle(handle framework.Handle, cacheConf *apiconfig.NodeResourceTop podInformer := coreinformers.NewFilteredPodInformer(handle.ClientSet(), metav1.NamespaceAll, 0, cache.Indexers{}, nil) podLister := podlisterv1.NewPodLister(podInformer.GetIndexer()) - 
klog.V(5).InfoS("Start custom pod informer") + lh.V(5).Info("start custom pod informer") ctx := context.Background() go podInformer.Run(ctx.Done()) - klog.V(5).InfoS("Syncing custom pod informer") + lh.V(5).Info("syncing custom pod informer") cache.WaitForCacheSync(ctx.Done(), podInformer.HasSynced) - klog.V(5).InfoS("Synced custom pod informer") + lh.V(5).Info("synced custom pod informer") return podInformer, podLister, IsPodRelevantDedicated } // IsPodRelevantAlways is meant to be used in test only -func IsPodRelevantAlways(pod *corev1.Pod, logID string) bool { +func IsPodRelevantAlways(lh logr.Logger, pod *corev1.Pod) bool { return true } -func IsPodRelevantShared(pod *corev1.Pod, logID string) bool { +func IsPodRelevantShared(lh logr.Logger, pod *corev1.Pod) bool { // we are interested only about nodes which consume resources return pod.Status.Phase == corev1.PodRunning } -func IsPodRelevantDedicated(pod *corev1.Pod, logID string) bool { +func IsPodRelevantDedicated(lh logr.Logger, pod *corev1.Pod) bool { // Every other phase we're interested into (see https://github.com/kubernetes-sigs/scheduler-plugins/pull/599). // Note PodUnknown is deprecated and reportedly no longer set since 2015 (!!) if pod.Status.Phase == corev1.PodPending { // this is unexpected, so we're loud about it - klog.V(2).InfoS("nrtcache: Listed pod in Pending phase, ignored", "logID", logID, "podUID", pod.UID) + lh.V(2).Info("listed pod in Pending phase, ignored", "podUID", pod.GetUID()) return false } if pod.Spec.NodeName == "" { // this is very unexpected, so we're louder about it - klog.InfoS("nrtcache: Listed pod unbound, ignored", "logID", logID, "podUID", pod.UID) + lh.Info("listed pod unbound, ignored", "podUID", pod.GetUID()) return false } return true diff --git a/pkg/noderesourcetopology/postbind.go b/pkg/noderesourcetopology/postbind.go index 45ea1ee68..a0218e25d 100644 --- a/pkg/noderesourcetopology/postbind.go +++ b/pkg/noderesourcetopology/postbind.go @@ -21,8 +21,13 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/scheduler/framework" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" ) func (tm *TopologyMatch) PostBind(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) { + lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowPostBind) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) + tm.nrtCache.PostBind(nodeName, pod) } diff --git a/pkg/noderesourcetopology/reserve.go b/pkg/noderesourcetopology/reserve.go index 5c79c12e6..b7c5d2397 100644 --- a/pkg/noderesourcetopology/reserve.go +++ b/pkg/noderesourcetopology/reserve.go @@ -21,14 +21,23 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/kubernetes/pkg/scheduler/framework" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" ) func (tm *TopologyMatch) Reserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) *framework.Status { + lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowReserve) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) + tm.nrtCache.ReserveNodeResources(nodeName, pod) // can't fail return framework.NewStatus(framework.Success, "") } func (tm *TopologyMatch) Unreserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) { + lh := 
logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowUnreserve) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) + tm.nrtCache.UnreserveNodeResources(nodeName, pod) } diff --git a/pkg/noderesourcetopology/score.go b/pkg/noderesourcetopology/score.go index 1e88deafd..e6b611194 100644 --- a/pkg/noderesourcetopology/score.go +++ b/pkg/noderesourcetopology/score.go @@ -23,13 +23,15 @@ import ( "gonum.org/v1/gonum/stat" v1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" "k8s.io/kubernetes/pkg/scheduler/framework" apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify" "sigs.k8s.io/scheduler-plugins/pkg/util" ) @@ -57,7 +59,11 @@ func (rw resourceToWeightMap) weight(r v1.ResourceName) int64 { } func (tm *TopologyMatch) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) { - klog.V(6).InfoS("scoring node", "nodeName", nodeName) + lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowScore) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) + + lh.V(6).Info("scoring node") // if it's a non-guaranteed pod, every node is considered to be a good fit if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed { return framework.MaxNodeScore, nil @@ -66,21 +72,21 @@ func (tm *TopologyMatch) Score(ctx context.Context, state *framework.CycleState, nodeTopology, ok := tm.nrtCache.GetCachedNRTCopy(ctx, nodeName, pod) if !ok { - klog.V(4).InfoS("noderesourcetopology is not valid for node", "node", nodeName) + lh.V(4).Info("noderesourcetopology is not valid for node") return 0, nil } if nodeTopology == nil { - klog.V(5).InfoS("noderesourcetopology was not found for node", "node", nodeName) + lh.V(5).Info("noderesourcetopology was not found for node") return 0, nil } - logNRT("noderesourcetopology found", nodeTopology) + lh.V(6).Info("found object", "noderesourcetopology", stringify.NodeResourceTopologyResources(nodeTopology)) - handler := tm.scoringHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(nodeTopology)) + handler := tm.scoringHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(lh, nodeTopology)) if handler == nil { return 0, nil } - return handler(pod, nodeTopology.Zones) + return handler(lh, pod, nodeTopology.Zones) } func (tm *TopologyMatch) ScoreExtensions() framework.ScoreExtensions { @@ -89,7 +95,7 @@ func (tm *TopologyMatch) ScoreExtensions() framework.ScoreExtensions { // scoreForEachNUMANode will iterate over all NUMA zones of the node and invoke the scoreStrategyFn func for every zone. // it will return the minimal score of all the calculated NUMA's score, in order to avoid edge cases. 
-func scoreForEachNUMANode(requested v1.ResourceList, numaList NUMANodeList, score scoreStrategyFn, resourceToWeightMap resourceToWeightMap) int64 { +func scoreForEachNUMANode(lh logr.Logger, requested v1.ResourceList, numaList NUMANodeList, score scoreStrategyFn, resourceToWeightMap resourceToWeightMap) int64 { numaScores := make([]int64, len(numaList)) minScore := int64(0) @@ -100,7 +106,7 @@ func scoreForEachNUMANode(requested v1.ResourceList, numaList NUMANodeList, scor minScore = numaScore } numaScores[numa.NUMAID] = numaScore - klog.V(6).InfoS("numa score result", "numaID", numa.NUMAID, "score", numaScore) + lh.V(6).Info("numa score result", "numaCell", numa.NUMAID, "score", numaScore) } return minScore } @@ -121,32 +127,31 @@ func getScoringStrategyFunction(strategy apiconfig.ScoringStrategyType) (scoreSt } } -func podScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) { +func podScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) { // This code is in Admit implementation of pod scope // https://github.com/kubernetes/kubernetes/blob/9ff3b7e744b34c099c1405d9add192adbef0b6b1/pkg/kubelet/cm/topologymanager/scope_pod.go#L52 // but it works with HintProviders, takes into account all possible allocations. resources := util.GetPodEffectiveRequest(pod) - allocatablePerNUMA := createNUMANodeList(zones) - finalScore := scoreForEachNUMANode(resources, allocatablePerNUMA, scorerFn, resourceToWeightMap) - klog.V(5).InfoS("pod scope scoring final node score", "finalScore", finalScore) + allocatablePerNUMA := createNUMANodeList(lh, zones) + finalScore := scoreForEachNUMANode(lh, resources, allocatablePerNUMA, scorerFn, resourceToWeightMap) + lh.V(5).Info("pod scope scoring final node score", "finalScore", finalScore) return finalScore, nil } -func containerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) { +func containerScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) { // This code is in Admit implementation of container scope // https://github.com/kubernetes/kubernetes/blob/9ff3b7e744b34c099c1405d9add192adbef0b6b1/pkg/kubelet/cm/topologymanager/scope_container.go#L52 containers := append(pod.Spec.InitContainers, pod.Spec.Containers...) 
contScore := make([]float64, len(containers)) - allocatablePerNUMA := createNUMANodeList(zones) + allocatablePerNUMA := createNUMANodeList(lh, zones) for i, container := range containers { - identifier := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Name) - contScore[i] = float64(scoreForEachNUMANode(container.Resources.Requests, allocatablePerNUMA, scorerFn, resourceToWeightMap)) - klog.V(6).InfoS("container scope scoring", "container", identifier, "score", contScore[i]) + contScore[i] = float64(scoreForEachNUMANode(lh, container.Resources.Requests, allocatablePerNUMA, scorerFn, resourceToWeightMap)) + lh.V(6).Info("container scope scoring", "container", container.Name, "score", contScore[i]) } finalScore := int64(stat.Mean(contScore, nil)) - klog.V(5).InfoS("container scope scoring final node score", "finalScore", finalScore) + lh.V(5).Info("container scope scoring final node score", "finalScore", finalScore) return finalScore, nil } @@ -164,13 +169,13 @@ func (tm *TopologyMatch) scoringHandlerFromTopologyManagerConfig(conf TopologyMa return nil } if conf.Scope == kubeletconfig.PodTopologyManagerScope { - return func(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { - return podScopeScore(pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap) + return func(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { + return podScopeScore(lh, pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap) } } if conf.Scope == kubeletconfig.ContainerTopologyManagerScope { - return func(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { - return containerScopeScore(pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap) + return func(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) { + return containerScopeScore(lh, pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap) } } return nil // cannot happen diff --git a/pkg/noderesourcetopology/score_test.go b/pkg/noderesourcetopology/score_test.go index 6d11511c6..42199a72f 100644 --- a/pkg/noderesourcetopology/score_test.go +++ b/pkg/noderesourcetopology/score_test.go @@ -26,6 +26,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -148,7 +149,7 @@ func TestNodeResourceScorePlugin(t *testing.T) { t.Run(test.name, func(t *testing.T) { tm := &TopologyMatch{ scoreStrategyFunc: test.strategy, - nrtCache: nrtcache.NewPassthrough(lister), + nrtCache: nrtcache.NewPassthrough(klog.Background(), lister), } for _, req := range test.requests { @@ -443,7 +444,7 @@ func TestNodeResourceScorePluginLeastNUMA(t *testing.T) { tm := &TopologyMatch{ scoreStrategyType: apiconfig.LeastNUMANodes, - nrtCache: nrtcache.NewPassthrough(lister), + nrtCache: nrtcache.NewPassthrough(klog.Background(), lister), } nodeToScore := make(nodeToScoreMap, len(nodesMap)) pod := makePodByResourceLists(tc.podRequests...) 
@@ -569,7 +570,7 @@ func TestNodeResourcePartialDataScorePlugin(t *testing.T) { t.Run(test.name, func(t *testing.T) { tm := &TopologyMatch{ scoreStrategyFunc: test.strategy, - nrtCache: nrtcache.NewPassthrough(lister), + nrtCache: nrtcache.NewPassthrough(klog.Background(), lister), } for _, req := range test.requests { diff --git a/pkg/noderesourcetopology/stringify/stringify.go b/pkg/noderesourcetopology/stringify/stringify.go index ea5ba4905..94b9ca3a1 100644 --- a/pkg/noderesourcetopology/stringify/stringify.go +++ b/pkg/noderesourcetopology/stringify/stringify.go @@ -17,7 +17,6 @@ limitations under the License. package stringify import ( - "fmt" "sort" "strconv" "strings" @@ -28,11 +27,14 @@ import ( v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" + "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2/helper/numanode" ) -func ResourceListToLoggable(logID string, resources corev1.ResourceList) []interface{} { - items := []interface{}{"logID", logID} +func ResourceListToLoggable(resources corev1.ResourceList) []interface{} { + return ResourceListToLoggableWithValues([]interface{}{}, resources) +} +func ResourceListToLoggableWithValues(items []interface{}, resources corev1.ResourceList) []interface{} { resNames := []string{} for resName := range resources { resNames = append(resNames, string(resName)) @@ -75,6 +77,12 @@ func ResourceList(resources corev1.ResourceList) string { func NodeResourceTopologyResources(nrtObj *topologyv1alpha2.NodeResourceTopology) string { zones := []string{} for _, zoneInfo := range nrtObj.Zones { + numaItems := []interface{}{"numaCell"} + if numaID, err := numanode.NameToID(zoneInfo.Name); err == nil { + numaItems = append(numaItems, numaID) + } else { + numaItems = append(numaItems, zoneInfo.Name) + } zones = append(zones, zoneInfo.Name+"=<"+nrtResourceInfoListToString(zoneInfo.Resources)+">") } return nrtObj.Name + "={" + strings.Join(zones, ",") + "}" @@ -83,10 +91,21 @@ func NodeResourceTopologyResources(nrtObj *topologyv1alpha2.NodeResourceTopology func nrtResourceInfoListToString(resInfoList []topologyv1alpha2.ResourceInfo) string { items := []string{} for _, resInfo := range resInfoList { - items = append(items, fmt.Sprintf("%s=%s/%s/%s", resInfo.Name, resInfo.Capacity.String(), resInfo.Allocatable.String(), resInfo.Available.String())) + items = append(items, nrtResourceInfo(resInfo)) } return strings.Join(items, ",") } + +func nrtResourceInfo(resInfo topologyv1alpha2.ResourceInfo) string { + capVal, _ := resInfo.Capacity.AsInt64() + allocVal, _ := resInfo.Allocatable.AsInt64() + availVal, _ := resInfo.Available.AsInt64() + if !needsHumanization(resInfo.Name) { + return resInfo.Name + "=" + strconv.FormatInt(capVal, 10) + "/" + strconv.FormatInt(allocVal, 10) + "/" + strconv.FormatInt(availVal, 10) + } + return resInfo.Name + "=" + humanize.IBytes(uint64(capVal)) + "/" + humanize.IBytes(uint64(allocVal)) + "/" + humanize.IBytes(uint64(availVal)) +} + func needsHumanization(resName string) bool { // memory-related resources may be expressed in KiB/Bytes, which makes // for long numbers, harder to read and compare. 
To make it easier for diff --git a/pkg/noderesourcetopology/stringify/stringify_test.go b/pkg/noderesourcetopology/stringify/stringify_test.go index cb11d11c7..21a080fc1 100644 --- a/pkg/noderesourcetopology/stringify/stringify_test.go +++ b/pkg/noderesourcetopology/stringify/stringify_test.go @@ -116,7 +116,7 @@ func TestResourceListToLoggable(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { var buf bytes.Buffer - keysAndValues := ResourceListToLoggable(tt.logID, tt.resources) + keysAndValues := ResourceListToLoggableWithValues([]interface{}{"logID", tt.logID}, tt.resources) kvListFormat(&buf, keysAndValues...) got := buf.String() if got != tt.expected {
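For integrators embedding this plugin code, the new pkg/noderesourcetopology/logging package exposes SetLogger/Log as the hook to swap out the default klog.Background() logger, plus the well-known structured keys the plugin attaches in its Reserve/Score/PostBind flows. A minimal sketch of the intended usage follows; the funcr backend, the sample pod metadata and the node name are illustrative assumptions, not part of this change.

// Minimal integrator sketch: replace the plugin's default klog-backed logger.
// Only the logging package helpers come from this patch; funcr, the pod
// metadata and the node name below are illustrative assumptions.
package main

import (
	"fmt"

	"github.com/go-logr/logr/funcr"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging"
)

func main() {
	// Any logr.Logger works here; klog.Background() stays the default when
	// SetLogger is never called.
	lh := funcr.New(func(prefix, args string) {
		fmt.Println(prefix, args)
	}, funcr.Options{Verbosity: 6})

	// Swap the package-level logger the plugin code falls back to.
	logging.SetLogger(lh.WithName("noderesourcetopology"))

	// Emit a record with the same well-known keys used by the plugin flows.
	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "demo", Name: "sample"}}
	logging.Log().V(4).Info(logging.FlowBegin,
		logging.KeyLogID, logging.PodLogID(pod),
		logging.KeyPodUID, pod.GetUID(),
		logging.KeyNode, "node-0",
		logging.KeyFlow, logging.FlowReserve,
	)
}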