diff --git a/pkg/noderesourcetopology/cache/discardreserved.go b/pkg/noderesourcetopology/cache/discardreserved.go index 0807d3ba3..5ba908877 100644 --- a/pkg/noderesourcetopology/cache/discardreserved.go +++ b/pkg/noderesourcetopology/cache/discardreserved.go @@ -20,6 +20,7 @@ import ( "context" "sync" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" corev1 "k8s.io/api/core/v1" @@ -27,6 +28,7 @@ import ( "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" ) // DiscardReserved is intended to solve similiar problem as Overreserve Cache, @@ -45,12 +47,14 @@ type DiscardReserved struct { rMutex sync.RWMutex reservationMap map[string]map[types.UID]bool // Key is NodeName, value is Pod UID : reserved status client ctrlclient.Client + lh logr.Logger } -func NewDiscardReserved(client ctrlclient.Client) Interface { +func NewDiscardReserved(lh logr.Logger, client ctrlclient.Client) Interface { return &DiscardReserved{ client: client, reservationMap: make(map[string]map[types.UID]bool), + lh: lh, } } @@ -74,7 +78,7 @@ func (pt *DiscardReserved) NodeMaybeOverReserved(nodeName string, pod *corev1.Po func (pt *DiscardReserved) NodeHasForeignPods(nodeName string, pod *corev1.Pod) {} func (pt *DiscardReserved) ReserveNodeResources(nodeName string, pod *corev1.Pod) { - klog.V(5).InfoS("nrtcache NRT Reserve", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName) + pt.lh.V(5).Info("NRT Reserve", "logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) pt.rMutex.Lock() defer pt.rMutex.Unlock() @@ -85,14 +89,14 @@ func (pt *DiscardReserved) ReserveNodeResources(nodeName string, pod *corev1.Pod } func (pt *DiscardReserved) UnreserveNodeResources(nodeName string, pod *corev1.Pod) { - klog.V(5).InfoS("nrtcache NRT Unreserve", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName) + pt.lh.V(5).Info("NRT Unreserve", "logID", klog.KObj(pod), "podUID", pod.GetUID(), "node", nodeName) pt.removeReservationForNode(nodeName, pod) } // PostBind is invoked to cleanup reservationMap func (pt *DiscardReserved) PostBind(nodeName string, pod *corev1.Pod) { - klog.V(5).InfoS("nrtcache NRT PostBind", "logID", klog.KObj(pod), "UID", pod.GetUID(), "node", nodeName) + pt.lh.V(5).Info("NRT PostBind", "logID", klog.KObj(pod), "podUID", pod.GetUID(), "node", nodeName) pt.removeReservationForNode(nodeName, pod) } diff --git a/pkg/noderesourcetopology/cache/discardreserved_test.go b/pkg/noderesourcetopology/cache/discardreserved_test.go index 3eac547e5..df383443d 100644 --- a/pkg/noderesourcetopology/cache/discardreserved_test.go +++ b/pkg/noderesourcetopology/cache/discardreserved_test.go @@ -26,6 +26,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -50,7 +51,7 @@ func TestDiscardReservedNodesGetCachedNRTCopy(t *testing.T) { checkGetCachedNRTCopy( t, func(client ctrlclient.Client, _ podlisterv1.PodLister) (Interface, error) { - return NewDiscardReserved(client), nil + return NewDiscardReserved(klog.Background(), client), nil }, testCases..., ) diff --git a/pkg/noderesourcetopology/cache/foreign_pods.go b/pkg/noderesourcetopology/cache/foreign_pods.go index 68956e7ed..b5cf7fab3 100644 --- a/pkg/noderesourcetopology/cache/foreign_pods.go +++ 
b/pkg/noderesourcetopology/cache/foreign_pods.go @@ -17,11 +17,14 @@ limitations under the License. package cache import ( + "fmt" + + "github.com/go-logr/logr" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/util/sets" k8scache "k8s.io/client-go/tools/cache" - "k8s.io/klog/v2" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/resourcerequests" ) @@ -35,11 +38,11 @@ var ( onlyExclusiveResources = false ) -func SetupForeignPodsDetector(schedProfileName string, podInformer k8scache.SharedInformer, cc Interface) { +func SetupForeignPodsDetector(lh logr.Logger, schedProfileName string, podInformer k8scache.SharedInformer, cc Interface) { foreignCache := func(obj interface{}) { pod, ok := obj.(*corev1.Pod) if !ok { - klog.V(3).InfoS("nrtcache: foreign: unsupported object %T", obj) + lh.V(3).Info("unsupported object", "kind", fmt.Sprintf("%T", obj)) return } if !IsForeignPod(pod) { @@ -47,7 +50,7 @@ func SetupForeignPodsDetector(schedProfileName string, podInformer k8scache.Shar } cc.NodeHasForeignPods(pod.Spec.NodeName, pod) - klog.V(6).InfoS("nrtcache: has foreign pods", "logID", klog.KObj(pod), "node", pod.Spec.NodeName, "podUID", pod.UID) + lh.V(6).Info("detected foreign pods", "logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", pod.Spec.NodeName) } podInformer.AddEventHandler(k8scache.ResourceEventHandlerFuncs{ @@ -67,11 +70,11 @@ func TrackAllForeignPods() { onlyExclusiveResources = false } -func RegisterSchedulerProfileName(schedProfileName string) { - klog.InfoS("nrtcache: setting up foreign pod detection", "profile", schedProfileName) +func RegisterSchedulerProfileName(lh logr.Logger, schedProfileName string) { + lh.Info("setting up detection", "profile", schedProfileName) schedProfileNames.Insert(schedProfileName) - klog.V(5).InfoS("nrtcache: registered scheduler profiles", "names", schedProfileNames.List()) + lh.V(5).Info("registered scheduler profiles", "names", schedProfileNames.List()) } func IsForeignPod(pod *corev1.Pod) bool { diff --git a/pkg/noderesourcetopology/cache/foreign_pods_test.go b/pkg/noderesourcetopology/cache/foreign_pods_test.go index a0333b377..f4338e388 100644 --- a/pkg/noderesourcetopology/cache/foreign_pods_test.go +++ b/pkg/noderesourcetopology/cache/foreign_pods_test.go @@ -22,6 +22,7 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" ) func TestIsForeignPod(t *testing.T) { @@ -195,7 +196,7 @@ func TestIsForeignPod(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { for _, profileName := range tt.profileNames { - RegisterSchedulerProfileName(profileName) + RegisterSchedulerProfileName(klog.Background(), profileName) } defer CleanRegisteredSchedulerProfileNames() diff --git a/pkg/noderesourcetopology/cache/overreserve.go b/pkg/noderesourcetopology/cache/overreserve.go index 3c373e9f2..37283fda0 100644 --- a/pkg/noderesourcetopology/cache/overreserve.go +++ b/pkg/noderesourcetopology/cache/overreserve.go @@ -21,8 +21,8 @@ import ( "errors" "fmt" "sync" - "time" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" "github.com/k8stopologyawareschedwg/podfingerprint" @@ -30,17 +30,18 @@ import ( "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" podlisterv1 "k8s.io/client-go/listers/core/v1" - "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" 
apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/podprovider" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/resourcerequests" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify" ) type OverReserve struct { + lh logr.Logger client ctrlclient.Client lock sync.Mutex nrts *nrtStore @@ -54,12 +55,12 @@ type OverReserve struct { isPodRelevant podprovider.PodFilterFunc } -func NewOverReserve(cfg *apiconfig.NodeResourceTopologyCache, client ctrlclient.Client, podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc) (*OverReserve, error) { +func NewOverReserve(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache, client ctrlclient.Client, podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc) (*OverReserve, error) { if client == nil || podLister == nil { - return nil, fmt.Errorf("nrtcache: received nil references") + return nil, fmt.Errorf("received nil references") } - resyncMethod := getCacheResyncMethod(cfg) + resyncMethod := getCacheResyncMethod(lh, cfg) nrtObjs := &topologyv1alpha2.NodeResourceTopologyList{} // TODO: we should pass-in a context in the future @@ -67,10 +68,11 @@ func NewOverReserve(cfg *apiconfig.NodeResourceTopologyCache, client ctrlclient. return nil, err } - klog.V(3).InfoS("nrtcache: initializing", "objects", len(nrtObjs.Items), "method", resyncMethod) + lh.V(3).Info("initializing", "noderesourcetopologies", len(nrtObjs.Items), "method", resyncMethod) obj := &OverReserve{ + lh: lh, client: client, - nrts: newNrtStore(nrtObjs.Items), + nrts: newNrtStore(lh, nrtObjs.Items), assumedResources: make(map[string]*resourceStore), nodesMaybeOverreserved: newCounter(), nodesWithForeignPods: newCounter(), @@ -97,10 +99,13 @@ func (ov *OverReserve) GetCachedNRTCopy(ctx context.Context, nodeName string, po return nrt, true } - klog.V(6).InfoS("nrtcache NRT", "logID", klog.KObj(pod), "vanilla", stringify.NodeResourceTopologyResources(nrt)) - nodeAssumedResources.UpdateNRT(klog.KObj(pod).String(), nrt) + logID := logging.PodLogID(pod) + lh := ov.lh.WithValues("logID", logID, "podUID", pod.GetUID(), "node", nodeName) - klog.V(5).InfoS("nrtcache NRT", "logID", klog.KObj(pod), "updated", stringify.NodeResourceTopologyResources(nrt)) + lh.V(6).Info("NRT", "fromcache", stringify.NodeResourceTopologyResources(nrt)) + nodeAssumedResources.UpdateNRT(logID, nrt) + + lh.V(5).Info("NRT", "withassumed", stringify.NodeResourceTopologyResources(nrt)) return nrt, true } @@ -108,49 +113,52 @@ func (ov *OverReserve) NodeMaybeOverReserved(nodeName string, pod *corev1.Pod) { ov.lock.Lock() defer ov.lock.Unlock() val := ov.nodesMaybeOverreserved.Incr(nodeName) - klog.V(4).InfoS("nrtcache: mark discarded", "logID", klog.KObj(pod), "node", nodeName, "count", val) + ov.lh.V(4).Info("mark discarded", "node", nodeName, "count", val) } func (ov *OverReserve) NodeHasForeignPods(nodeName string, pod *corev1.Pod) { + lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) ov.lock.Lock() defer ov.lock.Unlock() if !ov.nrts.Contains(nodeName) { - klog.V(5).InfoS("nrtcache: ignoring foreign pods", "logID", klog.KObj(pod), "node", nodeName, "nrtinfo", "missing") + lh.V(5).Info("ignoring foreign pods", "nrtinfo", "missing") return } val := ov.nodesWithForeignPods.Incr(nodeName) - klog.V(4).InfoS("nrtcache: marked with foreign pods", "logID", klog.KObj(pod), "node", nodeName, "count", val) + 
lh.V(4).Info("marked with foreign pods", "count", val) } func (ov *OverReserve) ReserveNodeResources(nodeName string, pod *corev1.Pod) { + lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) ov.lock.Lock() defer ov.lock.Unlock() nodeAssumedResources, ok := ov.assumedResources[nodeName] if !ok { - nodeAssumedResources = newResourceStore() + nodeAssumedResources = newResourceStore(ov.lh) ov.assumedResources[nodeName] = nodeAssumedResources } nodeAssumedResources.AddPod(pod) - klog.V(5).InfoS("nrtcache post reserve", "logID", klog.KObj(pod), "node", nodeName, "assumedResources", nodeAssumedResources.String()) + lh.V(5).Info("post reserve", "assumedResources", nodeAssumedResources.String()) ov.nodesMaybeOverreserved.Delete(nodeName) - klog.V(6).InfoS("nrtcache: reset discard counter", "logID", klog.KObj(pod), "node", nodeName) + lh.V(6).Info("reset discard counter") } func (ov *OverReserve) UnreserveNodeResources(nodeName string, pod *corev1.Pod) { + lh := ov.lh.WithValues("logID", logging.PodLogID(pod), "podUID", pod.GetUID(), "node", nodeName) ov.lock.Lock() defer ov.lock.Unlock() nodeAssumedResources, ok := ov.assumedResources[nodeName] if !ok { // this should not happen, so we're vocal about it // we don't return error because not much to do to recover anyway - klog.V(3).InfoS("nrtcache: no resources tracked", "logID", klog.KObj(pod), "node", nodeName) + lh.V(3).Info("no resources tracked") return } nodeAssumedResources.DeletePod(pod) - klog.V(5).InfoS("nrtcache post release", "logID", klog.KObj(pod), "node", nodeName, "assumedResources", nodeAssumedResources.String()) + lh.V(5).Info("post release", "assumedResources", nodeAssumedResources.String()) } // NodesMaybeOverReserved returns a slice of all the node names which have been discarded previously, @@ -160,7 +168,7 @@ func (ov *OverReserve) UnreserveNodeResources(nodeName string, pod *corev1.Pod) // 2. it was pessimistically overallocated, so the node is a candidate for resync // This function enables the caller to know the slice of nodes should be considered for resync, // avoiding the need to rescan the full node list. -func (ov *OverReserve) NodesMaybeOverReserved(logID string) []string { +func (ov *OverReserve) NodesMaybeOverReserved(lh logr.Logger) []string { ov.lock.Lock() defer ov.lock.Unlock() // this is intentionally aggressive. We don't yet make any attempt to find out if the @@ -177,7 +185,7 @@ func (ov *OverReserve) NodesMaybeOverReserved(logID string) []string { } if nodes.Len() > 0 { - klog.V(4).InfoS("nrtcache: found dirty nodes", "logID", logID, "foreign", foreignCount, "discarded", nodes.Len()-foreignCount, "total", nodes.Len()) + lh.V(4).Info("found dirty nodes", "foreign", foreignCount, "discarded", nodes.Len()-foreignCount, "total", nodes.Len()) } return nodes.Keys() } @@ -192,77 +200,81 @@ func (ov *OverReserve) NodesMaybeOverReserved(logID string) []string { // too aggressive resync attempts, so to more, likely unnecessary, computation work on the scheduler side. func (ov *OverReserve) Resync() { // we are not working with a specific pod, so we need a unique key to track this flow - logID := logIDFromTime() + lh := ov.lh.WithValues("logID", logging.TimeLogID(), "flow", logging.FlowCacheSync) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) - nodeNames := ov.NodesMaybeOverReserved(logID) + nodeNames := ov.NodesMaybeOverReserved(lh) // avoid as much as we can unnecessary work and logs. 
if len(nodeNames) == 0 { - klog.V(6).InfoS("nrtcache: resync: no dirty nodes detected") + lh.V(6).Info("no dirty nodes detected") return } // node -> pod identifier (namespace, name) - nodeToObjsMap, err := makeNodeToPodDataMap(ov.podLister, ov.isPodRelevant, logID) + nodeToObjsMap, err := makeNodeToPodDataMap(lh, ov.podLister, ov.isPodRelevant) if err != nil { - klog.ErrorS(err, "cannot find the mapping between running pods and nodes") + lh.Error(err, "cannot find the mapping between running pods and nodes") return } - klog.V(6).InfoS("nrtcache: resync NodeTopology cache starting", "logID", logID) - defer klog.V(6).InfoS("nrtcache: resync NodeTopology cache complete", "logID", logID) + lh.V(6).Info("resync NodeTopology cache starting") + defer lh.V(6).Info("resync NodeTopology cache complete") var nrtUpdates []*topologyv1alpha2.NodeResourceTopology for _, nodeName := range nodeNames { + lh = lh.WithValues("node", nodeName) + nrtCandidate := &topologyv1alpha2.NodeResourceTopology{} if err := ov.client.Get(context.Background(), types.NamespacedName{Name: nodeName}, nrtCandidate); err != nil { - klog.V(3).InfoS("nrtcache: failed to get NodeTopology", "logID", logID, "node", nodeName, "error", err) + lh.V(3).Info("failed to get NodeTopology", "error", err) continue } if nrtCandidate == nil { - klog.V(3).InfoS("nrtcache: missing NodeTopology", "logID", logID, "node", nodeName) + lh.V(3).Info("missing NodeTopology") continue } objs, ok := nodeToObjsMap[nodeName] if !ok { // this really should never happen - klog.V(3).InfoS("nrtcache: cannot find any pod for node", "logID", logID, "node", nodeName) + lh.V(3).Info("cannot find any pod for node") continue } pfpExpected, onlyExclRes := podFingerprintForNodeTopology(nrtCandidate, ov.resyncMethod) if pfpExpected == "" { - klog.V(3).InfoS("nrtcache: missing NodeTopology podset fingerprint data", "logID", logID, "node", nodeName) + lh.V(3).Info("missing NodeTopology podset fingerprint data") continue } - klog.V(6).InfoS("nrtcache: trying to resync NodeTopology", "logID", logID, "node", nodeName, "fingerprint", pfpExpected, "onlyExclusiveResources", onlyExclRes) + lh.V(6).Info("trying to sync NodeTopology", "fingerprint", pfpExpected, "onlyExclusiveResources", onlyExclRes) - err = checkPodFingerprintForNode(logID, objs, nodeName, pfpExpected, onlyExclRes) + err = checkPodFingerprintForNode(lh, objs, nodeName, pfpExpected, onlyExclRes) if errors.Is(err, podfingerprint.ErrSignatureMismatch) { // can happen, not critical - klog.V(5).InfoS("nrtcache: NodeTopology podset fingerprint mismatch", "logID", logID, "node", nodeName) + lh.V(5).Info("NodeTopology podset fingerprint mismatch") continue } if err != nil { // should never happen, let's be vocal - klog.V(3).ErrorS(err, "nrtcache: checking NodeTopology podset fingerprint", "logID", logID, "node", nodeName) + lh.V(3).Error(err, "checking NodeTopology podset fingerprint") continue } - klog.V(4).InfoS("nrtcache: overriding cached info", "logID", logID, "node", nodeName) + lh.V(4).Info("overriding cached info") nrtUpdates = append(nrtUpdates, nrtCandidate) } - ov.FlushNodes(logID, nrtUpdates...) + ov.FlushNodes(lh, nrtUpdates...) } // FlushNodes drops all the cached information about a given node, resetting its state clean. 
-func (ov *OverReserve) FlushNodes(logID string, nrts ...*topologyv1alpha2.NodeResourceTopology) { +func (ov *OverReserve) FlushNodes(lh logr.Logger, nrts ...*topologyv1alpha2.NodeResourceTopology) { ov.lock.Lock() defer ov.lock.Unlock() for _, nrt := range nrts { - klog.V(4).InfoS("nrtcache: flushing", "logID", logID, "node", nrt.Name) + lh.V(4).Info("flushing", "node", nrt.Name) ov.nrts.Update(nrt) delete(ov.assumedResources, nrt.Name) ov.nodesMaybeOverreserved.Delete(nrt.Name) @@ -275,14 +287,14 @@ func (ov *OverReserve) Store() *nrtStore { return ov.nrts } -func makeNodeToPodDataMap(podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc, logID string) (map[string][]podData, error) { +func makeNodeToPodDataMap(lh logr.Logger, podLister podlisterv1.PodLister, isPodRelevant podprovider.PodFilterFunc) (map[string][]podData, error) { nodeToObjsMap := make(map[string][]podData) pods, err := podLister.List(labels.Everything()) if err != nil { return nodeToObjsMap, err } for _, pod := range pods { - if !isPodRelevant(pod, logID) { + if !isPodRelevant(lh, pod) { continue } nodeObjs := nodeToObjsMap[pod.Spec.NodeName] @@ -296,17 +308,13 @@ func makeNodeToPodDataMap(podLister podlisterv1.PodLister, isPodRelevant podprov return nodeToObjsMap, nil } -func logIDFromTime() string { - return fmt.Sprintf("resync%v", time.Now().UnixMilli()) -} - -func getCacheResyncMethod(cfg *apiconfig.NodeResourceTopologyCache) apiconfig.CacheResyncMethod { +func getCacheResyncMethod(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache) apiconfig.CacheResyncMethod { var resyncMethod apiconfig.CacheResyncMethod if cfg != nil && cfg.ResyncMethod != nil { resyncMethod = *cfg.ResyncMethod } else { // explicitly set to nil? resyncMethod = apiconfig.CacheResyncAutodetect - klog.InfoS("cache resync method missing", "fallback", resyncMethod) + lh.Info("cache resync method missing", "fallback", resyncMethod) } return resyncMethod } diff --git a/pkg/noderesourcetopology/cache/overreserve_test.go b/pkg/noderesourcetopology/cache/overreserve_test.go index ec48e2bae..55ffa2348 100644 --- a/pkg/noderesourcetopology/cache/overreserve_test.go +++ b/pkg/noderesourcetopology/cache/overreserve_test.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -89,7 +90,7 @@ func TestGetCacheResyncMethod(t *testing.T) { for _, testCase := range testCases { t.Run(testCase.description, func(t *testing.T) { - got := getCacheResyncMethod(testCase.cfg) + got := getCacheResyncMethod(klog.Background(), testCase.cfg) if got != testCase.expected { t.Errorf("cache resync method got %v expected %v", got, testCase.expected) } @@ -104,12 +105,12 @@ func TestInitEmptyLister(t *testing.T) { fakePodLister := &fakePodLister{} - _, err = NewOverReserve(nil, nil, fakePodLister, podprovider.IsPodRelevantAlways) + _, err = NewOverReserve(klog.Background(), nil, nil, fakePodLister, podprovider.IsPodRelevantAlways) if err == nil { t.Fatalf("accepted nil lister") } - _, err = NewOverReserve(nil, fakeClient, nil, podprovider.IsPodRelevantAlways) + _, err = NewOverReserve(klog.Background(), nil, fakeClient, nil, podprovider.IsPodRelevantAlways) if err == nil { t.Fatalf("accepted nil indexer") } @@ -124,7 +125,7 @@ func TestNodesMaybeOverReservedCount(t *testing.T) { fakePodLister := &fakePodLister{} nrtCache := mustOverReserve(t, fakeClient, fakePodLister) - 
dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes from pristine cache: %v", dirtyNodes) } @@ -149,7 +150,7 @@ func TestDirtyNodesMarkDiscarded(t *testing.T) { nrtCache.ReserveNodeResources(nodeName, &corev1.Pod{}) } - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes from pristine cache: %v", dirtyNodes) } @@ -158,7 +159,7 @@ func TestDirtyNodesMarkDiscarded(t *testing.T) { nrtCache.NodeMaybeOverReserved(nodeName, &corev1.Pod{}) } - dirtyNodes = nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes = nrtCache.NodesMaybeOverReserved(klog.Background()) sort.Strings(dirtyNodes) if !reflect.DeepEqual(dirtyNodes, expectedNodes) { @@ -185,7 +186,7 @@ func TestDirtyNodesUnmarkedOnReserve(t *testing.T) { nrtCache.ReserveNodeResources(nodeName, &corev1.Pod{}) } - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes from pristine cache: %v", dirtyNodes) } @@ -201,7 +202,7 @@ func TestDirtyNodesUnmarkedOnReserve(t *testing.T) { "node-1", } - dirtyNodes = nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes = nrtCache.NodesMaybeOverReserved(klog.Background()) if !reflect.DeepEqual(dirtyNodes, expectedNodes) { t.Errorf("got=%v expected=%v", dirtyNodes, expectedNodes) @@ -228,7 +229,7 @@ func TestOverreserveGetCachedNRTCopy(t *testing.T) { checkGetCachedNRTCopy( t, func(client ctrlclient.Client, podLister podlisterv1.PodLister) (Interface, error) { - return NewOverReserve(nil, client, podLister, podprovider.IsPodRelevantAlways) + return NewOverReserve(klog.Background(), nil, client, podLister, podprovider.IsPodRelevantAlways) }, testCases..., ) @@ -403,8 +404,6 @@ func TestFlush(t *testing.T) { }, } - logID := "testFlush" - nrtCache.ReserveNodeResources("node1", testPod) nrtCache.NodeMaybeOverReserved("node1", testPod) @@ -433,9 +432,11 @@ func TestFlush(t *testing.T) { }, } - nrtCache.FlushNodes(logID, expectedNodeTopology.DeepCopy()) + lh := klog.Background() + + nrtCache.FlushNodes(lh, expectedNodeTopology.DeepCopy()) - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(lh) if len(dirtyNodes) != 0 { t.Errorf("dirty nodes after flush: %v", dirtyNodes) } @@ -517,7 +518,7 @@ func TestResyncNoPodFingerprint(t *testing.T) { nrtCache.Resync() - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) != 1 || dirtyNodes[0] != "node1" { t.Errorf("cleaned nodes after resyncing with bad data: %v", dirtyNodes) @@ -611,7 +612,7 @@ func TestResyncMatchFingerprint(t *testing.T) { nrtCache.Resync() - dirtyNodes := nrtCache.NodesMaybeOverReserved("testing") + dirtyNodes := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(dirtyNodes) > 0 { t.Errorf("node still dirty after resyncing with good data: %v", dirtyNodes) } @@ -641,7 +642,7 @@ func TestUnknownNodeWithForeignPods(t *testing.T) { nrtCache.NodeHasForeignPods("node-bogus", &corev1.Pod{}) - names := nrtCache.NodesMaybeOverReserved("testing") + names := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(names) != 0 { t.Errorf("non-existent node has foreign pods!") } @@ -714,7 +715,7 @@ func TestNodeWithForeignPods(t *testing.T) { target := "node2" 
nrtCache.NodeHasForeignPods(target, &corev1.Pod{}) - names := nrtCache.NodesMaybeOverReserved("testing") + names := nrtCache.NodesMaybeOverReserved(klog.Background()) if len(names) != 1 || names[0] != target { t.Errorf("unexpected dirty nodes: %v", names) } @@ -726,7 +727,7 @@ func TestNodeWithForeignPods(t *testing.T) { } func mustOverReserve(t *testing.T, client ctrlclient.Client, podLister podlisterv1.PodLister) *OverReserve { - obj, err := NewOverReserve(nil, client, podLister, podprovider.IsPodRelevantAlways) + obj, err := NewOverReserve(klog.Background(), nil, client, podLister, podprovider.IsPodRelevantAlways) if err != nil { t.Fatalf("unexpected error creating cache: %v", err) } @@ -960,7 +961,7 @@ func TestMakeNodeToPodDataMap(t *testing.T) { pods: tcase.pods, err: tcase.err, } - got, err := makeNodeToPodDataMap(podLister, tcase.isPodRelevant, tcase.description) + got, err := makeNodeToPodDataMap(klog.Background(), podLister, tcase.isPodRelevant) if err != tcase.expectedErr { t.Errorf("error mismatch: got %v expected %v", err, tcase.expectedErr) } diff --git a/pkg/noderesourcetopology/cache/passthrough.go b/pkg/noderesourcetopology/cache/passthrough.go index 54e3e5ba9..ac2107613 100644 --- a/pkg/noderesourcetopology/cache/passthrough.go +++ b/pkg/noderesourcetopology/cache/passthrough.go @@ -19,30 +19,32 @@ package cache import ( "context" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) type Passthrough struct { client ctrlclient.Client + lh logr.Logger } -func NewPassthrough(client ctrlclient.Client) Interface { +func NewPassthrough(lh logr.Logger, client ctrlclient.Client) Interface { return Passthrough{ client: client, + lh: lh, } } func (pt Passthrough) GetCachedNRTCopy(ctx context.Context, nodeName string, _ *corev1.Pod) (*topologyv1alpha2.NodeResourceTopology, bool) { - klog.V(5).InfoS("Lister for nodeResTopoPlugin") + pt.lh.V(5).Info("lister for NRT plugin") nrt := &topologyv1alpha2.NodeResourceTopology{} if err := pt.client.Get(ctx, types.NamespacedName{Name: nodeName}, nrt); err != nil { - klog.V(5).ErrorS(err, "Cannot get NodeTopologies from NodeResourceTopologyLister") + pt.lh.V(5).Error(err, "cannot get nrts from lister") return nil, true } return nrt, true diff --git a/pkg/noderesourcetopology/cache/passthrough_test.go b/pkg/noderesourcetopology/cache/passthrough_test.go index 075486d64..03552c07e 100644 --- a/pkg/noderesourcetopology/cache/passthrough_test.go +++ b/pkg/noderesourcetopology/cache/passthrough_test.go @@ -21,6 +21,7 @@ import ( topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -44,7 +45,7 @@ func TestPassthroughGetCachedNRTCopy(t *testing.T) { checkGetCachedNRTCopy( t, func(client ctrlclient.Client, _ podlisterv1.PodLister) (Interface, error) { - return NewPassthrough(client), nil + return NewPassthrough(klog.Background(), client), nil }, testCases..., ) diff --git a/pkg/noderesourcetopology/cache/store.go b/pkg/noderesourcetopology/cache/store.go index 8ff78b321..cbfb986ec 100644 --- a/pkg/noderesourcetopology/cache/store.go +++ b/pkg/noderesourcetopology/cache/store.go @@ -21,8 +21,8 @@ import ( corev1 "k8s.io/api/core/v1" 
"k8s.io/apimachinery/pkg/api/resource" - "k8s.io/klog/v2" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" topologyv1alpha2attr "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2/helper/attribute" "github.com/k8stopologyawareschedwg/podfingerprint" @@ -36,17 +36,19 @@ import ( // data is intentionally copied each time it enters and exists the store. E.g, no pointer sharing. type nrtStore struct { data map[string]*topologyv1alpha2.NodeResourceTopology + lh logr.Logger } // newNrtStore creates a new nrtStore and initializes it with copies of the provided Node Resource Topology data. -func newNrtStore(nrts []topologyv1alpha2.NodeResourceTopology) *nrtStore { +func newNrtStore(lh logr.Logger, nrts []topologyv1alpha2.NodeResourceTopology) *nrtStore { data := make(map[string]*topologyv1alpha2.NodeResourceTopology, len(nrts)) for _, nrt := range nrts { data[nrt.Name] = nrt.DeepCopy() } - klog.V(6).InfoS("nrtcache: initialized nrtStore", "objects", len(data)) + lh.V(6).Info("initialized nrtStore", "objects", len(data)) return &nrtStore{ data: data, + lh: lh, } } @@ -60,7 +62,7 @@ func (nrs nrtStore) Contains(nodeName string) bool { func (nrs *nrtStore) GetNRTCopyByNodeName(nodeName string) *topologyv1alpha2.NodeResourceTopology { obj, ok := nrs.data[nodeName] if !ok { - klog.V(3).InfoS("nrtcache: missing cached NodeTopology", "node", nodeName) + nrs.lh.V(3).Info("missing cached NodeTopology", "node", nodeName) return nil } return obj.DeepCopy() @@ -69,18 +71,20 @@ func (nrs *nrtStore) GetNRTCopyByNodeName(nodeName string) *topologyv1alpha2.Nod // Update adds or replace the Node Resource Topology associated to a node. Always do a copy. func (nrs *nrtStore) Update(nrt *topologyv1alpha2.NodeResourceTopology) { nrs.data[nrt.Name] = nrt.DeepCopy() - klog.V(5).InfoS("nrtcache: updated cached NodeTopology", "node", nrt.Name) + nrs.lh.V(5).Info("updated cached NodeTopology", "node", nrt.Name) } // resourceStore maps the resource requested by pod by pod namespaed name. It is not thread safe and needs to be protected by a lock. type resourceStore struct { // key: namespace + "/" name data map[string]corev1.ResourceList + lh logr.Logger } -func newResourceStore() *resourceStore { +func newResourceStore(lh logr.Logger) *resourceStore { return &resourceStore{ data: make(map[string]corev1.ResourceList), + lh: lh, } } @@ -94,27 +98,27 @@ func (rs *resourceStore) String() string { // AddPod returns true if updating existing pod, false if adding for the first time func (rs *resourceStore) AddPod(pod *corev1.Pod) bool { - key := pod.Namespace + "/" + pod.Name // this is also a valid logID + key := pod.Namespace + "/" + pod.Name _, ok := rs.data[key] if ok { // should not happen, so we log with a low level - klog.V(4).InfoS("updating existing entry", "key", key) + rs.lh.V(4).Info("updating existing entry", "key", key) } resData := util.GetPodEffectiveRequest(pod) - klog.V(5).InfoS("nrtcache: resourcestore ADD", stringify.ResourceListToLoggable(key, resData)...) + rs.lh.V(5).Info("resourcestore ADD", stringify.ResourceListToLoggable(resData)...) 
rs.data[key] = resData return ok } // DeletePod returns true if deleted an existing pod, false otherwise func (rs *resourceStore) DeletePod(pod *corev1.Pod) bool { - key := pod.Namespace + "/" + pod.Name // this is also a valid logID + key := pod.Namespace + "/" + pod.Name _, ok := rs.data[key] if ok { // should not happen, so we log with a low level - klog.V(4).InfoS("removing missing entry", "key", key) + rs.lh.V(4).Info("removing missing entry", "key", key) } - klog.V(5).InfoS("nrtcache: resourcestore DEL", stringify.ResourceListToLoggable(key, rs.data[key])...) + rs.lh.V(5).Info("resourcestore DEL", stringify.ResourceListToLoggable(rs.data[key])...) delete(rs.data, key) return ok } @@ -142,7 +146,7 @@ func (rs *resourceStore) UpdateNRT(logID string, nrt *topologyv1alpha2.NodeResou if zr.Available.Cmp(qty) < 0 { // this should happen rarely, and it is likely caused by // a bug elsewhere. - klog.V(3).InfoS("nrtcache: cannot decrement resource", "logID", logID, "zone", zr.Name, "node", nrt.Name, "available", zr.Available, "requestor", key, "quantity", qty) + rs.lh.V(3).Info("cannot decrement resource", "logID", logID, "zone", zr.Name, "node", nrt.Name, "available", zr.Available, "requestor", key, "quantity", qty.String()) zr.Available = resource.Quantity{} continue } @@ -224,7 +228,7 @@ type podData struct { // checkPodFingerprintForNode verifies if the given pods fingeprint (usually from NRT update) matches the // computed one using the stored data about pods running on nodes. Returns nil on success, or an error // describing the failure -func checkPodFingerprintForNode(logID string, objs []podData, nodeName, pfpExpected string, onlyExclRes bool) error { +func checkPodFingerprintForNode(lh logr.Logger, objs []podData, nodeName, pfpExpected string, onlyExclRes bool) error { st := podfingerprint.MakeStatus(nodeName) pfp := podfingerprint.NewTracingFingerprint(len(objs), &st) for _, obj := range objs { @@ -235,8 +239,8 @@ func checkPodFingerprintForNode(logID string, objs []podData, nodeName, pfpExpec } pfpComputed := pfp.Sign() - klog.V(5).InfoS("nrtcache: podset fingerprint check", "logID", logID, "node", nodeName, "expected", pfpExpected, "computed", pfpComputed, "onlyExclusiveResources", onlyExclRes) - klog.V(6).InfoS("nrtcache: podset fingerprint debug", "logID", logID, "node", nodeName, "status", st.Repr()) + lh.V(5).Info("podset fingerprint check", "expected", pfpExpected, "computed", pfpComputed, "onlyExclusiveResources", onlyExclRes) + lh.V(6).Info("podset fingerprint debug", "status", st.Repr()) err := pfp.Check(pfpExpected) podfingerprint.MarkCompleted(st) diff --git a/pkg/noderesourcetopology/cache/store_test.go b/pkg/noderesourcetopology/cache/store_test.go index 30871d450..c7daa08f4 100644 --- a/pkg/noderesourcetopology/cache/store_test.go +++ b/pkg/noderesourcetopology/cache/store_test.go @@ -29,6 +29,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" podlisterv1 "k8s.io/client-go/listers/core/v1" + "k8s.io/klog/v2" apiconfig "sigs.k8s.io/scheduler-plugins/apis/config" "github.com/k8stopologyawareschedwg/podfingerprint" @@ -202,7 +203,7 @@ func TestNRTStoreGet(t *testing.T) { }, }, } - ns := newNrtStore(nrts) + ns := newNrtStore(klog.Background(), nrts) obj := ns.GetNRTCopyByNodeName("node-0") obj.TopologyPolicies[0] = "single-numa-node" @@ -238,7 +239,7 @@ func TestNRTStoreUpdate(t *testing.T) { }, }, } - ns := newNrtStore(nrts) + ns := newNrtStore(klog.Background(), nrts) nrt3 := &topologyv1alpha2.NodeResourceTopology{ ObjectMeta: 
metav1.ObjectMeta{ @@ -258,14 +259,14 @@ func TestNRTStoreUpdate(t *testing.T) { } func TestNRTStoreGetMissing(t *testing.T) { - ns := newNrtStore(nil) + ns := newNrtStore(klog.Background(), nil) if ns.GetNRTCopyByNodeName("node-missing") != nil { t.Errorf("missing node returned non-nil data") } } func TestNRTStoreContains(t *testing.T) { - ns := newNrtStore(nil) + ns := newNrtStore(klog.Background(), nil) if ns.Contains("node-0") { t.Errorf("unexpected node found") } @@ -288,7 +289,7 @@ func TestNRTStoreContains(t *testing.T) { }, }, } - ns = newNrtStore(nrts) + ns = newNrtStore(klog.Background(), nrts) if !ns.Contains("node-0") { t.Errorf("missing node") } @@ -409,7 +410,7 @@ func TestResourceStoreAddPod(t *testing.T) { }, } - rs := newResourceStore() + rs := newResourceStore(klog.Background()) existed := rs.AddPod(&pod) if existed { t.Fatalf("replaced a pod into a empty resourceStore") @@ -441,7 +442,7 @@ func TestResourceStoreDeletePod(t *testing.T) { }, } - rs := newResourceStore() + rs := newResourceStore(klog.Background()) existed := rs.DeletePod(&pod) if existed { t.Fatalf("deleted a pod into a empty resourceStore") @@ -508,7 +509,7 @@ func TestResourceStoreUpdate(t *testing.T) { }, } - rs := newResourceStore() + rs := newResourceStore(klog.Background()) existed := rs.AddPod(&pod) if existed { t.Fatalf("replacing a pod into a empty resourceStore") @@ -581,7 +582,7 @@ func TestCheckPodFingerprintForNode(t *testing.T) { for _, tcase := range tcases { t.Run(tcase.description, func(t *testing.T) { - gotErr := checkPodFingerprintForNode("testing", tcase.objs, "test-node", tcase.pfp, tcase.onlyExclRes) + gotErr := checkPodFingerprintForNode(klog.Background(), tcase.objs, "test-node", tcase.pfp, tcase.onlyExclRes) if !errors.Is(gotErr, tcase.expectedErr) { t.Errorf("got error %v expected %v", gotErr, tcase.expectedErr) } diff --git a/pkg/noderesourcetopology/config.go b/pkg/noderesourcetopology/config.go index 9b58d34bf..3cd18df56 100644 --- a/pkg/noderesourcetopology/config.go +++ b/pkg/noderesourcetopology/config.go @@ -17,9 +17,9 @@ limitations under the License. package noderesourcetopology import ( - "k8s.io/klog/v2" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" + "github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" ) @@ -57,10 +57,10 @@ func makeTopologyManagerConfigDefaults() TopologyManagerConfig { } } -func topologyManagerConfigFromNodeResourceTopology(nodeTopology *topologyv1alpha2.NodeResourceTopology) TopologyManagerConfig { +func topologyManagerConfigFromNodeResourceTopology(lh logr.Logger, nodeTopology *topologyv1alpha2.NodeResourceTopology) TopologyManagerConfig { conf := makeTopologyManagerConfigDefaults() // Backward compatibility (v1alpha2 and previous). Deprecated, will be removed when the NRT API moves to v1beta1. 
- updateTopologyManagerConfigFromTopologyPolicies(&conf, nodeTopology.Name, nodeTopology.TopologyPolicies) + updateTopologyManagerConfigFromTopologyPolicies(lh, &conf, nodeTopology.Name, nodeTopology.TopologyPolicies) // preferred new configuration source (v1alpha2 and onwards) updateTopologyManagerConfigFromAttributes(&conf, nodeTopology.Attributes) return conf @@ -80,18 +80,17 @@ func updateTopologyManagerConfigFromAttributes(conf *TopologyManagerConfig, attr } } -func updateTopologyManagerConfigFromTopologyPolicies(conf *TopologyManagerConfig, nodeName string, topologyPolicies []string) { +func updateTopologyManagerConfigFromTopologyPolicies(lh logr.Logger, conf *TopologyManagerConfig, nodeName string, topologyPolicies []string) { if len(topologyPolicies) == 0 { - klog.V(3).InfoS("Cannot determine policy", "node", nodeName) return } if len(topologyPolicies) > 1 { - klog.V(4).InfoS("Ignoring extra policies", "node", nodeName, "policies count", len(topologyPolicies)-1) + lh.V(4).Info("ignoring extra policies", "node", nodeName, "policies count", len(topologyPolicies)-1) } policyName := topologyv1alpha2.TopologyManagerPolicy(topologyPolicies[0]) - klog.Warning("The `topologyPolicies` field is deprecated and will be removed with the NRT API v1beta1.") - klog.Warning("The `topologyPolicies` field is deprecated, please use top-level Attributes field instead.") + lh.Info("the `topologyPolicies` field is deprecated and will be removed with the NRT API v1beta1.") + lh.Info("the `topologyPolicies` field is deprecated, please use top-level Attributes field instead.") switch policyName { case topologyv1alpha2.SingleNUMANodePodLevel: diff --git a/pkg/noderesourcetopology/config_test.go b/pkg/noderesourcetopology/config_test.go index a84ae7027..954086698 100644 --- a/pkg/noderesourcetopology/config_test.go +++ b/pkg/noderesourcetopology/config_test.go @@ -20,6 +20,7 @@ import ( "reflect" "testing" + "k8s.io/klog/v2" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" @@ -299,7 +300,7 @@ func TestConfigFromPolicies(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { got := TopologyManagerConfig{} - updateTopologyManagerConfigFromTopologyPolicies(&got, "", tt.policies) + updateTopologyManagerConfigFromTopologyPolicies(klog.Background(), &got, "", tt.policies) if !reflect.DeepEqual(got, tt.expected) { t.Errorf("conf got=%+#v expected=%+#v", got, tt.expected) } @@ -402,7 +403,7 @@ func TestConfigFromNRT(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := topologyManagerConfigFromNodeResourceTopology(&tt.nrt) + got := topologyManagerConfigFromNodeResourceTopology(klog.Background(), &tt.nrt) if !reflect.DeepEqual(got, tt.expected) { t.Errorf("conf got=%+#v expected=%+#v", got, tt.expected) } diff --git a/pkg/noderesourcetopology/filter.go b/pkg/noderesourcetopology/filter.go index 61cde522e..1052608d4 100644 --- a/pkg/noderesourcetopology/filter.go +++ b/pkg/noderesourcetopology/filter.go @@ -18,18 +18,18 @@ package noderesourcetopology import ( "context" - "fmt" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/klog/v2" v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos" kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config" bm "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask" "k8s.io/kubernetes/pkg/scheduler/framework" + 
"github.com/go-logr/logr" topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2" + "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/resourcerequests" "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify" "sigs.k8s.io/scheduler-plugins/pkg/util" @@ -41,66 +41,63 @@ const highestNUMAID = 8 type PolicyHandler func(pod *v1.Pod, zoneMap topologyv1alpha2.ZoneList) *framework.Status -func singleNUMAContainerLevelHandler(pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { - klog.V(5).InfoS("Single NUMA node handler") +func singleNUMAContainerLevelHandler(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { + lh.V(5).Info("container level single NUMA node handler") // prepare NUMANodes list from zoneMap - nodes := createNUMANodeList(zones) + nodes := createNUMANodeList(lh, zones) qos := v1qos.GetPodQOS(pod) // Node() != nil already verified in Filter(), which is the only public entry point - logNumaNodes("container handler NUMA resources", nodeInfo.Node().Name, nodes) + logNumaNodes(lh, "container handler NUMA resources", nodeInfo.Node().Name, nodes) // the init containers are running SERIALLY and BEFORE the normal containers. // https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#understanding-init-containers // therefore, we don't need to accumulate their resources together for _, initContainer := range pod.Spec.InitContainers { - logID := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, initContainer.Name) - klog.V(6).InfoS("target resources", stringify.ResourceListToLoggable(logID, initContainer.Resources.Requests)...) + lh.V(6).Info("init container desired resources", stringify.ResourceListToLoggable(initContainer.Resources.Requests)...) - _, match := resourcesAvailableInAnyNUMANodes(logID, nodes, initContainer.Resources.Requests, qos, nodeInfo) + _, match := resourcesAvailableInAnyNUMANodes(lh, nodes, initContainer.Resources.Requests, qos, nodeInfo) if !match { // we can't align init container, so definitely we can't align a pod - klog.V(2).InfoS("cannot align container", "name", initContainer.Name, "kind", "init") + lh.V(2).Info("cannot align container", "name", initContainer.Name, "kind", "init") return framework.NewStatus(framework.Unschedulable, "cannot align init container") } } for _, container := range pod.Spec.Containers { - logID := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Name) - klog.V(6).InfoS("target resources", stringify.ResourceListToLoggable(logID, container.Resources.Requests)...) + // TODO: add containerName + lh.V(6).Info("app container resources", stringify.ResourceListToLoggable(container.Resources.Requests)...) - numaID, match := resourcesAvailableInAnyNUMANodes(logID, nodes, container.Resources.Requests, qos, nodeInfo) + numaID, match := resourcesAvailableInAnyNUMANodes(lh, nodes, container.Resources.Requests, qos, nodeInfo) if !match { // we can't align container, so definitely we can't align a pod - klog.V(2).InfoS("cannot align container", "name", container.Name, "kind", "app") + lh.V(2).Info("cannot align container", "name", container.Name, "kind", "app") return framework.NewStatus(framework.Unschedulable, "cannot align container") } // subtract the resources requested by the container from the given NUMA. 
// this is necessary, so we won't allocate the same resources for the upcoming containers - subtractFromNUMA(nodes, numaID, container) + subtractFromNUMA(lh, nodes, numaID, container) } return nil } // resourcesAvailableInAnyNUMANodes checks for sufficient resource and return the NUMAID that would be selected by Kubelet. // this function requires NUMANodeList with properly populated NUMANode, NUMAID should be in range 0-63 -func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, resources v1.ResourceList, qos v1.PodQOSClass, nodeInfo *framework.NodeInfo) (int, bool) { +func resourcesAvailableInAnyNUMANodes(lh logr.Logger, numaNodes NUMANodeList, resources v1.ResourceList, qos v1.PodQOSClass, nodeInfo *framework.NodeInfo) (int, bool) { numaID := highestNUMAID bitmask := bm.NewEmptyBitMask() // set all bits, each bit is a NUMA node, if resources couldn't be aligned // on the NUMA node, bit should be unset bitmask.Fill() - // Node() != nil already verified in Filter(), which is the only public entry point - nodeName := nodeInfo.Node().Name nodeResources := util.ResourceList(nodeInfo.Allocatable) for resource, quantity := range resources { if quantity.IsZero() { // why bother? everything's fine from the perspective of this resource - klog.V(4).InfoS("ignoring zero-qty resource request", "logID", logID, "node", nodeName, "resource", resource) + lh.V(4).Info("ignoring zero-qty resource request", "resource", resource) continue } @@ -108,7 +105,7 @@ func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, reso // some resources may not expose NUMA affinity (device plugins, extended resources), but all resources // must be reported at node level; thus, if they are not present at node level, we can safely assume // we don't have the resource at all. 
- klog.V(5).InfoS("early verdict: cannot meet request", "logID", logID, "node", nodeName, "resource", resource, "suitable", "false") + lh.V(5).Info("early verdict: cannot meet request", "resource", resource, "suitable", "false") return numaID, false } @@ -128,19 +125,19 @@ func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, reso } resourceBitmask.Add(numaNode.NUMAID) - klog.V(6).InfoS("feasible", "logID", logID, "node", nodeName, "NUMA", numaNode.NUMAID, "resource", resource) + lh.V(6).Info("feasible", "numaCell", numaNode.NUMAID, "resource", resource) } // non-native resources or ephemeral-storage may not expose NUMA affinity, // but since they are available at node level, this is fine if !hasNUMAAffinity && (!v1helper.IsNativeResource(resource) || resource == v1.ResourceEphemeralStorage) { - klog.V(6).InfoS("resource available at node level (no NUMA affinity)", "logID", logID, "node", nodeName, "resource", resource) + lh.V(6).Info("resource available at node level (no NUMA affinity)", "resource", resource) continue } bitmask.And(resourceBitmask) if bitmask.IsEmpty() { - klog.V(5).InfoS("early verdict", "logID", logID, "node", nodeName, "resource", resource, "suitable", "false") + lh.V(5).Info("early verdict", "resource", resource, "suitable", "false") return numaID, false } } @@ -152,7 +149,7 @@ func resourcesAvailableInAnyNUMANodes(logID string, numaNodes NUMANodeList, reso // at least one NUMA node is available ret := !bitmask.IsEmpty() - klog.V(5).InfoS("final verdict", "logID", logID, "node", nodeName, "suitable", ret) + lh.V(5).Info("final verdict", "suitable", ret) return numaID, ret } @@ -175,20 +172,19 @@ func isResourceSetSuitable(qos v1.PodQOSClass, resource v1.ResourceName, quantit return numaQuantity.Cmp(quantity) >= 0 } -func singleNUMAPodLevelHandler(pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { - klog.V(5).InfoS("Pod Level Resource handler") +func singleNUMAPodLevelHandler(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status { + lh.V(5).Info("pod level single NUMA node handler") resources := util.GetPodEffectiveRequest(pod) - logID := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name) - nodes := createNUMANodeList(zones) + nodes := createNUMANodeList(lh, zones) // Node() != nil already verified in Filter(), which is the only public entry point - logNumaNodes("pod handler NUMA resources", nodeInfo.Node().Name, nodes) - klog.V(6).InfoS("target resources", stringify.ResourceListToLoggable(logID, resources)...) + logNumaNodes(lh, "pod handler NUMA resources", nodeInfo.Node().Name, nodes) + lh.V(6).Info("pod desired resources", stringify.ResourceListToLoggable(resources)...) 
- if _, match := resourcesAvailableInAnyNUMANodes(logID, createNUMANodeList(zones), resources, v1qos.GetPodQOS(pod), nodeInfo); !match { - klog.V(2).InfoS("cannot align pod", "name", pod.Name) + if _, match := resourcesAvailableInAnyNUMANodes(lh, createNUMANodeList(lh, zones), resources, v1qos.GetPodQOS(pod), nodeInfo); !match { + lh.V(2).Info("cannot align pod", "name", pod.Name) return framework.NewStatus(framework.Unschedulable, "cannot align pod") } return nil @@ -204,22 +200,27 @@ func (tm *TopologyMatch) Filter(ctx context.Context, cycleState *framework.Cycle } nodeName := nodeInfo.Node().Name + + lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowFilter) + lh.V(4).Info(logging.FlowBegin) + defer lh.V(4).Info(logging.FlowEnd) + nodeTopology, ok := tm.nrtCache.GetCachedNRTCopy(ctx, nodeName, pod) if !ok { - klog.V(2).InfoS("invalid topology data", "node", nodeName) + lh.V(2).Info("invalid topology data") return framework.NewStatus(framework.Unschedulable, "invalid node topology data") } if nodeTopology == nil { return nil } - klog.V(5).InfoS("Found NodeResourceTopology", "nodeTopology", klog.KObj(nodeTopology)) + lh.V(5).Info("found nrt data", "object", stringify.NodeResourceTopologyResources(nodeTopology)) - handler := filterHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(nodeTopology)) + handler := filterHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(lh, nodeTopology)) if handler == nil { return nil } - status := handler(pod, nodeTopology.Zones, nodeInfo) + status := handler(lh, pod, nodeTopology.Zones, nodeInfo) if status != nil { tm.nrtCache.NodeMaybeOverReserved(nodeName, pod) } @@ -227,7 +228,7 @@ func (tm *TopologyMatch) Filter(ctx context.Context, cycleState *framework.Cycle } // subtractFromNUMA finds the correct NUMA ID's resources and subtract them from `nodes`. 
-func subtractFromNUMA(nodes NUMANodeList, numaID int, container v1.Container) { +func subtractFromNUMA(lh logr.Logger, nodes NUMANodeList, numaID int, container v1.Container) { for i := 0; i < len(nodes); i++ { if nodes[i].NUMAID != numaID { continue @@ -241,7 +242,7 @@ func subtractFromNUMA(nodes NUMANodeList, numaID int, container v1.Container) { // when resourcesAvailableInAnyNUMANodes function is passed // but let's log here if such unlikely case will occur if nodeResQuan.Sign() == -1 { - klog.V(4).InfoS("resource quantity should not be a negative value", "resource", resName, "quantity", nodeResQuan.String()) + lh.V(4).Info("resource quantity should not be a negative value", "resource", resName, "quantity", nodeResQuan.String()) } nRes[resName] = nodeResQuan } diff --git a/pkg/noderesourcetopology/filter_test.go b/pkg/noderesourcetopology/filter_test.go index db20bf82c..4a19c6c6a 100644 --- a/pkg/noderesourcetopology/filter_test.go +++ b/pkg/noderesourcetopology/filter_test.go @@ -27,6 +27,7 @@ import ( v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/klog/v2" "k8s.io/kubernetes/pkg/scheduler/framework" nrtcache "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/cache" @@ -683,7 +684,7 @@ func TestNodeResourceTopology(t *testing.T) { } tm := TopologyMatch{ - nrtCache: nrtcache.NewPassthrough(fakeClient), + nrtCache: nrtcache.NewPassthrough(klog.Background(), fakeClient), } for _, tt := range tests { @@ -902,7 +903,7 @@ func TestNodeResourceTopologyMultiContainerPodScope(t *testing.T) { } tm := TopologyMatch{ - nrtCache: nrtcache.NewPassthrough(fakeClient), + nrtCache: nrtcache.NewPassthrough(klog.Background(), fakeClient), } nodeInfo := framework.NewNodeInfo() @@ -1164,7 +1165,7 @@ func TestNodeResourceTopologyMultiContainerContainerScope(t *testing.T) { } tm := TopologyMatch{ - nrtCache: nrtcache.NewPassthrough(fakeClient), + nrtCache: nrtcache.NewPassthrough(klog.Background(), fakeClient), } nodeInfo := framework.NewNodeInfo() diff --git a/pkg/noderesourcetopology/least_numa.go b/pkg/noderesourcetopology/least_numa.go index 9cb269b7f..cf14195ae 100644 --- a/pkg/noderesourcetopology/least_numa.go +++ b/pkg/noderesourcetopology/least_numa.go @@ -17,14 +17,12 @@ limitations under the License. 
diff --git a/pkg/noderesourcetopology/least_numa.go b/pkg/noderesourcetopology/least_numa.go
index 9cb269b7f..cf14195ae 100644
--- a/pkg/noderesourcetopology/least_numa.go
+++ b/pkg/noderesourcetopology/least_numa.go
@@ -17,14 +17,12 @@ limitations under the License.
 package noderesourcetopology

 import (
-	"fmt"
-
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/klog/v2"
 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
 	"k8s.io/kubernetes/pkg/scheduler/framework"

+	"github.com/go-logr/logr"
 	topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
 	"gonum.org/v1/gonum/stat/combin"
@@ -36,8 +34,8 @@ const (
 	maxDistanceValue = 255
 )

-func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
-	nodes := createNUMANodeList(zones)
+func leastNUMAContainerScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
+	nodes := createNUMANodeList(lh, zones)
 	qos := v1qos.GetPodQOS(pod)

 	maxNUMANodesCount := 0
@@ -49,12 +47,11 @@ func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList)
 		if onlyNonNUMAResources(nodes, container.Resources.Requests) {
 			continue
 		}
-		identifier := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Name)
-		numaNodes, isMinAvgDistance := numaNodesRequired(identifier, qos, nodes, container.Resources.Requests)
+		numaNodes, isMinAvgDistance := numaNodesRequired(lh, qos, nodes, container.Resources.Requests)
 		// container's resources can't fit onto node, return MinNodeScore for whole pod
 		if numaNodes == nil {
 			// score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
-			klog.Warningf("cannot calculate how many NUMA nodes are required for: %s", identifier)
+			lh.Info("cannot calculate how many NUMA nodes are required", "container", container.Name)
 			return framework.MinNodeScore, nil
 		}
@@ -78,23 +75,21 @@ func leastNUMAContainerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList)
 	return normalizeScore(maxNUMANodesCount, allContainersMinAvgDistance), nil
 }

-func leastNUMAPodScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
-	nodes := createNUMANodeList(zones)
+func leastNUMAPodScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
+	nodes := createNUMANodeList(lh, zones)
 	qos := v1qos.GetPodQOS(pod)

-	identifier := fmt.Sprintf("%s/%s", pod.Namespace, pod.Name)
-
 	resources := util.GetPodEffectiveRequest(pod)
 	// if a pod requests only non NUMA resources return max score
 	if onlyNonNUMAResources(nodes, resources) {
 		return framework.MaxNodeScore, nil
 	}

-	numaNodes, isMinAvgDistance := numaNodesRequired(identifier, qos, nodes, resources)
+	numaNodes, isMinAvgDistance := numaNodesRequired(lh, qos, nodes, resources)
 	// pod's resources can't fit onto node, return MinNodeScore
 	if numaNodes == nil {
 		// score plugin should be running after resource filter plugin so we should always find sufficient amount of NUMA nodes
-		klog.Warningf("cannot calculate how many NUMA nodes are required for: %s", identifier)
+		lh.Info("cannot calculate how many NUMA nodes are required")
 		return framework.MinNodeScore, nil
 	}
@@ -112,12 +107,12 @@ func normalizeScore(numaNodesCount int, isMinAvgDistance bool) int64 {
 	return score
 }

-func minAvgDistanceInCombinations(numaNodes NUMANodeList, numaNodesCombination [][]int) float32 {
+func minAvgDistanceInCombinations(lh logr.Logger, numaNodes NUMANodeList, numaNodesCombination [][]int) float32 {
 	// max distance for NUMA node
 	var minDistance float32 = maxDistanceValue

 	for _, combination := range numaNodesCombination {
-		avgDistance := nodesAvgDistance(numaNodes, combination...)
+		avgDistance := nodesAvgDistance(lh, numaNodes, combination...)
 		if avgDistance < minDistance {
 			minDistance = avgDistance
 		}
@@ -126,7 +121,7 @@ func minAvgDistanceInCombinations(numaNodes NUMANodeList, numaNodesCombination [
 	return minDistance
 }

-func nodesAvgDistance(numaNodes NUMANodeList, nodes ...int) float32 {
+func nodesAvgDistance(lh logr.Logger, numaNodes NUMANodeList, nodes ...int) float32 {
 	if len(nodes) == 0 {
 		return maxDistanceValue
 	}
@@ -140,7 +135,7 @@ func nodesAvgDistance(numaNodes NUMANodeList, nodes ...int) float32 {
 			cost, ok := numaNodes[node1].Costs[numaNodes[node2].NUMAID]
 			// we couldn't read Costs assign maxDistanceValue
 			if !ok {
-				klog.Warningf("cannot retrieve Costs information for node ID %d", numaNodes[node1].NUMAID)
+				lh.Info("cannot retrieve Costs information", "nodeID", numaNodes[node1].NUMAID)
 				cost = maxDistanceValue
 			}
 			accu += cost
@@ -169,10 +164,10 @@ func combineResources(numaNodes NUMANodeList, combination []int) v1.ResourceList
 // numaNodesRequired returns bitmask with minimal NUMA nodes required to run given resources
 // or nil when resources can't be fitted onto the worker node
 // second value returned is a boolean indicating if bitmask is optimal from distance perspective
-func numaNodesRequired(identifier string, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList) (bitmask.BitMask, bool) {
+func numaNodesRequired(lh logr.Logger, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList) (bitmask.BitMask, bool) {
 	for bitmaskLen := 1; bitmaskLen <= len(numaNodes); bitmaskLen++ {
 		numaNodesCombination := combin.Combinations(len(numaNodes), bitmaskLen)
-		suitableCombination, isMinDistance := findSuitableCombination(identifier, qos, numaNodes, resources, numaNodesCombination)
+		suitableCombination, isMinDistance := findSuitableCombination(lh, qos, numaNodes, resources, numaNodesCombination)
 		// we have found suitable combination for given bitmaskLen
 		if suitableCombination != nil {
 			bm := bitmask.NewEmptyBitMask()
@@ -189,8 +184,8 @@ func numaNodesRequired(identifier string, qos v1.PodQOSClass, numaNodes NUMANode
 // findSuitableCombination returns combination from numaNodesCombination that can fit resources, otherwise return nil
 // second value returned is a boolean indicating if returned combination is optimal from distance perspective
 // this function will always return combination that provides minimal average distance between nodes in combination
-func findSuitableCombination(identifier string, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList, numaNodesCombination [][]int) ([]int, bool) {
-	minAvgDistance := minAvgDistanceInCombinations(numaNodes, numaNodesCombination)
+func findSuitableCombination(lh logr.Logger, qos v1.PodQOSClass, numaNodes NUMANodeList, resources v1.ResourceList, numaNodesCombination [][]int) ([]int, bool) {
+	minAvgDistance := minAvgDistanceInCombinations(lh, numaNodes, numaNodesCombination)
 	var (
 		minDistanceCombination []int
 		// init as max distance
@@ -201,10 +196,10 @@ func findSuitableCombination(identifier string, qos v1.PodQOSClass, numaNodes NU
 			continue
 		}
 		combinationResources := combineResources(numaNodes, combination)
-		resourcesFit := checkResourcesFit(identifier, qos, resources, combinationResources)
+		resourcesFit := checkResourcesFit(lh, qos, resources, combinationResources)

 		if resourcesFit {
-			distance := nodesAvgDistance(numaNodes, combination...)
+			distance := nodesAvgDistance(lh, numaNodes, combination...)
 			if distance == minAvgDistance {
 				// return early if we can fit resources into combination and provide minDistance
 				return combination, true
@@ -220,10 +215,10 @@ func findSuitableCombination(identifier string, qos v1.PodQOSClass, numaNodes NU
 	return minDistanceCombination, false
 }

-func checkResourcesFit(identifier string, qos v1.PodQOSClass, resources v1.ResourceList, combinationResources v1.ResourceList) bool {
+func checkResourcesFit(lh logr.Logger, qos v1.PodQOSClass, resources v1.ResourceList, combinationResources v1.ResourceList) bool {
 	for resource, quantity := range resources {
 		if quantity.IsZero() {
-			klog.V(4).InfoS("ignoring zero-qty resource request", "identifier", identifier, "resource", resource)
+			lh.V(4).Info("ignoring zero-qty resource request", "resource", resource)
 			continue
 		}
 		if combinationQuantity := combinationResources[resource]; !isResourceSetSuitable(qos, resource, quantity, combinationQuantity) {
diff --git a/pkg/noderesourcetopology/least_numa_test.go b/pkg/noderesourcetopology/least_numa_test.go
index 86876b801..f847b397a 100644
--- a/pkg/noderesourcetopology/least_numa_test.go
+++ b/pkg/noderesourcetopology/least_numa_test.go
@@ -23,6 +23,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
 )
@@ -683,7 +684,7 @@ func TestNUMANodesRequired(t *testing.T) {
 	for _, tc := range testCases {
 		t.Run(tc.description, func(t *testing.T) {
-			bm, isMinDistance := numaNodesRequired("test", v1.PodQOSGuaranteed, tc.numaNodes, tc.podResources)
+			bm, isMinDistance := numaNodesRequired(klog.Background(), v1.PodQOSGuaranteed, tc.numaNodes, tc.podResources)

 			if bm != nil && !bm.IsEqual(tc.expectedBitmask) {
 				t.Errorf("wrong bitmask expected: %d got: %d", tc.expectedBitmask, bm)
@@ -901,7 +902,7 @@ func TestMinDistance(t *testing.T) {
 	}
 	for _, tc := range tcases {
 		t.Run(tc.description, func(t *testing.T) {
-			distance := minAvgDistanceInCombinations(tc.numaNodes, tc.combinations)
+			distance := minAvgDistanceInCombinations(klog.Background(), tc.numaNodes, tc.combinations)
 			if distance != tc.expected {
 				t.Errorf("Expected distance to be %f not %f", tc.expected, distance)
 			}
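Note (illustrative, not part of the patch): the least-NUMA scoring above enumerates NUMA-cell combinations with gonum's combin.Combinations and prefers the combination with the lowest average distance. A standalone toy sketch of that idea; the distance matrix is made up and the averaging is only a rough analogue of nodesAvgDistance:

package main

import (
	"fmt"

	"gonum.org/v1/gonum/stat/combin"
)

// toy NUMA distance matrix, indexed by NUMA cell ID; values loosely mimic
// ACPI SLIT distances (10 local, 11 near, 21 far)
var distances = [][]float64{
	{10, 11, 21, 21},
	{11, 10, 21, 21},
	{21, 21, 10, 11},
	{21, 21, 11, 10},
}

// avgDistance averages all pairwise distances inside a combination,
// including the self-distance, purely for illustration
func avgDistance(combination []int) float64 {
	var accu, count float64
	for _, a := range combination {
		for _, b := range combination {
			accu += distances[a][b]
			count++
		}
	}
	return accu / count
}

func main() {
	// enumerate every pair of 4 NUMA cells, as numaNodesRequired does for
	// bitmaskLen == 2, and keep the pair with the lowest average distance
	best, bestAvg := []int(nil), 255.0
	for _, comb := range combin.Combinations(4, 2) {
		if avg := avgDistance(comb); avg < bestAvg {
			best, bestAvg = comb, avg
		}
	}
	fmt.Printf("best pair: %v avg distance: %.1f\n", best, bestAvg)
}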
diff --git a/pkg/noderesourcetopology/logging.go b/pkg/noderesourcetopology/logging.go
deleted file mode 100644
index d5842dc68..000000000
--- a/pkg/noderesourcetopology/logging.go
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
-Copyright 2021 The Kubernetes Authors.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package noderesourcetopology
-
-import (
-	"encoding/json"
-	"fmt"
-
-	"k8s.io/klog/v2"
-
-	topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
-
-	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify"
-)
-
-func logNumaNodes(desc, nodeName string, nodes NUMANodeList) {
-	for _, numaNode := range nodes {
-		numaLogKey := fmt.Sprintf("%s/node-%d", nodeName, numaNode.NUMAID)
-		klog.V(6).InfoS(desc, stringify.ResourceListToLoggable(numaLogKey, numaNode.Resources)...)
-	}
-}
-
-func logNRT(desc string, nrtObj *topologyv1alpha2.NodeResourceTopology) {
-	if !klog.V(6).Enabled() {
-		// avoid the expensive marshal operation
-		return
-	}
-
-	ntrJson, err := json.MarshalIndent(nrtObj, "", " ")
-	if err != nil {
-		klog.V(6).ErrorS(err, "failed to marshal noderesourcetopology object")
-		return
-	}
-	klog.V(6).Info(desc, "noderesourcetopology", string(ntrJson))
-}
diff --git a/pkg/noderesourcetopology/logging/logging.go b/pkg/noderesourcetopology/logging/logging.go
new file mode 100644
index 000000000..14b151339
--- /dev/null
+++ b/pkg/noderesourcetopology/logging/logging.go
@@ -0,0 +1,77 @@
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package logging
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/go-logr/logr"
+
+	corev1 "k8s.io/api/core/v1"
+)
+
+// Before replacing this with FromContext(), at least in filter and score,
+// we would need a way to inject a logger instance (preferably a
+// per-plugin logger instance) when we create the Scheduler
+// (with app.NewSchedulerCommand).
+
+// well-known structured log keys
+const (
+	KeyLogID  string = "logID"
+	KeyPodUID string = "podUID"
+	KeyNode   string = "node"
+	KeyFlow   string = "flow"
+)
+
+const (
+	FlowBegin string = "begin"
+	FlowEnd   string = "end"
+)
+
+const (
+	FlowCacheSync string = "cachesync"
+	FlowFilter    string = "filter"
+	FlowPostBind  string = "postbind"
+	FlowReserve   string = "reserve"
+	FlowUnreserve string = "unreserve"
+	FlowScore     string = "score"
+)
+
+var logh logr.Logger
+
+func SetLogger(lh logr.Logger) {
+	logh = lh
+}
+
+func Log() logr.Logger {
+	return logh
+}
+
+func PodLogID(pod *corev1.Pod) string {
+	if pod == nil {
+		return ""
+	}
+	if pod.Namespace == "" {
+		return pod.Name
+	}
+	return pod.Namespace + "/" + pod.Name
+}
+
+func TimeLogID() string {
+	return fmt.Sprintf("uts/%v", time.Now().UnixMilli())
+}
diff --git a/pkg/noderesourcetopology/plugin.go b/pkg/noderesourcetopology/plugin.go
index 153a4fccd..7ad6619d0 100644
--- a/pkg/noderesourcetopology/plugin.go
+++ b/pkg/noderesourcetopology/plugin.go
@@ -31,7 +31,9 @@ import (
 	apiconfig "sigs.k8s.io/scheduler-plugins/apis/config"
 	"sigs.k8s.io/scheduler-plugins/apis/config/validation"
 	nrtcache "sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/cache"
+	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging"

+	"github.com/go-logr/logr"
 	topologyapi "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology"
 	topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
 )
@@ -94,8 +96,8 @@ func subtractFromNUMAs(resources v1.ResourceList, numaNodes NUMANodeList, nodes
 	}
 }

-type filterFn func(pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status
-type scoringFn func(*v1.Pod, topologyv1alpha2.ZoneList) (int64, *framework.Status)
+type filterFn func(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status
+type scoringFn func(logr.Logger, *v1.Pod, topologyv1alpha2.ZoneList) (int64, *framework.Status)

 // TopologyMatch plugin which run simplified version of TopologyManager's admit handler
 type TopologyMatch struct {
@@ -118,7 +120,11 @@ func (tm *TopologyMatch) Name() string {

 // New initializes a new plugin and returns it.
 func New(_ context.Context, args runtime.Object, handle framework.Handle) (framework.Plugin, error) {
-	klog.V(5).InfoS("Creating new TopologyMatch plugin")
+	// we do this here, and not earlier, to make sure klog is initialized; we don't need the logger before this point anyway
+	lh := klog.Background()
+	logging.SetLogger(lh)
+
+	lh.V(5).Info("creating new noderesourcetopology plugin")
 	tcfg, ok := args.(*apiconfig.NodeResourceTopologyMatchArgs)
 	if !ok {
 		return nil, fmt.Errorf("want args to be of type NodeResourceTopologyMatchArgs, got %T", args)
@@ -128,9 +134,9 @@ func New(_ context.Context, args runtime.Object, handle framework.Handle) (frame
 		return nil, err
 	}

-	nrtCache, err := initNodeTopologyInformer(tcfg, handle)
+	nrtCache, err := initNodeTopologyInformer(lh, tcfg, handle)
 	if err != nil {
-		klog.ErrorS(err, "Cannot create clientset for NodeTopologyResource", "kubeConfig", handle.KubeConfig())
+		lh.Error(err, "cannot create clientset for NodeTopologyResource", "kubeConfig", handle.KubeConfig())
 		return nil, err
 	}
diff --git a/pkg/noderesourcetopology/pluginhelpers.go b/pkg/noderesourcetopology/pluginhelpers.go
index 832c95433..54b95e390 100644
--- a/pkg/noderesourcetopology/pluginhelpers.go
+++ b/pkg/noderesourcetopology/pluginhelpers.go
@@ -22,9 +22,9 @@ import (
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/util/wait"
 	k8scache "k8s.io/client-go/tools/cache"
-	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/scheduler/framework"

+	"github.com/go-logr/logr"
 	topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
 	"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2/helper/numanode"
@@ -40,64 +40,64 @@ const (
 	maxNUMAId = 64
 )

-func initNodeTopologyInformer(tcfg *apiconfig.NodeResourceTopologyMatchArgs, handle framework.Handle) (nrtcache.Interface, error) {
+func initNodeTopologyInformer(lh logr.Logger, tcfg *apiconfig.NodeResourceTopologyMatchArgs, handle framework.Handle) (nrtcache.Interface, error) {
 	client, err := ctrlclient.New(handle.KubeConfig(), ctrlclient.Options{Scheme: scheme})
 	if err != nil {
-		klog.ErrorS(err, "Cannot create client for NodeTopologyResource", "kubeConfig", handle.KubeConfig())
+		lh.Error(err, "cannot create client for NodeTopologyResource", "kubeConfig", handle.KubeConfig())
 		return nil, err
 	}

 	if tcfg.DiscardReservedNodes {
-		return nrtcache.NewDiscardReserved(client), nil
+		return nrtcache.NewDiscardReserved(lh.WithName("nrtcache"), client), nil
 	}

 	if tcfg.CacheResyncPeriodSeconds <= 0 {
-		return nrtcache.NewPassthrough(client), nil
+		return nrtcache.NewPassthrough(lh.WithName("nrtcache"), client), nil
 	}

-	podSharedInformer, podLister, isPodRelevant := podprovider.NewFromHandle(handle, tcfg.Cache)
+	podSharedInformer, podLister, isPodRelevant := podprovider.NewFromHandle(lh, handle, tcfg.Cache)

-	nrtCache, err := nrtcache.NewOverReserve(tcfg.Cache, client, podLister, isPodRelevant)
+	nrtCache, err := nrtcache.NewOverReserve(lh.WithName("nrtcache"), tcfg.Cache, client, podLister, isPodRelevant)
 	if err != nil {
 		return nil, err
 	}
-	initNodeTopologyForeignPodsDetection(tcfg.Cache, handle, podSharedInformer, nrtCache)
+	initNodeTopologyForeignPodsDetection(lh, tcfg.Cache, handle, podSharedInformer, nrtCache)

 	resyncPeriod := time.Duration(tcfg.CacheResyncPeriodSeconds) * time.Second
 	go wait.Forever(nrtCache.Resync, resyncPeriod)

-	klog.V(3).InfoS("enable NodeTopology cache (needs the Reserve plugin)", "resyncPeriod", resyncPeriod)
+	lh.V(3).Info("enable NodeTopology cache (needs the Reserve plugin)", "resyncPeriod", resyncPeriod)

 	return nrtCache, nil
 }

-func initNodeTopologyForeignPodsDetection(cfg *apiconfig.NodeResourceTopologyCache, handle framework.Handle, podSharedInformer k8scache.SharedInformer, nrtCache *nrtcache.OverReserve) {
-	foreignPodsDetect := getForeignPodsDetectMode(cfg)
+func initNodeTopologyForeignPodsDetection(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache, handle framework.Handle, podSharedInformer k8scache.SharedInformer, nrtCache *nrtcache.OverReserve) {
+	foreignPodsDetect := getForeignPodsDetectMode(lh, cfg)

 	if foreignPodsDetect == apiconfig.ForeignPodsDetectNone {
-		klog.InfoS("foreign pods detection disabled by configuration")
+		lh.Info("foreign pods detection disabled by configuration")
 		return
 	}
 	fwk, ok := handle.(framework.Framework)
 	if !ok {
-		klog.Warningf("cannot determine the scheduler profile names - no foreign pod detection enabled")
+		lh.Info("cannot determine the scheduler profile names - no foreign pod detection enabled")
 		return
 	}

 	profileName := fwk.ProfileName()
-	klog.InfoS("setting up foreign pods detection", "name", profileName, "mode", foreignPodsDetect)
+	lh.Info("setting up foreign pods detection", "name", profileName, "mode", foreignPodsDetect)
 	if foreignPodsDetect == apiconfig.ForeignPodsDetectOnlyExclusiveResources {
 		nrtcache.TrackOnlyForeignPodsWithExclusiveResources()
 	} else {
 		nrtcache.TrackAllForeignPods()
 	}
-	nrtcache.RegisterSchedulerProfileName(profileName)
-	nrtcache.SetupForeignPodsDetector(profileName, podSharedInformer, nrtCache)
+	nrtcache.RegisterSchedulerProfileName(lh.WithName("foreignpods"), profileName)
+	nrtcache.SetupForeignPodsDetector(lh.WithName("foreignpods"), profileName, podSharedInformer, nrtCache)
 }

-func createNUMANodeList(zones topologyv1alpha2.ZoneList) NUMANodeList {
+func createNUMANodeList(lh logr.Logger, zones topologyv1alpha2.ZoneList) NUMANodeList {
 	numaIDToZoneIDx := make([]int, maxNUMAId)
 	nodes := NUMANodeList{}
 	// filter non Node zones and create idToIdx lookup array
@@ -108,14 +108,15 @@ func createNUMANodeList(zones topologyv1alpha2.ZoneList) NUMANodeList {
 		numaID, err := numanode.NameToID(zone.Name)
 		if err != nil || numaID > maxNUMAId {
-			klog.Error(err)
+			lh.Error(err, "error getting the numaID", "zone", zone.Name, "numaID", numaID)
 			continue
 		}

 		numaIDToZoneIDx[numaID] = i

 		resources := extractResources(zone)
-		klog.V(6).InfoS("extracted NUMA resources", stringify.ResourceListToLoggable(zone.Name, resources)...)
+		numaItems := []interface{}{"numaCell", numaID}
+		lh.V(6).Info("extracted NUMA resources", stringify.ResourceListToLoggableWithValues(numaItems, resources)...)
 		nodes = append(nodes, NUMANode{NUMAID: numaID, Resources: resources})
 	}

@@ -166,13 +167,20 @@ func onlyNonNUMAResources(numaNodes NUMANodeList, resources corev1.ResourceList)
 	return true
 }

-func getForeignPodsDetectMode(cfg *apiconfig.NodeResourceTopologyCache) apiconfig.ForeignPodsDetectMode {
+func getForeignPodsDetectMode(lh logr.Logger, cfg *apiconfig.NodeResourceTopologyCache) apiconfig.ForeignPodsDetectMode {
 	var foreignPodsDetect apiconfig.ForeignPodsDetectMode
 	if cfg != nil && cfg.ForeignPodsDetect != nil {
 		foreignPodsDetect = *cfg.ForeignPodsDetect
 	} else { // explicitly set to nil?
 		foreignPodsDetect = apiconfig.ForeignPodsDetectAll
-		klog.InfoS("foreign pods detection value missing", "fallback", foreignPodsDetect)
+		lh.Info("foreign pods detection value missing", "fallback", foreignPodsDetect)
 	}
 	return foreignPodsDetect
 }
+
+func logNumaNodes(lh logr.Logger, desc, nodeName string, nodes NUMANodeList) {
+	for _, numaNode := range nodes {
+		numaItems := []interface{}{"numaCell", numaNode.NUMAID}
+		lh.V(6).Info(desc, stringify.ResourceListToLoggableWithValues(numaItems, numaNode.Resources)...)
+	}
+}
diff --git a/pkg/noderesourcetopology/pluginhelpers_test.go b/pkg/noderesourcetopology/pluginhelpers_test.go
index 8138fdebd..0ae260b35 100644
--- a/pkg/noderesourcetopology/pluginhelpers_test.go
+++ b/pkg/noderesourcetopology/pluginhelpers_test.go
@@ -21,6 +21,7 @@ import (

 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
+	"k8s.io/klog/v2"

 	apiconfig "sigs.k8s.io/scheduler-plugins/apis/config"
 )
@@ -148,7 +149,7 @@ func TestGetForeignPodsDetectMode(t *testing.T) {

 	for _, testCase := range testCases {
 		t.Run(testCase.description, func(t *testing.T) {
-			got := getForeignPodsDetectMode(testCase.cfg)
+			got := getForeignPodsDetectMode(klog.Background(), testCase.cfg)
 			if got != testCase.expected {
 				t.Errorf("foreign pods detect mode got %v expected %v", got, testCase.expected)
 			}
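Note (illustrative, not part of the patch): the new logging package plus the lh.WithName("nrtcache") / WithName("foreignpods") calls above replace the old hand-written "nrtcache:" message prefixes and ad-hoc identifiers. A standalone usage sketch, assuming the sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging package introduced earlier in this patch; the pod metadata, node name, and resync value are made up:

package main

import (
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"

	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging"
)

func main() {
	// wired once from the plugin's New(), as plugin.go now does
	logging.SetLogger(klog.Background())

	pod := &corev1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "demo", Name: "gu-pod", UID: "uid-1234"}}

	// per-flow logger, the same shape Reserve/PostBind/Score use
	lh := logging.Log().WithValues(
		logging.KeyLogID, logging.PodLogID(pod),
		logging.KeyPodUID, pod.GetUID(),
		logging.KeyNode, "worker-0",
		logging.KeyFlow, logging.FlowReserve,
	)
	lh.V(4).Info(logging.FlowBegin)
	defer lh.V(4).Info(logging.FlowEnd)

	// named child loggers scope subsystem messages; the name chain is
	// typically rendered as a message prefix by the configured sink
	cacheLog := klog.Background().WithName("nrtcache")
	cacheLog.V(3).Info("enable NodeTopology cache (needs the Reserve plugin)", "resyncPeriod", "5s")
}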
diff --git a/pkg/noderesourcetopology/podprovider/podprovider.go b/pkg/noderesourcetopology/podprovider/podprovider.go
index 7aa6ec208..2fa2f7bb4 100644
--- a/pkg/noderesourcetopology/podprovider/podprovider.go
+++ b/pkg/noderesourcetopology/podprovider/podprovider.go
@@ -19,21 +19,21 @@ package podprovider
 import (
 	"context"

+	"github.com/go-logr/logr"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	coreinformers "k8s.io/client-go/informers/core/v1"
 	podlisterv1 "k8s.io/client-go/listers/core/v1"
 	"k8s.io/client-go/tools/cache"
 	k8scache "k8s.io/client-go/tools/cache"
-	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/scheduler/framework"

 	apiconfig "sigs.k8s.io/scheduler-plugins/apis/config"
 )

-type PodFilterFunc func(pod *corev1.Pod, logID string) bool
+type PodFilterFunc func(lh logr.Logger, pod *corev1.Pod) bool

-func NewFromHandle(handle framework.Handle, cacheConf *apiconfig.NodeResourceTopologyCache) (k8scache.SharedIndexInformer, podlisterv1.PodLister, PodFilterFunc) {
+func NewFromHandle(lh logr.Logger, handle framework.Handle, cacheConf *apiconfig.NodeResourceTopologyCache) (k8scache.SharedIndexInformer, podlisterv1.PodLister, PodFilterFunc) {
 	dedicated := wantsDedicatedInformer(cacheConf)
 	if !dedicated {
 		podHandle := handle.SharedInformerFactory().Core().V1().Pods() // shortcut
@@ -43,38 +43,38 @@ func NewFromHandle(handle framework.Handle, cacheConf *apiconfig.NodeResourceTop
 	podInformer := coreinformers.NewFilteredPodInformer(handle.ClientSet(), metav1.NamespaceAll, 0, cache.Indexers{}, nil)
 	podLister := podlisterv1.NewPodLister(podInformer.GetIndexer())

-	klog.V(5).InfoS("Start custom pod informer")
+	lh.V(5).Info("start custom pod informer")
 	ctx := context.Background()
 	go podInformer.Run(ctx.Done())

-	klog.V(5).InfoS("Syncing custom pod informer")
+	lh.V(5).Info("syncing custom pod informer")
 	cache.WaitForCacheSync(ctx.Done(), podInformer.HasSynced)
-	klog.V(5).InfoS("Synced custom pod informer")
+	lh.V(5).Info("synced custom pod informer")

 	return podInformer, podLister, IsPodRelevantDedicated
 }

 // IsPodRelevantAlways is meant to be used in test only
-func IsPodRelevantAlways(pod *corev1.Pod, logID string) bool {
+func IsPodRelevantAlways(lh logr.Logger, pod *corev1.Pod) bool {
 	return true
 }

-func IsPodRelevantShared(pod *corev1.Pod, logID string) bool {
+func IsPodRelevantShared(lh logr.Logger, pod *corev1.Pod) bool {
 	// we are interested only about nodes which consume resources
 	return pod.Status.Phase == corev1.PodRunning
 }

-func IsPodRelevantDedicated(pod *corev1.Pod, logID string) bool {
+func IsPodRelevantDedicated(lh logr.Logger, pod *corev1.Pod) bool {
 	// Every other phase we're interested into (see https://github.com/kubernetes-sigs/scheduler-plugins/pull/599).
 	// Note PodUnknown is deprecated and reportedly no longer set since 2015 (!!)
 	if pod.Status.Phase == corev1.PodPending {
 		// this is unexpected, so we're loud about it
-		klog.V(2).InfoS("nrtcache: Listed pod in Pending phase, ignored", "logID", logID, "podUID", pod.UID)
+		lh.V(2).Info("listed pod in Pending phase, ignored", "podUID", pod.GetUID())
 		return false
 	}
 	if pod.Spec.NodeName == "" {
 		// this is very unexpected, so we're louder about it
-		klog.InfoS("nrtcache: Listed pod unbound, ignored", "logID", logID, "podUID", pod.UID)
+		lh.Info("listed pod unbound, ignored", "podUID", pod.GetUID())
 		return false
 	}
 	return true
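Note (illustrative, not part of the patch): the PodFilterFunc returned by NewFromHandle is intended to be called per listed pod by the cache resync machinery, which is not shown in these hunks. A hedged, standalone sketch of how such a filter could be driven; the type alias, helper names, and pods are all made up for the example:

package main

import (
	"fmt"

	"github.com/go-logr/logr"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/klog/v2"
)

// podFilterFunc has the same shape as podprovider.PodFilterFunc after this change
type podFilterFunc func(lh logr.Logger, pod *corev1.Pod) bool

// isPodRelevantShared mirrors IsPodRelevantShared: only Running pods consume resources
func isPodRelevantShared(lh logr.Logger, pod *corev1.Pod) bool {
	return pod.Status.Phase == corev1.PodRunning
}

// relevantPods keeps only the pods the cache should account for
func relevantPods(lh logr.Logger, pods []*corev1.Pod, isRelevant podFilterFunc) []*corev1.Pod {
	kept := []*corev1.Pod{}
	for _, pod := range pods {
		if !isRelevant(lh, pod) {
			continue
		}
		kept = append(kept, pod)
	}
	return kept
}

func main() {
	lh := klog.Background().WithName("nrtcache")
	pods := []*corev1.Pod{
		{ObjectMeta: metav1.ObjectMeta{Name: "running"}, Status: corev1.PodStatus{Phase: corev1.PodRunning}},
		{ObjectMeta: metav1.ObjectMeta{Name: "pending"}, Status: corev1.PodStatus{Phase: corev1.PodPending}},
	}
	fmt.Println("relevant pods:", len(relevantPods(lh, pods, isPodRelevantShared)))
}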
diff --git a/pkg/noderesourcetopology/postbind.go b/pkg/noderesourcetopology/postbind.go
index 45ea1ee68..a0218e25d 100644
--- a/pkg/noderesourcetopology/postbind.go
+++ b/pkg/noderesourcetopology/postbind.go
@@ -21,8 +21,13 @@ import (

 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
+
+	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging"
 )

 func (tm *TopologyMatch) PostBind(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) {
+	lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowPostBind)
+	lh.V(4).Info(logging.FlowBegin)
+	defer lh.V(4).Info(logging.FlowEnd)
+
 	tm.nrtCache.PostBind(nodeName, pod)
 }
diff --git a/pkg/noderesourcetopology/reserve.go b/pkg/noderesourcetopology/reserve.go
index 5c79c12e6..b7c5d2397 100644
--- a/pkg/noderesourcetopology/reserve.go
+++ b/pkg/noderesourcetopology/reserve.go
@@ -21,14 +21,23 @@ import (

 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
+
+	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging"
 )

 func (tm *TopologyMatch) Reserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) *framework.Status {
+	lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowReserve)
+	lh.V(4).Info(logging.FlowBegin)
+	defer lh.V(4).Info(logging.FlowEnd)
+
 	tm.nrtCache.ReserveNodeResources(nodeName, pod)
 	// can't fail
 	return framework.NewStatus(framework.Success, "")
 }

 func (tm *TopologyMatch) Unreserve(ctx context.Context, state *framework.CycleState, pod *corev1.Pod, nodeName string) {
+	lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowUnreserve)
+	lh.V(4).Info(logging.FlowBegin)
+	defer lh.V(4).Info(logging.FlowEnd)
+
 	tm.nrtCache.UnreserveNodeResources(nodeName, pod)
 }
diff --git a/pkg/noderesourcetopology/score.go b/pkg/noderesourcetopology/score.go
index 1e88deafd..e6b611194 100644
--- a/pkg/noderesourcetopology/score.go
+++ b/pkg/noderesourcetopology/score.go
@@ -23,13 +23,15 @@ import (
 	"gonum.org/v1/gonum/stat"
 	v1 "k8s.io/api/core/v1"
-	"k8s.io/klog/v2"
 	v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
 	kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
 	"k8s.io/kubernetes/pkg/scheduler/framework"

 	apiconfig "sigs.k8s.io/scheduler-plugins/apis/config"
+	"github.com/go-logr/logr"
 	topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
+	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/logging"
+	"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify"
 	"sigs.k8s.io/scheduler-plugins/pkg/util"
 )
@@ -57,7 +59,11 @@ func (rw resourceToWeightMap) weight(r v1.ResourceName) int64 {
 }

 func (tm *TopologyMatch) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
-	klog.V(6).InfoS("scoring node", "nodeName", nodeName)
+	lh := logging.Log().WithValues(logging.KeyLogID, logging.PodLogID(pod), logging.KeyPodUID, pod.GetUID(), logging.KeyNode, nodeName, logging.KeyFlow, logging.FlowScore)
+	lh.V(4).Info(logging.FlowBegin)
+	defer lh.V(4).Info(logging.FlowEnd)
+
+	lh.V(6).Info("scoring node")
 	// if it's a non-guaranteed pod, every node is considered to be a good fit
 	if v1qos.GetPodQOS(pod) != v1.PodQOSGuaranteed {
 		return framework.MaxNodeScore, nil
@@ -66,21 +72,21 @@ func (tm *TopologyMatch) Score(ctx context.Context, state *framework.CycleState,

 	nodeTopology, ok := tm.nrtCache.GetCachedNRTCopy(ctx, nodeName, pod)
 	if !ok {
-		klog.V(4).InfoS("noderesourcetopology is not valid for node", "node", nodeName)
+		lh.V(4).Info("noderesourcetopology is not valid for node")
 		return 0, nil
 	}
 	if nodeTopology == nil {
-		klog.V(5).InfoS("noderesourcetopology was not found for node", "node", nodeName)
+		lh.V(5).Info("noderesourcetopology was not found for node")
 		return 0, nil
 	}

-	logNRT("noderesourcetopology found", nodeTopology)
+	lh.V(6).Info("found object", "noderesourcetopology", stringify.NodeResourceTopologyResources(nodeTopology))

-	handler := tm.scoringHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(nodeTopology))
+	handler := tm.scoringHandlerFromTopologyManagerConfig(topologyManagerConfigFromNodeResourceTopology(lh, nodeTopology))
 	if handler == nil {
 		return 0, nil
 	}
-	return handler(pod, nodeTopology.Zones)
+	return handler(lh, pod, nodeTopology.Zones)
 }

 func (tm *TopologyMatch) ScoreExtensions() framework.ScoreExtensions {
@@ -89,7 +95,7 @@ func (tm *TopologyMatch) ScoreExtensions() framework.ScoreExtensions {

 // scoreForEachNUMANode will iterate over all NUMA zones of the node and invoke the scoreStrategyFn func for every zone.
 // it will return the minimal score of all the calculated NUMA's score, in order to avoid edge cases.
-func scoreForEachNUMANode(requested v1.ResourceList, numaList NUMANodeList, score scoreStrategyFn, resourceToWeightMap resourceToWeightMap) int64 {
+func scoreForEachNUMANode(lh logr.Logger, requested v1.ResourceList, numaList NUMANodeList, score scoreStrategyFn, resourceToWeightMap resourceToWeightMap) int64 {
 	numaScores := make([]int64, len(numaList))
 	minScore := int64(0)
@@ -100,7 +106,7 @@ func scoreForEachNUMANode(requested v1.ResourceList, numaList NUMANodeList, scor
 			minScore = numaScore
 		}
 		numaScores[numa.NUMAID] = numaScore
-		klog.V(6).InfoS("numa score result", "numaID", numa.NUMAID, "score", numaScore)
+		lh.V(6).Info("numa score result", "numaCell", numa.NUMAID, "score", numaScore)
 	}
 	return minScore
 }
@@ -121,32 +127,31 @@ func getScoringStrategyFunction(strategy apiconfig.ScoringStrategyType) (scoreSt
 	}
 }

-func podScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) {
+func podScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) {
 	// This code is in Admit implementation of pod scope
 	// https://github.com/kubernetes/kubernetes/blob/9ff3b7e744b34c099c1405d9add192adbef0b6b1/pkg/kubelet/cm/topologymanager/scope_pod.go#L52
 	// but it works with HintProviders, takes into account all possible allocations.
 	resources := util.GetPodEffectiveRequest(pod)

-	allocatablePerNUMA := createNUMANodeList(zones)
-	finalScore := scoreForEachNUMANode(resources, allocatablePerNUMA, scorerFn, resourceToWeightMap)
-	klog.V(5).InfoS("pod scope scoring final node score", "finalScore", finalScore)
+	allocatablePerNUMA := createNUMANodeList(lh, zones)
+	finalScore := scoreForEachNUMANode(lh, resources, allocatablePerNUMA, scorerFn, resourceToWeightMap)
+	lh.V(5).Info("pod scope scoring final node score", "finalScore", finalScore)
 	return finalScore, nil
 }

-func containerScopeScore(pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) {
+func containerScopeScore(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList, scorerFn scoreStrategyFn, resourceToWeightMap resourceToWeightMap) (int64, *framework.Status) {
 	// This code is in Admit implementation of container scope
 	// https://github.com/kubernetes/kubernetes/blob/9ff3b7e744b34c099c1405d9add192adbef0b6b1/pkg/kubelet/cm/topologymanager/scope_container.go#L52
 	containers := append(pod.Spec.InitContainers, pod.Spec.Containers...)
 	contScore := make([]float64, len(containers))
-	allocatablePerNUMA := createNUMANodeList(zones)
+	allocatablePerNUMA := createNUMANodeList(lh, zones)

 	for i, container := range containers {
-		identifier := fmt.Sprintf("%s/%s/%s", pod.Namespace, pod.Name, container.Name)
-		contScore[i] = float64(scoreForEachNUMANode(container.Resources.Requests, allocatablePerNUMA, scorerFn, resourceToWeightMap))
-		klog.V(6).InfoS("container scope scoring", "container", identifier, "score", contScore[i])
+		contScore[i] = float64(scoreForEachNUMANode(lh, container.Resources.Requests, allocatablePerNUMA, scorerFn, resourceToWeightMap))
+		lh.V(6).Info("container scope scoring", "container", container.Name, "score", contScore[i])
 	}
 	finalScore := int64(stat.Mean(contScore, nil))
-	klog.V(5).InfoS("container scope scoring final node score", "finalScore", finalScore)
+	lh.V(5).Info("container scope scoring final node score", "finalScore", finalScore)
 	return finalScore, nil
 }
@@ -164,13 +169,13 @@ func (tm *TopologyMatch) scoringHandlerFromTopologyManagerConfig(conf TopologyMa
 		return nil
 	}
 	if conf.Scope == kubeletconfig.PodTopologyManagerScope {
-		return func(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
-			return podScopeScore(pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap)
+		return func(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
+			return podScopeScore(lh, pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap)
 		}
 	}
 	if conf.Scope == kubeletconfig.ContainerTopologyManagerScope {
-		return func(pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
-			return containerScopeScore(pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap)
+		return func(lh logr.Logger, pod *v1.Pod, zones topologyv1alpha2.ZoneList) (int64, *framework.Status) {
+			return containerScopeScore(lh, pod, zones, tm.scoreStrategyFunc, tm.resourceToWeightMap)
 		}
 	}
 	return nil // cannot happen
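Note (illustrative, not part of the patch): containerScopeScore above averages the per-container NUMA scores with gonum's stat.Mean and truncates the result to int64. A tiny standalone sketch with made-up scores:

package main

import (
	"fmt"

	"gonum.org/v1/gonum/stat"
)

func main() {
	// per-container scores as containerScopeScore would collect them,
	// e.g. one init container and two app containers (values made up)
	contScore := []float64{100, 75, 50}

	// nil weights means a plain arithmetic mean; the node score is the
	// truncated mean of the per-container scores
	finalScore := int64(stat.Mean(contScore, nil))
	fmt.Println("final node score:", finalScore) // 75
}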
diff --git a/pkg/noderesourcetopology/score_test.go b/pkg/noderesourcetopology/score_test.go
index 6d11511c6..42199a72f 100644
--- a/pkg/noderesourcetopology/score_test.go
+++ b/pkg/noderesourcetopology/score_test.go
@@ -26,6 +26,7 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
 	"k8s.io/kubernetes/pkg/scheduler/framework"

 	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
@@ -148,7 +149,7 @@ func TestNodeResourceScorePlugin(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			tm := &TopologyMatch{
 				scoreStrategyFunc: test.strategy,
-				nrtCache:          nrtcache.NewPassthrough(lister),
+				nrtCache:          nrtcache.NewPassthrough(klog.Background(), lister),
 			}

 			for _, req := range test.requests {
@@ -443,7 +444,7 @@ func TestNodeResourceScorePluginLeastNUMA(t *testing.T) {

 			tm := &TopologyMatch{
 				scoreStrategyType: apiconfig.LeastNUMANodes,
-				nrtCache:          nrtcache.NewPassthrough(lister),
+				nrtCache:          nrtcache.NewPassthrough(klog.Background(), lister),
 			}
 			nodeToScore := make(nodeToScoreMap, len(nodesMap))
 			pod := makePodByResourceLists(tc.podRequests...)
@@ -569,7 +570,7 @@ func TestNodeResourcePartialDataScorePlugin(t *testing.T) {
 		t.Run(test.name, func(t *testing.T) {
 			tm := &TopologyMatch{
 				scoreStrategyFunc: test.strategy,
-				nrtCache:          nrtcache.NewPassthrough(lister),
+				nrtCache:          nrtcache.NewPassthrough(klog.Background(), lister),
 			}

 			for _, req := range test.requests {
diff --git a/pkg/noderesourcetopology/stringify/stringify.go b/pkg/noderesourcetopology/stringify/stringify.go
index ea5ba4905..94b9ca3a1 100644
--- a/pkg/noderesourcetopology/stringify/stringify.go
+++ b/pkg/noderesourcetopology/stringify/stringify.go
@@ -17,7 +17,6 @@ limitations under the License.
 package stringify

 import (
-	"fmt"
 	"sort"
 	"strconv"
 	"strings"
@@ -28,11 +27,14 @@ import (
 	v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"

 	topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
+	"github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2/helper/numanode"
 )

-func ResourceListToLoggable(logID string, resources corev1.ResourceList) []interface{} {
-	items := []interface{}{"logID", logID}
+func ResourceListToLoggable(resources corev1.ResourceList) []interface{} {
+	return ResourceListToLoggableWithValues([]interface{}{}, resources)
+}
+func ResourceListToLoggableWithValues(items []interface{}, resources corev1.ResourceList) []interface{} {
 	resNames := []string{}
 	for resName := range resources {
 		resNames = append(resNames, string(resName))
@@ -75,6 +77,12 @@ func ResourceList(resources corev1.ResourceList) string {
 func NodeResourceTopologyResources(nrtObj *topologyv1alpha2.NodeResourceTopology) string {
 	zones := []string{}
 	for _, zoneInfo := range nrtObj.Zones {
+		numaItems := []interface{}{"numaCell"}
+		if numaID, err := numanode.NameToID(zoneInfo.Name); err == nil {
+			numaItems = append(numaItems, numaID)
+		} else {
+			numaItems = append(numaItems, zoneInfo.Name)
+		}
 		zones = append(zones, zoneInfo.Name+"=<"+nrtResourceInfoListToString(zoneInfo.Resources)+">")
 	}
 	return nrtObj.Name + "={" + strings.Join(zones, ",") + "}"
@@ -83,10 +91,21 @@ func NodeResourceTopologyResources(nrtObj *topologyv1alpha2.NodeResourceTopology
 func nrtResourceInfoListToString(resInfoList []topologyv1alpha2.ResourceInfo) string {
 	items := []string{}
 	for _, resInfo := range resInfoList {
-		items = append(items, fmt.Sprintf("%s=%s/%s/%s", resInfo.Name, resInfo.Capacity.String(), resInfo.Allocatable.String(), resInfo.Available.String()))
+		items = append(items, nrtResourceInfo(resInfo))
 	}
 	return strings.Join(items, ",")
 }
+
+func nrtResourceInfo(resInfo topologyv1alpha2.ResourceInfo) string {
+	capVal, _ := resInfo.Capacity.AsInt64()
+	allocVal, _ := resInfo.Allocatable.AsInt64()
+	availVal, _ := resInfo.Available.AsInt64()
+	if !needsHumanization(resInfo.Name) {
+		return resInfo.Name + "=" + strconv.FormatInt(capVal, 10) + "/" + strconv.FormatInt(allocVal, 10) + "/" + strconv.FormatInt(availVal, 10)
+	}
+	return resInfo.Name + "=" + humanize.IBytes(uint64(capVal)) + "/" + humanize.IBytes(uint64(allocVal)) + "/" + humanize.IBytes(uint64(availVal))
+}
+
 func needsHumanization(resName string) bool {
 	// memory-related resources may be expressed in KiB/Bytes, which makes
 	// for long numbers, harder to read and compare. To make it easier for
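Note (illustrative, not part of the patch): nrtResourceInfo above switches memory-like quantities from Quantity.String() to IEC-humanized strings via github.com/dustin/go-humanize, while counted resources stay as plain integers. A standalone sketch of the formatting difference; the quantities are made up:

package main

import (
	"fmt"
	"strconv"

	"github.com/dustin/go-humanize"
	"k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	mem := resource.MustParse("16Gi")
	cpu := resource.MustParse("8")

	memVal, _ := mem.AsInt64()
	cpuVal, _ := cpu.AsInt64()

	// memory-like quantities read better humanized ...
	fmt.Println("memory=" + humanize.IBytes(uint64(memVal))) // memory=16 GiB
	// ... while counted resources stay as plain integers
	fmt.Println("cpu=" + strconv.FormatInt(cpuVal, 10)) // cpu=8
}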
diff --git a/pkg/noderesourcetopology/stringify/stringify_test.go b/pkg/noderesourcetopology/stringify/stringify_test.go
index cb11d11c7..21a080fc1 100644
--- a/pkg/noderesourcetopology/stringify/stringify_test.go
+++ b/pkg/noderesourcetopology/stringify/stringify_test.go
@@ -116,7 +116,7 @@ func TestResourceListToLoggable(t *testing.T) {
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			var buf bytes.Buffer
-			keysAndValues := ResourceListToLoggable(tt.logID, tt.resources)
+			keysAndValues := ResourceListToLoggableWithValues([]interface{}{"logID", tt.logID}, tt.resources)
 			kvListFormat(&buf, keysAndValues...)
 			got := buf.String()
 			if got != tt.expected {