diff --git a/cmd/plugins/topology-aware/policy/topology-aware-policy.go b/cmd/plugins/topology-aware/policy/topology-aware-policy.go index efc829cc3..99a5b00ad 100644 --- a/cmd/plugins/topology-aware/policy/topology-aware-policy.go +++ b/cmd/plugins/topology-aware/policy/topology-aware-policy.go @@ -134,6 +134,7 @@ func (p *policy) Start() error { p.checkColdstartOff() p.root.Dump("") + p.checkAllocations(" ") return nil } @@ -148,12 +149,74 @@ func (p *policy) Sync(add []cache.Container, del []cache.Container) error { p.AllocateResources(c) } + p.checkAllocations(" ") + return nil } +func (p *policy) checkAllocations(format string, args ...interface{}) { + var ( + prefix = fmt.Sprintf(format, args...) + cpuExcl = 0 + cpuPart = 0 + mem = uint64(0) + ctr = map[string]Grant{} + dup = map[string][]Grant{} + ) + + getMemorySize := func(g Grant) uint64 { + var ( + limit = g.MemLimit() + total = uint64(0) + ) + for _, memType := range []memoryType{memoryDRAM, memoryPMEM, memoryHBM} { + total += limit[memType] + } + return total + } + + for _, g := range p.allocations.grants { + log.Debug("%s %s (%s)", prefix, g, g.GetContainer().GetID()) + full := g.ExclusiveCPUs().Size() + part := g.CPUPortion() + cpuExcl += full + cpuPart += part + + mem += getMemorySize(g) + + _, ok := p.cache.LookupContainer(g.GetContainer().GetID()) + if !ok { + log.Error("%s %s STALE container among allocations, not found in cache", prefix, g) + } + + key := g.GetContainer().PrettyName() + old, ok := ctr[key] + if ok { + if len(dup[key]) == 0 { + dup[key] = []Grant{old, g} + } else { + dup[key] = append(dup[key], g) + } + } else { + ctr[key] = g + } + } + + for key, grants := range dup { + log.Error("%s DUPLICATE allocation entries for container %s", prefix, key) + for _, g := range grants { + log.Error("%s %s (%s)", prefix, g, g.GetContainer().GetID()) + } + } + + log.Info("%s total CPU granted: %dm (%d exclusive + %dm shared), total memory granted: %s", + prefix, 1000*cpuExcl+cpuPart, cpuExcl, cpuPart, prettyMem(mem)) + +} + // AllocateResources is a resource allocation request for this policy. func (p *policy) AllocateResources(container cache.Container) error { - log.Debug("allocating resources for %s...", container.PrettyName()) + log.Debug("allocating resources for %s (%s)...", container.PrettyName(), container.GetID()) err := p.allocateResources(container, "") if err != nil { @@ -161,6 +224,7 @@ func (p *policy) AllocateResources(container cache.Container) error { } p.root.Dump("") + p.checkAllocations(" ", container.PrettyName()) return nil } @@ -179,13 +243,14 @@ func (p *policy) allocateResources(container cache.Container, poolHint string) e // ReleaseResources is a resource release request for this policy. func (p *policy) ReleaseResources(container cache.Container) error { - log.Debug("releasing resources of %s...", container.PrettyName()) + log.Debug("releasing resources for %s (%s)...", container.PrettyName(), container.GetID()) if grant, found := p.releasePool(container); found { p.updateSharedAllocations(&grant) } p.root.Dump("") + p.checkAllocations(" ", container.PrettyName()) return nil } @@ -208,6 +273,7 @@ func (p *policy) UpdateResources(container cache.Container) error { } p.root.Dump("") + p.checkAllocations(" ", container.PrettyName()) return nil } @@ -394,7 +460,7 @@ func (p *policy) reallocateResources(containers []cache.Container, pools map[str p.releasePool(c) } for _, c := range containers { - log.Debug("reallocating resources for %s...", c.PrettyName()) + log.Debug("reallocating resources for %s (%s)...", c.PrettyName(), c.GetID()) grant, err := p.allocatePool(c, pools[c.GetID()]) if err != nil { @@ -410,8 +476,6 @@ func (p *policy) reallocateResources(containers []cache.Container, pools map[str p.updateSharedAllocations(nil) - p.root.Dump("") - return nil } @@ -452,6 +516,7 @@ func (p *policy) Reconfigure(newCfg interface{}) error { } p.root.Dump("") + p.checkAllocations(" ") return nil } diff --git a/pkg/resmgr/cache/pod.go b/pkg/resmgr/cache/pod.go index 7e6768ad3..c69bcbe56 100644 --- a/pkg/resmgr/cache/pod.go +++ b/pkg/resmgr/cache/pod.go @@ -93,12 +93,9 @@ func (p *pod) PrettyName() string { } namespace := p.GetNamespace() - switch namespace { - case "default": - p.prettyName = "" - case "": + if namespace == "" { p.prettyName = "/" - default: + } else { p.prettyName = namespace + "/" } diff --git a/pkg/resmgr/flags.go b/pkg/resmgr/flags.go index fbc88a3d2..88f7bdf08 100644 --- a/pkg/resmgr/flags.go +++ b/pkg/resmgr/flags.go @@ -18,7 +18,7 @@ import ( "flag" "time" - nri "github.com/containerd/nri/pkg/api" + "github.com/containerd/nri/pkg/api" "github.com/containers/nri-plugins/pkg/pidfile" ) @@ -54,7 +54,7 @@ func init() { "NRI plugin name to register.") flag.StringVar(&opt.NriPluginIdx, "nri-plugin-index", defaultPluginIndex, "NRI plugin index to register.") - flag.StringVar(&opt.NriSocket, "nri-socket", nri.DefaultSocketPath, + flag.StringVar(&opt.NriSocket, "nri-socket", api.DefaultSocketPath, "NRI unix domain socket path to connect to.") flag.StringVar(&opt.PidFile, "pid-file", pidfile.GetPath(), diff --git a/pkg/resmgr/nri.go b/pkg/resmgr/nri.go index 7460f9c9a..635206423 100644 --- a/pkg/resmgr/nri.go +++ b/pkg/resmgr/nri.go @@ -32,18 +32,22 @@ import ( ) type nriPlugin struct { - logger.Logger stub stub.Stub resmgr *resmgr + byname map[string]cache.Container } +var ( + nri = logger.NewLogger("nri-plugin") +) + func newNRIPlugin(resmgr *resmgr) (*nriPlugin, error) { p := &nriPlugin{ - Logger: logger.NewLogger("nri-plugin"), resmgr: resmgr, + byname: make(map[string]cache.Container), } - p.Info("creating plugin...") + nri.Info("creating plugin...") return p, nil } @@ -71,7 +75,7 @@ func (p *nriPlugin) createStub() error { err error ) - p.Info("creating plugin stub...") + nri.Info("creating plugin stub...") if p.stub, err = stub.New(p, opts...); err != nil { return fmt.Errorf("failed to create NRI plugin stub: %w", err) @@ -85,7 +89,7 @@ func (p *nriPlugin) start() error { return nil } - p.Info("starting plugin...") + nri.Info("starting plugin...") if err := p.createStub(); err != nil { return err @@ -103,15 +107,65 @@ func (p *nriPlugin) stop() { return } - p.Info("stopping plugin...") + nri.Info("stopping plugin...") p.stub.Stop() } func (p *nriPlugin) onClose() { - p.Error("connection to NRI/runtime lost, exiting...") + nri.Error("connection to NRI/runtime lost, exiting...") os.Exit(1) } +func (p *nriPlugin) syncNamesToContainers(containers []cache.Container) []cache.Container { + unmapped := make([]cache.Container, 0, len(p.byname)) + + for _, ctr := range containers { + old := p.mapNameToContainer(ctr) + if old != nil && old.GetID() != ctr.GetID() { + unmapped = append(unmapped, old) + } + } + + return unmapped +} + +func (p *nriPlugin) mapNameToContainer(ctr cache.Container) cache.Container { + name := ctr.PrettyName() + old, ok := p.byname[name] + + p.byname[name] = ctr + if ok { + nri.Info("%s: remapped container from %s to %s", name, old.GetID(), ctr.GetID()) + return old + } + + nri.Info("%s: mapped container to %s", name, ctr.GetID()) + return nil +} + +func (p *nriPlugin) unmapName(name string) (cache.Container, bool) { + old, ok := p.byname[name] + if ok { + delete(p.byname, name) + nri.Info("%s: unmapped container from %s", name, old.GetID()) + } + return old, ok +} + +func (p *nriPlugin) unmapContainer(ctr cache.Container) { + name := ctr.PrettyName() + old, ok := p.byname[name] + if ok { + if old == ctr { + delete(p.byname, name) + nri.Info("%s: unmapped container (%s)", name, ctr.GetID()) + } else { + nri.Warn("%s: leaving container mapped, ID mismatch (%s != %s)", name, + old.GetID(), ctr.GetID()) + } + } +} + func (p *nriPlugin) Configure(ctx context.Context, cfg, runtime, version string) (stub.EventMask, error) { event := Configure @@ -141,17 +195,17 @@ func (p *nriPlugin) syncWithNRI(pods []*api.PodSandbox, containers []*api.Contai allocated := []cache.Container{} released := []cache.Container{} - m.Info("synchronizing cache state with NRI runtime...") + nri.Info("synchronizing cache state with NRI runtime...") _, _, deleted := m.cache.RefreshPods(pods) for _, c := range deleted { - m.Info("discovered stale container %s (%s)...", c.PrettyName(), c.GetID()) + nri.Info("discovered stale container %s (%s)...", c.PrettyName(), c.GetID()) released = append(released, c) } _, deleted = m.cache.RefreshContainers(containers) for _, c := range deleted { - m.Info("discovered stale container %s (%s)...", c.PrettyName(), c.GetID()) + nri.Info("discovered stale container %s (%s)...", c.PrettyName(), c.GetID()) released = append(released, c) } @@ -162,7 +216,7 @@ func (p *nriPlugin) syncWithNRI(pods []*api.PodSandbox, containers []*api.Contai for _, c := range ctrs { switch c.GetState() { case cache.ContainerStateRunning, cache.ContainerStateCreated: - m.Info("discovered created/running container %s (%s)...", + nri.Info("discovered created/running container %s (%s)...", c.PrettyName(), c.GetID()) allocated = append(allocated, c) @@ -173,12 +227,12 @@ func (p *nriPlugin) syncWithNRI(pods []*api.PodSandbox, containers []*api.Contai case cache.ContainerStateExited: /* Treat stopped containers as deleted */ - m.Info("discovered stopped container %s (%s)...", + nri.Info("discovered stopped container %s (%s)...", c.PrettyName(), c.GetID()) released = append(released, c) default: - m.Info("discovered container %s (%s), in state %v, ignoring it...", + nri.Info("discovered container %s (%s), in state %v, ignoring it...", c.PrettyName(), c.GetID(), c.GetState()) } } @@ -206,11 +260,12 @@ func (p *nriPlugin) Synchronize(ctx context.Context, pods []*api.PodSandbox, con allocated, released, err := p.syncWithNRI(pods, containers) if err != nil { - p.resmgr.Error("failed to synchronize with NRI: %v", err) + nri.Error("failed to synchronize with NRI: %v", err) return nil, err } - if err := m.policy.Sync(allocated, released); err != nil { + unmapped := p.syncNamesToContainers(allocated) + if err := m.policy.Sync(allocated, append(released, unmapped...)); err != nil { return nil, fmt.Errorf("failed to sync policy %s: %w", m.policy.ActivePolicy(), err) } @@ -266,7 +321,7 @@ func (p *nriPlugin) StopPodSandbox(ctx context.Context, podSandbox *api.PodSandb } if err := p.runPostReleaseHooks(event, released...); err != nil { - m.Error("%s: failed to run post-release hooks for pod %s: %v", + nri.Error("%s: failed to run post-release hooks for pod %s: %v", event, pod.GetName(), err) } @@ -305,7 +360,7 @@ func (p *nriPlugin) RemovePodSandbox(ctx context.Context, podSandbox *api.PodSan } if err := p.runPostReleaseHooks(event, released...); err != nil { - m.Error("%s: failed to run post-release hooks for pod %s: %v", + nri.Error("%s: failed to run post-release hooks for pod %s: %v", event, pod.GetName(), err) } @@ -343,6 +398,14 @@ func (p *nriPlugin) CreateContainer(ctx context.Context, podSandbox *api.PodSand } c.UpdateState(cache.ContainerStateCreating) + if old, ok := p.unmapName(c.PrettyName()); ok { + nri.Info("%s: releasing stale instance %s", c.PrettyName(), old.GetID()) + if err := m.policy.ReleaseResources(old); err != nil { + nri.Error("%s: failed to release stale instance %s", c.PrettyName(), old.GetID()) + } + old.UpdateState(cache.ContainerStateExited) + } + if err := m.policy.AllocateResources(c); err != nil { c.UpdateState(cache.ContainerStateStale) return nil, nil, fmt.Errorf("failed to allocate resources: %w", err) @@ -358,7 +421,7 @@ func (p *nriPlugin) CreateContainer(ctx context.Context, podSandbox *api.PodSand c.UpdateState(cache.ContainerStateCreated) if err := p.runPostAllocateHooks(event, c); err != nil { - m.Error("%s: failed to run post-allocate hooks for %s: %v", + nri.Error("%s: failed to run post-allocate hooks for %s: %v", event, container.GetName(), err) p.runPostReleaseHooks(event, c) return nil, nil, fmt.Errorf("failed to allocate container resources: %w", err) @@ -370,6 +433,8 @@ func (p *nriPlugin) CreateContainer(ctx context.Context, podSandbox *api.PodSand adjust = p.getPendingAdjustment(container) updates = p.getPendingUpdates(container) + p.mapNameToContainer(c) + return adjust, updates, nil } @@ -408,11 +473,11 @@ func (p *nriPlugin) StartContainer(ctx context.Context, pod *api.PodSandbox, con } if _, err := m.policy.HandleEvent(e); err != nil { - m.Error("%s: policy failed to handle event %s: %v", event, e.Type, err) + nri.Error("%s: policy failed to handle event %s: %v", event, e.Type, err) } if err := p.runPostStartHooks(event, c); err != nil { - m.Error("%s: failed to run post-start hooks for %s: %v", + nri.Error("%s: failed to run post-start hooks for %s: %v", event, c.PrettyName(), err) } @@ -446,7 +511,7 @@ func (p *nriPlugin) UpdateContainer(ctx context.Context, pod *api.PodSandbox, co } if realUpdates := c.SetResourceUpdates(res); !realUpdates { - p.Warn("UpdateContainer with identical resources, short-circuiting it...") + nri.Warn("UpdateContainer with identical resources, short-circuiting it...") if v := c.GetCPUShares(); v != 0 { c.SetCPUShares(v) } @@ -471,7 +536,7 @@ func (p *nriPlugin) UpdateContainer(ctx context.Context, pod *api.PodSandbox, co } else { old := c.GetResourceRequirements() upd, _ := c.GetResourceUpdates() - p.Warn("UpdateContainer with real resource changes: %s -> %s", + nri.Warn("UpdateContainer with real resource changes: %s -> %s", old.String(), upd.String()) if err := m.policy.UpdateResources(c); err != nil { return nil, fmt.Errorf("failed to update resources: %w", err) @@ -507,6 +572,8 @@ func (p *nriPlugin) StopContainer(ctx context.Context, pod *api.PodSandbox, cont return nil, nil } + p.unmapContainer(c) + if err := m.policy.ReleaseResources(c); err != nil { return nil, fmt.Errorf("failed to release resources: %w", err) } @@ -623,69 +690,70 @@ func (p *nriPlugin) dump(dir, event string, args ...interface{}) { case RunPodSandbox, StopPodSandbox, RemovePodSandbox: if dir == in { if len(args) != 1 { - p.Error("%s %s ", dir, event, len(args)) + nri.Error("%s %s ", dir, event, len(args)) return } pod, ok := args[0].(*api.PodSandbox) if !ok { - p.Error("%s %s ", dir, event, args[0]) + nri.Error("%s %s ", dir, event, args[0]) return } - p.Info("%s %s %s/%s", dir, event, pod.GetNamespace(), pod.GetName()) + nri.Info("%s %s %s/%s", dir, event, pod.GetNamespace(), pod.GetName()) p.dumpDetails(dir, event, pod) } else { if len(args) != 1 { - p.Error("%s %s ", dir, event, len(args)) + nri.Error("%s %s ", dir, event, len(args)) return } err := args[0] if err != nil { - p.Error("%s %s FAILED: %v", dir, event, err.(error)) + nri.Error("%s %s FAILED: %v", dir, event, err.(error)) return } - p.Info("%s %s", dir, event) + nri.Info("%s %s", dir, event) } case CreateContainer, StartContainer, StopContainer, RemoveContainer: if dir == in { if len(args) != 2 { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, len(args)) return } pod, ok := args[0].(*api.PodSandbox) if !ok { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, args[0], args[1]) return } ctr, ok := args[1].(*api.Container) if !ok { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, args[0], args[1]) return } - p.Info("%s %s %s/%s:%s", dir, event, pod.GetNamespace(), pod.GetName(), ctr.GetName()) + nri.Info("%s %s %s/%s/%s (%s)", dir, event, + pod.GetNamespace(), pod.GetName(), ctr.GetName(), ctr.GetId()) p.dumpDetails(dir, event, ctr) } else { if len(args) < 1 { - p.Error("%s %s ", dir, event) + nri.Error("%s %s ", dir, event) return } err := args[len(args)-1] if err != nil { - p.Error("%s %s FAILED: %v", dir, event, err.(error)) + nri.Error("%s %s FAILED: %v", dir, event, err.(error)) return } - p.Info("%s %s", dir, event) + nri.Info("%s %s", dir, event) switch event { case CreateContainer: @@ -699,46 +767,47 @@ func (p *nriPlugin) dump(dir, event string, args ...interface{}) { case UpdateContainer: if dir == in { if len(args) != 3 { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, len(args)) return } pod, ok := args[0].(*api.PodSandbox) if !ok { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, args[0], args[1], args[2]) return } ctr, ok := args[1].(*api.Container) if !ok { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, args[0], args[1], args[2]) return } res, ok := args[2].(*api.LinuxResources) if !ok { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, args[0], args[1], args[2]) return } - p.Info("%s %s %s/%s:%s", dir, event, pod.GetNamespace(), pod.GetName(), ctr.GetName()) + nri.Info("%s %s %s/%s/%s (%s)", dir, event, + pod.GetNamespace(), pod.GetName(), ctr.GetName(), ctr.GetId()) p.dumpDetails(dir, event, ctr) p.dumpDetails(dir, event, res) } else { if len(args) < 1 { - p.Error("%s %s ", dir, event) + nri.Error("%s %s ", dir, event) return } err := args[len(args)-1] if err != nil { - p.Error("%s %s FAILED: %v", dir, event, err.(error)) + nri.Error("%s %s FAILED: %v", dir, event, err.(error)) return } - p.Info("%s %s", dir, event) + nri.Info("%s %s", dir, event) switch event { case CreateContainer: @@ -752,88 +821,88 @@ func (p *nriPlugin) dump(dir, event string, args ...interface{}) { case UpdateContainers: // post-config outgoing UpdateContainers if dir == out { if len(args) != 1 { - p.Error("%s %s ", dir, event, len(args)) + nri.Error("%s %s ", dir, event, len(args)) return } - p.Info("%s %s", dir, event) + nri.Info("%s %s", dir, event) p.dumpDetails(dir, event, args[0]) } else { if len(args) != 1 { - p.Error("%s %s ", dir, event, len(args)) + nri.Error("%s %s ", dir, event, len(args)) return } err := args[0] if err == nil { - p.Info("%s %s", dir, event) + nri.Info("%s %s", dir, event) return } - p.Error("%s %s FAILED: %v", dir, event, err.(error)) + nri.Error("%s %s FAILED: %v", dir, event, err.(error)) } case Configure: if dir == in { if len(args) != 2 { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, len(args)) return } runtime, ok := args[0].(string) if !ok { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, args[0], args[1]) return } version, ok := args[1].(string) if !ok { - p.Error("%s %s ", + nri.Error("%s %s ", dir, event, args[0], args[1]) return } - p.Info("%s %s, runtime %s %s", dir, event, runtime, version) + nri.Info("%s %s, runtime %s %s", dir, event, runtime, version) } else { - p.Info("%s %s", dir, event) + nri.Info("%s %s", dir, event) } case Synchronize: if dir == in { if len(args) != 2 { - p.Error("%s %s ", "%s", data) + nri.DebugBlock(dir+" ", "%s", data) case *api.Container: data := marshal("container", obj) - p.DebugBlock(dir+" ", "%s", data) + nri.DebugBlock(dir+" ", "%s", data) case *api.LinuxResources: data := marshal("updated resources", obj) - p.DebugBlock(dir+" ", "%s", data) + nri.DebugBlock(dir+" ", "%s", data) case *api.ContainerAdjustment: data := marshal("adjustment", obj) - p.DebugBlock(dir+" ", "%s", data) + nri.DebugBlock(dir+" ", "%s", data) case []*api.ContainerUpdate: for idx, update := range obj { data := marshal("update", update) - p.DebugBlock(dir+fmt.Sprintf(" ", idx), "%s", data) + nri.DebugBlock(dir+fmt.Sprintf(" ", idx), "%s", data) } case []*api.PodSandbox: for idx, pod := range obj { data := marshal("pod", pod) - p.DebugBlock(dir+fmt.Sprintf(" ", idx), "%s", data) + nri.DebugBlock(dir+fmt.Sprintf(" ", idx), "%s", data) } case []*api.Container: for idx, ctr := range obj { data := marshal("container", ctr) - p.DebugBlock(dir+fmt.Sprintf(" ", idx), "%s", data) + nri.DebugBlock(dir+fmt.Sprintf(" ", idx), "%s", data) } default: - p.DebugBlock(dir+" ", "%s", []byte(fmt.Sprintf("%T", arg))) + nri.DebugBlock(dir+" ", "%s", []byte(fmt.Sprintf("%T", arg))) } } @@ -934,7 +1003,7 @@ func (p *nriPlugin) runPostAllocateHooks(method string, created cache.Container) for _, c := range m.cache.GetPendingContainers() { if c == created { if err := m.control.RunPreCreateHooks(c); err != nil { - m.Warn("%s pre-create hook failed for %s: %v", + nri.Warn("%s pre-create hook failed for %s: %v", method, c.PrettyName(), err) } continue @@ -943,11 +1012,11 @@ func (p *nriPlugin) runPostAllocateHooks(method string, created cache.Container) switch c.GetState() { case cache.ContainerStateRunning, cache.ContainerStateCreated: if err := m.control.RunPostUpdateHooks(c); err != nil { - m.Warn("%s post-update hook failed for %s: %v", + nri.Warn("%s post-update hook failed for %s: %v", method, c.PrettyName(), err) } default: - m.Warn("%s: skipping container %s (in state %v)", method, + nri.Warn("%s: skipping container %s (in state %v)", method, c.PrettyName(), c.GetState()) } } @@ -958,7 +1027,7 @@ func (p *nriPlugin) runPostAllocateHooks(method string, created cache.Container) func (p *nriPlugin) runPostStartHooks(method string, c cache.Container) error { m := p.resmgr if err := m.control.RunPostStartHooks(c); err != nil { - m.Error("%s: post-start hook failed for %s: %v", method, c.PrettyName(), err) + nri.Error("%s: post-start hook failed for %s: %v", method, c.PrettyName(), err) } return nil } @@ -968,21 +1037,21 @@ func (p *nriPlugin) runPostReleaseHooks(method string, released ...cache.Contain m := p.resmgr for _, c := range released { if err := m.control.RunPostStopHooks(c); err != nil { - m.Warn("post-stop hook failed for %s: %v", c.PrettyName(), err) + nri.Warn("post-stop hook failed for %s: %v", c.PrettyName(), err) } } for _, c := range m.cache.GetPendingContainers() { switch state := c.GetState(); state { case cache.ContainerStateStale, cache.ContainerStateExited: if err := m.control.RunPostStopHooks(c); err != nil { - m.Warn("post-stop hook failed for %s: %v", c.PrettyName(), err) + nri.Warn("post-stop hook failed for %s: %v", c.PrettyName(), err) } case cache.ContainerStateRunning, cache.ContainerStateCreated: if err := m.control.RunPostUpdateHooks(c); err != nil { - m.Warn("post-update hook failed for %s: %v", c.PrettyName(), err) + nri.Warn("post-update hook failed for %s: %v", c.PrettyName(), err) } default: - m.Warn("%s: skipping pending container %s (in state %v)", + nri.Warn("%s: skipping pending container %s (in state %v)", method, c.PrettyName(), c.GetState()) } } @@ -999,7 +1068,7 @@ func (p *nriPlugin) runPostUpdateHooks(method string) error { return err } default: - m.Warn("%s: skipping container %s (in state %v)", method, + nri.Warn("%s: skipping container %s (in state %v)", method, c.PrettyName(), c.GetState()) } } diff --git a/pkg/resmgr/resource-manager.go b/pkg/resmgr/resource-manager.go index b827c75ad..93b11a4b2 100644 --- a/pkg/resmgr/resource-manager.go +++ b/pkg/resmgr/resource-manager.go @@ -23,7 +23,6 @@ import ( "github.com/containers/nri-plugins/pkg/agent" "github.com/containers/nri-plugins/pkg/healthz" "github.com/containers/nri-plugins/pkg/instrumentation" - "github.com/containers/nri-plugins/pkg/log" logger "github.com/containers/nri-plugins/pkg/log" "github.com/containers/nri-plugins/pkg/pidfile" "github.com/containers/nri-plugins/pkg/resmgr/cache" @@ -54,7 +53,6 @@ type Config = cfgapi.CommonConfig // resmgr is the implementation of ResourceManager. type resmgr struct { - logger.Logger sync.RWMutex agent *agent.Agent cfg cfgapi.ResmgrConfig @@ -72,6 +70,10 @@ const ( topologyLogger = "topology-hints" ) +var ( + log = logger.Get("resource-manager") +) + // NewResourceManager creates a new ResourceManager instance. func NewResourceManager(backend policy.Backend, agt *agent.Agent) (ResourceManager, error) { topology.SetLogger(logger.Get(topologyLogger)) @@ -83,15 +85,14 @@ func NewResourceManager(backend policy.Backend, agt *agent.Agent) (ResourceManag } m := &resmgr{ - Logger: logger.NewLogger("resource-manager"), - agent: agt, + agent: agt, } if err := m.setupCache(); err != nil { return nil, err } - m.Info("running as an NRI plugin...") + log.Info("running as an NRI plugin...") nrip, err := newNRIPlugin(m) if err != nil { return nil, err @@ -121,7 +122,7 @@ func NewResourceManager(backend policy.Backend, agt *agent.Agent) (ResourceManag // Start the resource manager. func (m *resmgr) Start() error { - m.Infof("starting agent, waiting for initial configuration...") + log.Infof("starting agent, waiting for initial configuration...") err := m.agent.Start(m.updateConfig) if err != nil { return err @@ -137,7 +138,7 @@ func (m *resmgr) updateConfig(newCfg interface{}) error { cfg, ok := newCfg.(cfgapi.ResmgrConfig) if !ok { if !m.running { - m.Fatalf("got initial configuration of unexpected type %T", newCfg) + log.Fatalf("got initial configuration of unexpected type %T", newCfg) } else { return fmt.Errorf("got configuration of unexpected type %T", newCfg) } @@ -147,32 +148,32 @@ func (m *resmgr) updateConfig(newCfg interface{}) error { dump, _ := yaml.Marshal(cfg) if !m.running { - m.Infof("acquired initial configuration %s (generation %d):", + log.Infof("acquired initial configuration %s (generation %d):", meta.GetName(), meta.GetGeneration()) - m.InfoBlock(" ", "%s", dump) + log.InfoBlock(" ", "%s", dump) if err := m.start(cfg); err != nil { - m.Fatalf("failed to start with initial configuration: %v", err) + log.Fatalf("failed to start with initial configuration: %v", err) } m.running = true return nil } - m.Infof("configuration update %s (generation %d):", meta.GetName(), meta.GetGeneration()) - m.InfoBlock(" ", "%s", dump) + log.Infof("configuration update %s (generation %d):", meta.GetName(), meta.GetGeneration()) + log.InfoBlock(" ", "%s", dump) return m.reconfigure(cfg) } // Start resource management once we acquired initial configuration. func (m *resmgr) start(cfg cfgapi.ResmgrConfig) error { - m.Info("starting resource manager...") + log.Info("starting resource manager...") m.cfg = cfg mCfg := cfg.CommonConfig() - log.Configure(&mCfg.Log) + logger.Configure(&mCfg.Log) instrumentation.Reconfigure(&mCfg.Instrumentation) if err := m.policy.Start(m.cfg.PolicyConfig()); err != nil { @@ -198,14 +199,14 @@ func (m *resmgr) start(cfg cfgapi.ResmgrConfig) error { return resmgrError("failed to write PID file: %v", err) } - m.Info("up and running") + log.Info("up and running") return nil } // Stop stops the resource manager. func (m *resmgr) Stop() { - m.Info("shutting down...") + log.Info("shutting down...") m.Lock() defer m.Unlock() @@ -272,9 +273,9 @@ func (m *resmgr) startControllers() error { // updateTopologyZones updates the 'topology zone' CRDs. func (m *resmgr) updateTopologyZones() { if zones := m.policy.GetTopologyZones(); len(zones) != 0 { - m.Info("updating topology zones...") + log.Info("updating topology zones...") if err := m.agent.UpdateNrtCR(m.policy.ActivePolicy(), zones); err != nil { - m.Error("failed to update topology zones: %v", err) + log.Error("failed to update topology zones: %v", err) } } } @@ -286,7 +287,7 @@ func (m *resmgr) registerPolicyMetricsCollector() error { if pc.HasPolicySpecificMetrics() { return pc.RegisterPolicyMetricsCollector() } - m.Info("%s policy has no policy-specific metrics.", m.policy.ActivePolicy()) + log.Info("%s policy has no policy-specific metrics.", m.policy.ActivePolicy()) return nil } @@ -294,7 +295,7 @@ func (m *resmgr) reconfigure(cfg cfgapi.ResmgrConfig) error { apply := func(cfg cfgapi.ResmgrConfig) error { mCfg := cfg.CommonConfig() - log.Configure(&mCfg.Log) + logger.Configure(&mCfg.Log) instrumentation.Reconfigure(&mCfg.Instrumentation) m.control.StartStopControllers(&mCfg.Control) @@ -305,7 +306,7 @@ func (m *resmgr) reconfigure(cfg cfgapi.ResmgrConfig) error { err = m.nri.updateContainers() if err != nil { - m.Warnf("failed to apply configuration to containers: %v", err) + log.Warnf("failed to apply configuration to containers: %v", err) } return nil @@ -314,18 +315,18 @@ func (m *resmgr) reconfigure(cfg cfgapi.ResmgrConfig) error { m.Lock() defer m.Unlock() - m.Infof("activating new configuration...") + log.Infof("activating new configuration...") err := apply(cfg) if err == nil { m.cfg = cfg return nil } - m.Errorf("failed to apply update: %v", err) + log.Errorf("failed to apply update: %v", err) revertErr := apply(m.cfg) if revertErr != nil { - m.Warnf("failed to revert configuration: %v", revertErr) + log.Warnf("failed to revert configuration: %v", revertErr) } return err