From 3de94a9345927222b1627855f8c048f63232b342 Mon Sep 17 00:00:00 2001 From: jay-mckay Date: Wed, 18 Dec 2024 14:39:43 -0700 Subject: [PATCH] remember last seen process usage for more accurate process aggregation --- metrics/collector.go | 75 +++++++++++++++++++++++++++++++++++++------- metrics/legacy.go | 2 +- metrics/unified.go | 2 +- 3 files changed, 66 insertions(+), 13 deletions(-) diff --git a/metrics/collector.go b/metrics/collector.go index 8611c07..9e47eb6 100644 --- a/metrics/collector.go +++ b/metrics/collector.go @@ -24,7 +24,8 @@ var ( namespace = "cgroup_warden" labels = []string{"cgroup", "username"} procLabels = []string{"cgroup", "username", "proc"} - lock = sync.RWMutex{} + metricLock = sync.RWMutex{} + cacheLock = sync.RWMutex{} ) func MetricsHandler(root string) http.HandlerFunc { @@ -133,15 +134,21 @@ func (c *Collector) CollectMetrics() []Metric { defer wg.Done() metric := h.CreateMetric(group, pids) if metric != nil { - lock.Lock() + metricLock.Lock() metrics = append(metrics, *metric) - lock.Unlock() + metricLock.Unlock() } }(group, pids) } wg.Wait() + for group := range groupCache { + if _, found := groupPIDs[group]; !found { + delete(groupPIDs, group) + } + } + return metrics } @@ -151,10 +158,27 @@ type Process struct { count uint64 } +type PIDCacheEntry struct { + cpu float64 + memory uint64 +} + +type PIDCache map[uint64]PIDCacheEntry + +type CommandCacheEntry struct { + inactiveCPU float64 + inactiveMem uint64 + activePIDs PIDCache +} + +type CommandCache map[string]CommandCacheEntry + +var groupCache = make(map[string]CommandCache) + // Given a set of PIDs, aggregate process count, memory, and // CPU usage on the process name associated with the PIDs. // Returns a map of process names -> aggregate usage. -func ProcInfo(pids map[uint64]bool) map[string]Process { +func ProcInfo(pids map[uint64]bool, cgroup string) map[string]Process { processes := make(map[string]Process) fs, err := procfs.NewDefaultFS() @@ -163,6 +187,14 @@ func ProcInfo(pids map[uint64]bool) map[string]Process { return processes } + cacheLock.Lock() + commandCache, found := groupCache[cgroup] + cacheLock.Unlock() + + if !found { + commandCache = make(CommandCache) + } + for pid := range pids { proc, err := fs.Proc(int(pid)) if err != nil { @@ -179,18 +211,39 @@ func ProcInfo(pids map[uint64]bool) map[string]Process { continue } - process, ok := processes[comm] - if !ok { - process = Process{cpu: 0, memory: 0, count: 0} + pce := PIDCacheEntry{cpu: stat.CPUTime(), memory: uint64(stat.ResidentMemory())} + + commandCacheEntry, found := commandCache[comm] + if !found { + commandCacheEntry = CommandCacheEntry{inactiveCPU: 0, inactiveMem: 0, activePIDs: make(PIDCache)} } - process.cpu = process.cpu + stat.CPUTime() - process.memory = process.memory + uint64(stat.ResidentMemory()) - process.count = process.count + 1 + commandCacheEntry.activePIDs[pid] = pce + commandCache[comm] = commandCacheEntry + } - processes[comm] = process + for command, commandEntry := range commandCache { + var cpu float64 + var mem uint64 + for pid, pidEntry := range commandEntry.activePIDs { + if _, found := pids[pid]; !found { + commandEntry.inactiveCPU += pidEntry.cpu + commandEntry.inactiveMem += pidEntry.memory + delete(commandEntry.activePIDs, pid) + } else { + cpu += pidEntry.cpu + mem += pidEntry.memory + } + } + p := Process{cpu: cpu + commandEntry.inactiveCPU, memory: mem + commandEntry.inactiveMem, count: uint64(len(commandEntry.activePIDs))} + processes[command] = p + commandCache[command] = commandEntry } + cacheLock.Lock() + groupCache[cgroup] = commandCache + cacheLock.Unlock() + return processes } diff --git a/metrics/legacy.go b/metrics/legacy.go index 6b06aa5..d990614 100644 --- a/metrics/legacy.go +++ b/metrics/legacy.go @@ -66,7 +66,7 @@ func (l *legacy) CreateMetric(group string, pids pidSet) *Metric { metric.memoryUsage = stat.Memory.TotalRSS } - metric.processes = ProcInfo(pids) + metric.processes = ProcInfo(pids, group) metric.cgroup = group diff --git a/metrics/unified.go b/metrics/unified.go index ad58b27..6f443fd 100644 --- a/metrics/unified.go +++ b/metrics/unified.go @@ -70,7 +70,7 @@ func (u *unified) CreateMetric(group string, pids pidSet) *Metric { metric.memoryUsage = stat.Memory.Usage } - metric.processes = ProcInfo(pids) + metric.processes = ProcInfo(pids, group) metric.cgroup = group