Skip to content

Commit

Permalink
remember last seen process usage for more accurate process aggregation
Browse files Browse the repository at this point in the history
  • Loading branch information
jay-mckay committed Dec 18, 2024
1 parent 92ff124 commit 3de94a9
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 13 deletions.
75 changes: 64 additions & 11 deletions metrics/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ var (
namespace = "cgroup_warden"
labels = []string{"cgroup", "username"}
procLabels = []string{"cgroup", "username", "proc"}
lock = sync.RWMutex{}
metricLock = sync.RWMutex{}
cacheLock = sync.RWMutex{}
)

func MetricsHandler(root string) http.HandlerFunc {
Expand Down Expand Up @@ -133,15 +134,21 @@ func (c *Collector) CollectMetrics() []Metric {
defer wg.Done()
metric := h.CreateMetric(group, pids)
if metric != nil {
lock.Lock()
metricLock.Lock()
metrics = append(metrics, *metric)
lock.Unlock()
metricLock.Unlock()
}
}(group, pids)
}

wg.Wait()

for group := range groupCache {
if _, found := groupPIDs[group]; !found {
delete(groupPIDs, group)
}
}

return metrics
}

Expand All @@ -151,10 +158,27 @@ type Process struct {
count uint64
}

type PIDCacheEntry struct {
cpu float64
memory uint64
}

type PIDCache map[uint64]PIDCacheEntry

type CommandCacheEntry struct {
inactiveCPU float64
inactiveMem uint64
activePIDs PIDCache
}

type CommandCache map[string]CommandCacheEntry

var groupCache = make(map[string]CommandCache)

// Given a set of PIDs, aggregate process count, memory, and
// CPU usage on the process name associated with the PIDs.
// Returns a map of process names -> aggregate usage.
func ProcInfo(pids map[uint64]bool) map[string]Process {
func ProcInfo(pids map[uint64]bool, cgroup string) map[string]Process {
processes := make(map[string]Process)

fs, err := procfs.NewDefaultFS()
Expand All @@ -163,6 +187,14 @@ func ProcInfo(pids map[uint64]bool) map[string]Process {
return processes
}

cacheLock.Lock()
commandCache, found := groupCache[cgroup]
cacheLock.Unlock()

if !found {
commandCache = make(CommandCache)
}

for pid := range pids {
proc, err := fs.Proc(int(pid))
if err != nil {
Expand All @@ -179,18 +211,39 @@ func ProcInfo(pids map[uint64]bool) map[string]Process {
continue
}

process, ok := processes[comm]
if !ok {
process = Process{cpu: 0, memory: 0, count: 0}
pce := PIDCacheEntry{cpu: stat.CPUTime(), memory: uint64(stat.ResidentMemory())}

commandCacheEntry, found := commandCache[comm]
if !found {
commandCacheEntry = CommandCacheEntry{inactiveCPU: 0, inactiveMem: 0, activePIDs: make(PIDCache)}
}

process.cpu = process.cpu + stat.CPUTime()
process.memory = process.memory + uint64(stat.ResidentMemory())
process.count = process.count + 1
commandCacheEntry.activePIDs[pid] = pce
commandCache[comm] = commandCacheEntry
}

processes[comm] = process
for command, commandEntry := range commandCache {
var cpu float64
var mem uint64
for pid, pidEntry := range commandEntry.activePIDs {
if _, found := pids[pid]; !found {
commandEntry.inactiveCPU += pidEntry.cpu
commandEntry.inactiveMem += pidEntry.memory
delete(commandEntry.activePIDs, pid)
} else {
cpu += pidEntry.cpu
mem += pidEntry.memory
}
}
p := Process{cpu: cpu + commandEntry.inactiveCPU, memory: mem + commandEntry.inactiveMem, count: uint64(len(commandEntry.activePIDs))}
processes[command] = p
commandCache[command] = commandEntry
}

cacheLock.Lock()
groupCache[cgroup] = commandCache
cacheLock.Unlock()

return processes
}

Expand Down
2 changes: 1 addition & 1 deletion metrics/legacy.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func (l *legacy) CreateMetric(group string, pids pidSet) *Metric {
metric.memoryUsage = stat.Memory.TotalRSS
}

metric.processes = ProcInfo(pids)
metric.processes = ProcInfo(pids, group)

metric.cgroup = group

Expand Down
2 changes: 1 addition & 1 deletion metrics/unified.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func (u *unified) CreateMetric(group string, pids pidSet) *Metric {
metric.memoryUsage = stat.Memory.Usage
}

metric.processes = ProcInfo(pids)
metric.processes = ProcInfo(pids, group)

metric.cgroup = group

Expand Down

0 comments on commit 3de94a9

Please sign in to comment.