diff --git a/plugins/telemetry/config.go b/plugins/telemetry/config.go
index 1ef38375f6..f3796e51f9 100644
--- a/plugins/telemetry/config.go
+++ b/plugins/telemetry/config.go
@@ -16,12 +16,22 @@ package telemetry
 
 import "time"
 
+const (
+    // default period between updates
+    defaultUpdatePeriod = time.Second * 30
+    // minimum period between updates
+    minimumUpdatePeriod = time.Second * 1
+)
+
 // Config file representation for telemetry plugin
 type Config struct {
     // Custom polling interval, default value is 30s
     PollingInterval time.Duration `json:"polling-interval"`
     // Allows to disable plugin
     Disabled bool `json:"disabled"`
+    // Skip collecting some of the metrics:
+    // runtime, memory, buffers, nodes, interfaces
+    Skipped []string `json:"skipped"`
 }
 
 func defaultConfig() *Config {
diff --git a/plugins/telemetry/metrics.go b/plugins/telemetry/metrics.go
index 333442c858..8db159ec45 100644
--- a/plugins/telemetry/metrics.go
+++ b/plugins/telemetry/metrics.go
@@ -318,29 +318,70 @@ func (p *Plugin) registerPrometheus() error {
 func (p *Plugin) updatePrometheus(ctx context.Context) {
     p.tracef("running update")
 
-    // Update runtime
-    runtimeInfo, err := p.handler.GetRuntimeInfo(ctx)
-    if err != nil {
-        p.Log.Errorf("GetRuntimeInfo failed: %v", err)
-    } else {
-        p.tracef("runtime info: %+v", runtimeInfo)
-        for _, thread := range runtimeInfo.GetThreads() {
-            for _, item := range thread.Items {
-                stats, ok := p.runtimeStats[item.Name]
+    if !p.skipped[runtimeMetricsNamespace] {
+        // Update runtime
+        runtimeInfo, err := p.handler.GetRuntimeInfo(ctx)
+        if err != nil {
+            p.Log.Errorf("GetRuntimeInfo failed: %v", err)
+        } else {
+            p.tracef("runtime info: %+v", runtimeInfo)
+            for _, thread := range runtimeInfo.GetThreads() {
+                for _, item := range thread.Items {
+                    stats, ok := p.runtimeStats[item.Name]
+                    if !ok {
+                        stats = &runtimeStats{
+                            threadID:   thread.ID,
+                            threadName: thread.Name,
+                            itemName:   item.Name,
+                            metrics:    map[string]prometheus.Gauge{},
+                        }
+
+                        // add gauges with corresponding labels into vectors
+                        for k, vec := range p.runtimeGaugeVecs {
+                            stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
+                                runtimeItemLabel:     item.Name,
+                                runtimeThreadLabel:   thread.Name,
+                                runtimeThreadIDLabel: strconv.Itoa(int(thread.ID)),
+                            })
+                            if err != nil {
+                                p.Log.Error(err)
+                            }
+                        }
+                    }
+
+                    stats.metrics[runtimeCallsMetric].Set(float64(item.Calls))
+                    stats.metrics[runtimeVectorsMetric].Set(float64(item.Vectors))
+                    stats.metrics[runtimeSuspendsMetric].Set(float64(item.Suspends))
+                    stats.metrics[runtimeClocksMetric].Set(item.Clocks)
+                    stats.metrics[runtimeVectorsPerCallMetric].Set(item.VectorsPerCall)
+                }
+            }
+        }
+    }
+
+    if !p.skipped[buffersMetricsNamespace] {
+        // Update buffers
+        buffersInfo, err := p.handler.GetBuffersInfo(ctx)
+        if err != nil {
+            p.Log.Errorf("GetBuffersInfo failed: %v", err)
+        } else {
+            p.tracef("buffers info: %+v", buffersInfo)
+            for _, item := range buffersInfo.GetItems() {
+                stats, ok := p.buffersStats[item.Name]
                 if !ok {
-                    stats = &runtimeStats{
-                        threadID:   thread.ID,
-                        threadName: thread.Name,
-                        itemName:   item.Name,
-                        metrics:    map[string]prometheus.Gauge{},
+                    stats = &buffersStats{
+                        threadID:  item.ThreadID,
+                        itemName:  item.Name,
+                        itemIndex: item.Index,
+                        metrics:   map[string]prometheus.Gauge{},
                     }
 
                     // add gauges with corresponding labels into vectors
-                    for k, vec := range p.runtimeGaugeVecs {
+                    for k, vec := range p.buffersGaugeVecs {
                         stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
-                            runtimeItemLabel:     item.Name,
-                            runtimeThreadLabel:   thread.Name,
-                            runtimeThreadIDLabel: strconv.Itoa(int(thread.ID)),
+                            buffersThreadIDLabel: strconv.Itoa(int(item.ThreadID)),
+                            buffersItemLabel:     item.Name,
+                            buffersIndexLabel:    strconv.Itoa(int(item.Index)),
                         })
                         if err != nil {
                             p.Log.Error(err)
@@ -348,162 +389,131 @@ func (p *Plugin) updatePrometheus(ctx context.Context) {
                         }
                     }
 
-                stats.metrics[runtimeCallsMetric].Set(float64(item.Calls))
-                stats.metrics[runtimeVectorsMetric].Set(float64(item.Vectors))
-                stats.metrics[runtimeSuspendsMetric].Set(float64(item.Suspends))
-                stats.metrics[runtimeClocksMetric].Set(item.Clocks)
-                stats.metrics[runtimeVectorsPerCallMetric].Set(item.VectorsPerCall)
+                    stats.metrics[buffersSizeMetric].Set(float64(item.Size))
+                    stats.metrics[buffersAllocMetric].Set(float64(item.Alloc))
+                    stats.metrics[buffersFreeMetric].Set(float64(item.Free))
+                    stats.metrics[buffersNumAllocMetric].Set(float64(item.NumAlloc))
+                    stats.metrics[buffersNumFreeMetric].Set(float64(item.NumFree))
             }
         }
     }
 
-    // Update buffers
-    buffersInfo, err := p.handler.GetBuffersInfo(ctx)
-    if err != nil {
-        p.Log.Errorf("GetBuffersInfo failed: %v", err)
-    } else {
-        p.tracef("buffers info: %+v", buffersInfo)
-        for _, item := range buffersInfo.GetItems() {
-            stats, ok := p.buffersStats[item.Name]
-            if !ok {
-                stats = &buffersStats{
-                    threadID:  item.ThreadID,
-                    itemName:  item.Name,
-                    itemIndex: item.Index,
-                    metrics:   map[string]prometheus.Gauge{},
-                }
-
-                // add gauges with corresponding labels into vectors
-                for k, vec := range p.buffersGaugeVecs {
-                    stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
-                        buffersThreadIDLabel: strconv.Itoa(int(item.ThreadID)),
-                        buffersItemLabel:     item.Name,
-                        buffersIndexLabel:    strconv.Itoa(int(item.Index)),
-                    })
-                    if err != nil {
-                        p.Log.Error(err)
+    if !p.skipped[memoryMetricsNamespace] {
+        // Update memory
+        memoryInfo, err := p.handler.GetMemory(ctx)
+        if err != nil {
+            p.Log.Errorf("GetMemory failed: %v", err)
+        } else {
+            p.tracef("memory info: %+v", memoryInfo)
+            for _, thread := range memoryInfo.GetThreads() {
+                stats, ok := p.memoryStats[thread.Name]
+                if !ok {
+                    stats = &memoryStats{
+                        threadName: thread.Name,
+                        threadID:   thread.ID,
+                        metrics:    map[string]prometheus.Gauge{},
                     }
-                }
-            }
-
-            stats.metrics[buffersSizeMetric].Set(float64(item.Size))
-            stats.metrics[buffersAllocMetric].Set(float64(item.Alloc))
-            stats.metrics[buffersFreeMetric].Set(float64(item.Free))
-            stats.metrics[buffersNumAllocMetric].Set(float64(item.NumAlloc))
-            stats.metrics[buffersNumFreeMetric].Set(float64(item.NumFree))
-        }
-    }
-
-    // Update memory
-    memoryInfo, err := p.handler.GetMemory(ctx)
-    if err != nil {
-        p.Log.Errorf("GetMemory failed: %v", err)
-    } else {
-        p.tracef("memory info: %+v", memoryInfo)
-        for _, thread := range memoryInfo.GetThreads() {
-            stats, ok := p.memoryStats[thread.Name]
-            if !ok {
-                stats = &memoryStats{
-                    threadName: thread.Name,
-                    threadID:   thread.ID,
-                    metrics:    map[string]prometheus.Gauge{},
-                }
-                // add gauges with corresponding labels into vectors
-                for k, vec := range p.memoryGaugeVecs {
-                    stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
-                        memoryThreadLabel:   thread.Name,
-                        memoryThreadIDLabel: strconv.Itoa(int(thread.ID)),
-                    })
-                    if err != nil {
-                        p.Log.Error(err)
+                    // add gauges with corresponding labels into vectors
+                    for k, vec := range p.memoryGaugeVecs {
+                        stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
+                            memoryThreadLabel:   thread.Name,
+                            memoryThreadIDLabel: strconv.Itoa(int(thread.ID)),
+                        })
+                        if err != nil {
+                            p.Log.Error(err)
+                        }
                     }
                 }
-            }
-
-            stats.metrics[memoryObjectsMetric].Set(float64(thread.Objects))
-            stats.metrics[memoryUsedMetric].Set(float64(thread.Used))
-            stats.metrics[memoryTotalMetric].Set(float64(thread.Total))
-            stats.metrics[memoryFreeMetric].Set(float64(thread.Free))
-            stats.metrics[memoryReclaimedMetric].Set(float64(thread.Reclaimed))
-            stats.metrics[memoryOverheadMetric].Set(float64(thread.Overhead))
-            stats.metrics[memorySizeMetric].Set(float64(thread.Size))
-            stats.metrics[memoryPagesMetric].Set(float64(thread.Pages))
+                stats.metrics[memoryObjectsMetric].Set(float64(thread.Objects))
+                stats.metrics[memoryUsedMetric].Set(float64(thread.Used))
+                stats.metrics[memoryTotalMetric].Set(float64(thread.Total))
+                stats.metrics[memoryFreeMetric].Set(float64(thread.Free))
+                stats.metrics[memoryReclaimedMetric].Set(float64(thread.Reclaimed))
+                stats.metrics[memoryOverheadMetric].Set(float64(thread.Overhead))
+                stats.metrics[memorySizeMetric].Set(float64(thread.Size))
+                stats.metrics[memoryPagesMetric].Set(float64(thread.Pages))
+            }
         }
     }
 
-    // Update node counters
-    nodeCountersInfo, err := p.handler.GetNodeCounters(ctx)
-    if err != nil {
-        p.Log.Errorf("GetNodeCounters failed: %v", err)
-    } else {
-        p.tracef("node counters info: %+v", nodeCountersInfo)
-        for _, item := range nodeCountersInfo.GetCounters() {
-            stats, ok := p.nodeCounterStats[item.Name]
-            if !ok {
-                stats = &nodeCounterStats{
-                    itemName: item.Name,
-                    metrics:  map[string]prometheus.Gauge{},
-                }
+    if !p.skipped[nodeMetricsNamespace] {
+        // Update node counters
+        nodeCountersInfo, err := p.handler.GetNodeCounters(ctx)
+        if err != nil {
+            p.Log.Errorf("GetNodeCounters failed: %v", err)
+        } else {
+            p.tracef("node counters info: %+v", nodeCountersInfo)
+            for _, item := range nodeCountersInfo.GetCounters() {
+                stats, ok := p.nodeCounterStats[item.Name]
+                if !ok {
+                    stats = &nodeCounterStats{
+                        itemName: item.Name,
+                        metrics:  map[string]prometheus.Gauge{},
+                    }
 
-                // add gauges with corresponding labels into vectors
-                for k, vec := range p.nodeCounterGaugeVecs {
-                    stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
-                        nodeCounterItemLabel:   item.Node,
-                        nodeCounterReasonLabel: item.Name,
-                    })
-                    if err != nil {
-                        p.Log.Error(err)
+                    // add gauges with corresponding labels into vectors
+                    for k, vec := range p.nodeCounterGaugeVecs {
+                        stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
+                            nodeCounterItemLabel:   item.Node,
+                            nodeCounterReasonLabel: item.Name,
+                        })
+                        if err != nil {
+                            p.Log.Error(err)
+                        }
                     }
                 }
-            }
 
-            stats.metrics[nodeCounterCounterMetric].Set(float64(item.Value))
+                stats.metrics[nodeCounterCounterMetric].Set(float64(item.Value))
+            }
         }
     }
 
-    // Update interface counters
-    ifStats, err := p.handler.GetInterfaceStats(ctx)
-    if err != nil {
-        p.Log.Errorf("GetInterfaceStats failed: %v", err)
-        return
-    } else {
-        p.tracef("interface stats: %+v", ifStats)
-        if ifStats == nil {
+    if !p.skipped[ifMetricsNamespace] {
+        // Update interface counters
+        ifStats, err := p.handler.GetInterfaceStats(ctx)
+        if err != nil {
+            p.Log.Errorf("GetInterfaceStats failed: %v", err)
             return
-        }
-        for _, item := range ifStats.Interfaces {
-            stats, ok := p.ifCounterStats[item.InterfaceName]
-            if !ok {
-                stats = &ifCounterStats{
-                    name:    item.InterfaceName,
-                    metrics: map[string]prometheus.Gauge{},
-                }
-
-                // add gauges with corresponding labels into vectors
-                for k, vec := range p.ifCounterGaugeVecs {
-                    stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
-                        ifCounterNameLabel:  item.InterfaceName,
-                        ifCounterIndexLabel: fmt.Sprint(item.InterfaceIndex),
-                    })
-                    if err != nil {
-                        p.Log.Error(err)
+        } else {
+            p.tracef("interface stats: %+v", ifStats)
+            if ifStats == nil {
+                return
+            }
+            for _, item := range ifStats.Interfaces {
+                stats, ok := p.ifCounterStats[item.InterfaceName]
+                if !ok {
+                    stats = &ifCounterStats{
+                        name:    item.InterfaceName,
+                        metrics: map[string]prometheus.Gauge{},
+                    }
+
+                    // add gauges with corresponding labels into vectors
+                    for k, vec := range p.ifCounterGaugeVecs {
+                        stats.metrics[k], err = vec.GetMetricWith(prometheus.Labels{
+                            ifCounterNameLabel:  item.InterfaceName,
+                            ifCounterIndexLabel: fmt.Sprint(item.InterfaceIndex),
+                        })
+                        if err != nil {
+                            p.Log.Error(err)
+                        }
                     }
                 }
-            }
 
-            stats.metrics[ifCounterRxPackets].Set(float64(item.RxPackets))
-            stats.metrics[ifCounterRxBytes].Set(float64(item.RxBytes))
-            stats.metrics[ifCounterRxErrors].Set(float64(item.RxErrors))
-            stats.metrics[ifCounterTxPackets].Set(float64(item.TxPackets))
-            stats.metrics[ifCounterTxBytes].Set(float64(item.TxBytes))
-            stats.metrics[ifCounterTxErrors].Set(float64(item.TxErrors))
-            stats.metrics[ifCounterDrops].Set(float64(item.Drops))
-            stats.metrics[ifCounterPunts].Set(float64(item.Punts))
-            stats.metrics[ifCounterIP4].Set(float64(item.IP4))
-            stats.metrics[ifCounterIP6].Set(float64(item.IP6))
-            stats.metrics[ifCounterRxNoBuf].Set(float64(item.RxNoBuf))
-            stats.metrics[ifCounterRxMiss].Set(float64(item.RxMiss))
+                stats.metrics[ifCounterRxPackets].Set(float64(item.RxPackets))
+                stats.metrics[ifCounterRxBytes].Set(float64(item.RxBytes))
+                stats.metrics[ifCounterRxErrors].Set(float64(item.RxErrors))
+                stats.metrics[ifCounterTxPackets].Set(float64(item.TxPackets))
+                stats.metrics[ifCounterTxBytes].Set(float64(item.TxBytes))
+                stats.metrics[ifCounterTxErrors].Set(float64(item.TxErrors))
+                stats.metrics[ifCounterDrops].Set(float64(item.Drops))
+                stats.metrics[ifCounterPunts].Set(float64(item.Punts))
+                stats.metrics[ifCounterIP4].Set(float64(item.IP4))
+                stats.metrics[ifCounterIP6].Set(float64(item.IP6))
+                stats.metrics[ifCounterRxNoBuf].Set(float64(item.RxNoBuf))
+                stats.metrics[ifCounterRxMiss].Set(float64(item.RxMiss))
+            }
         }
     }
diff --git a/plugins/telemetry/telemetry.conf b/plugins/telemetry/telemetry.conf
index d1f8027191..3a05580371 100644
--- a/plugins/telemetry/telemetry.conf
+++ b/plugins/telemetry/telemetry.conf
@@ -3,3 +3,7 @@ polling-interval: 30000000000
 
 # If set to true, telemetry plugin is disabled.
 disabled: false
+
+# Skip collecting some of the metrics.
+# runtime, memory, buffers, nodes, interfaces
+#skipped: [nodes]
diff --git a/plugins/telemetry/telemetry.go b/plugins/telemetry/telemetry.go
index 1acfe4590f..9fb24a9d08 100644
--- a/plugins/telemetry/telemetry.go
+++ b/plugins/telemetry/telemetry.go
@@ -34,13 +34,6 @@ import (
     _ "github.com/ligato/vpp-agent/plugins/telemetry/vppcalls/vpp1908"
 )
 
-const (
-    // default period between updates
-    defaultUpdatePeriod = time.Second * 30
-    // minimum period between updates
-    minimumUpdatePeriod = time.Second * 1
-)
-
 var debug = os.Getenv("DEBUG_TELEMETRY") != ""
 
 // Plugin registers Telemetry Plugin
@@ -54,6 +47,7 @@ type Plugin struct {
     // From config file
     updatePeriod time.Duration
     disabled     bool
+    skipped      map[string]bool
 
     wg   sync.WaitGroup
     quit chan struct{}
@@ -70,6 +64,7 @@ type Deps struct {
 // Init initializes Telemetry Plugin
 func (p *Plugin) Init() error {
     p.quit = make(chan struct{})
+    p.skipped = make(map[string]bool, 0)
 
     // Telemetry config file
     config, err := p.loadConfig()
@@ -92,6 +87,10 @@ func (p *Plugin) Init() error {
             p.Log.Warnf("polling period has to be at least %s, using default: %v",
                 minimumUpdatePeriod, defaultUpdatePeriod)
         }
+
+        // Store map of skipped metrics
+        for _, skip := range config.Skipped {
+            p.skipped[skip] = true
+        }
     }
     // This serves as fallback if the config was not found or if the value is not set in config.
     if p.updatePeriod == 0 {
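Note for reviewers reading this out of context: the whole feature reduces to one pattern — build a string set from `config.Skipped` once in `Init()`, then gate each collector in `updatePrometheus()` with a single map lookup. A minimal, self-contained sketch of that pattern follows; the namespace values used here ("runtime", "memory", "buffers", "nodes", "interfaces") are illustrative stand-ins, since the actual `runtimeMetricsNamespace`, `buffersMetricsNamespace`, etc. constants are defined elsewhere in metrics.go and do not appear in this diff:

```go
package main

import "fmt"

func main() {
	// Illustrative stand-ins for the namespace constants referenced in
	// metrics.go; the real values are defined outside this diff.
	namespaces := []string{"runtime", "memory", "buffers", "nodes", "interfaces"}

	// Config.Skipped, as it would arrive parsed from telemetry.conf.
	configSkipped := []string{"nodes", "interfaces"}

	// Init(): build the lookup set once.
	skipped := make(map[string]bool)
	for _, skip := range configSkipped {
		skipped[skip] = true
	}

	// updatePrometheus(): one map lookup gates each collector.
	// A missing key reads as false, so everything is collected by default.
	for _, ns := range namespaces {
		if !skipped[ns] {
			fmt.Println("collecting:", ns)
		} else {
			fmt.Println("skipping:", ns)
		}
	}
}
```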
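Usage-wise, a deployment that wants to drop the node and interface counters would uncomment the new key and list both names. This extends the `#skipped: [nodes]` example shipped in telemetry.conf above; the accepted names are the five listed in the config comment, and an unrecognized name is harmless since it simply never matches a namespace lookup:

```yaml
polling-interval: 30000000000
disabled: false
skipped: [nodes, interfaces]
```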