Skip to content

Commit

Permalink
feat:add some self metric for agent cpu and memory usage (#243)
Browse files Browse the repository at this point in the history
* feat:add some self metric for agent cpu and memory usage

Signed-off-by: niejiangang <niejiangang@harmonycloud.cn>

* feat: support exporting real-time self-metric data

Signed-off-by: niejiangang <niejiangang@harmonycloud.cn>

* config: register the agent performance metric by option

Signed-off-by: niejiangang <niejiangang@harmonycloud.cn>

* config: update config and describe

Signed-off-by: niejiangang <niejiangang@harmonycloud.cn>

* refactor: rename selector to extra_metrics

Signed-off-by: niejiangang <niejiangang@harmonycloud.cn>

* fix: fix typo

Signed-off-by: niejiangang <niejiangang@harmonycloud.cn>
  • Loading branch information
NeJan2020 authored Jun 13, 2022
1 parent b626a2d commit e962764
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 6 deletions.
3 changes: 3 additions & 0 deletions collector/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ require (
github.com/pebbe/zmq4 v1.2.7
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.11.0 // indirect
github.com/shirou/gopsutil v3.21.11+incompatible
github.com/spf13/viper v1.10.1
github.com/stretchr/testify v1.7.1
github.com/tklauser/go-sysconf v0.3.10 // indirect
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.opentelemetry.io/otel v1.2.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.25.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.2.0
Expand Down
11 changes: 11 additions & 0 deletions collector/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7
github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc=
github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg=
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc=
Expand Down Expand Up @@ -439,6 +441,8 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/sagikazarmark/crypt v0.4.0/go.mod h1:ALv2SRj7GxYV4HO9elxH9nS6M9gW+xDNxqmyJ6RfDFM=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q1u/4XEfrquwF8Lw7D7y5cD8CuHnfIc=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
Expand Down Expand Up @@ -471,6 +475,10 @@ github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMT
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw=
github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk=
github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o=
github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
Expand All @@ -481,6 +489,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg=
github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs=
Expand Down Expand Up @@ -681,6 +691,7 @@ golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
3 changes: 2 additions & 1 deletion collector/observability/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ type Config struct {
}

// PrometheusConfig holds the options read from the "prometheus" section of
// the observability configuration.
type PrometheusConfig struct {
Port string `mapstructure:"port,omitempty"`
Port string `mapstructure:"port,omitempty"`
// ExtraMetrics lists the optional self-metric groups handed to
// RegisterExtraMetrics; "resource" is the group recognized there.
ExtraMetrics []string `mapstructure:"extra_metrics"`
}

type OtlpGrpcConfig struct {
Expand Down
66 changes: 61 additions & 5 deletions collector/observability/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@ import (
"context"
"errors"
"fmt"
"net/http"
"os"
"sync"
"time"

"github.com/shirou/gopsutil/process"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/exporters/stdout/stdoutmetric"
Expand All @@ -17,9 +24,6 @@ import (
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.7.0"
"go.uber.org/zap"
"net/http"
"os"
"time"
)

const (
Expand All @@ -32,6 +36,21 @@ const (
PrometheusKindExporter = "prometheus"
)

// Package-level state backing the agent's optional resource self-metrics.
var (
// selfTelemetryOnce guards the one-time creation of the observers below.
selfTelemetryOnce sync.Once
// agentCPUTimeSeconds is the observer created under agentCPUTimeSecondsMetric.
agentCPUTimeSeconds metric.Float64CounterObserver
// agentMemUsedBytes is the observer created under agentMemoryUsedBytesMetric.
agentMemUsedBytes metric.Float64GaugeObserver
)

// Identifiers used by the optional agent self-metrics.
const (
	// resourcePerformance is the extra-metrics selector that enables the
	// agent CPU and memory instruments.
	resourcePerformance = "resource"

	// agentCPUTimeSecondsMetric is the instrument name for agent CPU time.
	agentCPUTimeSecondsMetric = "kindling_telemetry_agent_cpu_time_seconds"
	// agentMemoryUsedBytesMetric is the instrument name for agent memory usage.
	agentMemoryUsedBytesMetric = "kindling_telemetry_agent_memory_used_bytes"
)

// otelLoggerHandler reports errors raised by opentelemetry-go through a zap
// logger; InitTelemetry installs it as the global OpenTelemetry error handler.
type otelLoggerHandler struct {
// logger receives a warning for every error passed to Handle.
logger *zap.Logger
}
Expand All @@ -40,6 +59,39 @@ func (h *otelLoggerHandler) Handle(err error) {
h.logger.Warn("Opentelemetry-go encountered an error: ", zap.Error(err))
}

// RegisterExtraMetrics registers the optional self-metric groups named in
// selectors on the given meter provider.
//
// The only selector handled is "resource", which registers the agent CPU and
// memory instruments; any other selector is ignored. Registration failures
// are passed to otel.Handle so they reach the globally configured
// OpenTelemetry error handler instead of being dropped silently.
func RegisterExtraMetrics(selectors []string, mp metric.MeterProvider) {
	for _, selector := range selectors {
		switch selector {
		case resourcePerformance:
			if err := registerAgentResourcePerformanceMetrics(mp); err != nil {
				otel.Handle(fmt.Errorf("registering %q extra metrics: %w", selector, err))
			}
		}
	}
}

// registerAgentResourcePerformanceMetrics registers asynchronous instruments
// that report the CPU time and memory usage of the current process on a meter
// obtained from mp.
//
// The instruments are created at most once per process (guarded by
// selfTelemetryOnce); every later call skips creation and returns nil, even
// if the first attempt failed.
//
// It returns an error when the handle for the current process cannot be
// obtained or when creating either instrument fails.
func registerAgentResourcePerformanceMetrics(mp metric.MeterProvider) error {
	proc, err := process.NewProcess(int32(os.Getpid()))
	if err != nil {
		// Without a process handle the callbacks below would dereference nil.
		return fmt.Errorf("opening handle for agent process: %w", err)
	}
	meter := mp.Meter("kindling")

	var regErr error
	selfTelemetryOnce.Do(func() {
		agentCPUTimeSeconds, regErr = meter.NewFloat64CounterObserver(agentCPUTimeSecondsMetric, func(_ context.Context, result metric.Float64ObserverResult) {
			cpuTime, err := proc.Times()
			if err != nil {
				otel.Handle(fmt.Errorf("reading agent CPU times: %w", err))
				return
			}
			if cpuTime == nil {
				// Nothing to report; skip this collection instead of panicking.
				return
			}
			result.Observe(cpuTime.User, attribute.String("type", "user"))
			result.Observe(cpuTime.System, attribute.String("type", "system"))
		})
		if regErr != nil {
			regErr = fmt.Errorf("creating %s observer: %w", agentCPUTimeSecondsMetric, regErr)
			return
		}
		agentMemUsedBytes, regErr = meter.NewFloat64GaugeObserver(agentMemoryUsedBytesMetric, func(_ context.Context, result metric.Float64ObserverResult) {
			mem, err := proc.MemoryInfo()
			if err != nil {
				otel.Handle(fmt.Errorf("reading agent memory info: %w", err))
				return
			}
			if mem == nil {
				// Nothing to report; skip this collection instead of panicking.
				return
			}
			result.Observe(float64(mem.RSS), attribute.String("type", "rss"))
			result.Observe(float64(mem.VMS), attribute.String("type", "vms"))
		})
		if regErr != nil {
			regErr = fmt.Errorf("creating %s observer: %w", agentMemoryUsedBytesMetric, regErr)
		}
	})
	// Propagate the creation error; the original always returned nil here.
	return regErr
}

func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, error) {
otel.SetErrorHandler(&otelLoggerHandler{logger: logger})
hostName, err := os.Hostname()
Expand All @@ -55,7 +107,7 @@ func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, er
}

serviceName := KindlingServiceNamePrefix + "-" + clusterId
rs, err := resource.New(context.Background(),
rs, _ := resource.New(context.Background(),
resource.WithAttributes(
semconv.ServiceNameKey.String(serviceName),
semconv.ServiceInstanceIDKey.String(hostName),
Expand All @@ -69,6 +121,7 @@ func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, er
selector.NewWithInexpensiveDistribution(),
aggregation.CumulativeTemporalitySelector(),
),
controller.WithCollectPeriod(0), // In pull mode, let Prometheus decide the collection period
controller.WithResource(rs),
)

Expand All @@ -86,7 +139,10 @@ func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, er
logger.Warn("error starting self-telemetry server: ", zap.Error(err))
}
}()
return exp.MeterProvider(), nil

mp := exp.MeterProvider()
RegisterExtraMetrics(config.PromCfg.ExtraMetrics, mp)
return mp, nil
} else {
var collectPeriod time.Duration

Expand Down
3 changes: 3 additions & 0 deletions deploy/agent/kindling-collector-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ observability:
export_kind: stdout
prometheus:
port: :9501
# Extra self-metrics for special purposes
# "resource" enables the agent CPU and memory usage metrics
# extra_metrics: ["resource"]
otlp:
collect_period: 15s
# Note: DO NOT add the prefix "http://"
Expand Down

0 comments on commit e962764

Please sign in to comment.