Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat:add some self metric for agent cpu and memory usage #243

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions collector/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@ require (
github.com/pebbe/zmq4 v1.2.7
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.11.0 // indirect
github.com/shirou/gopsutil v3.21.11+incompatible
github.com/spf13/viper v1.10.1
github.com/stretchr/testify v1.7.1
github.com/tklauser/go-sysconf v0.3.10 // indirect
github.com/vishvananda/netns v0.0.0-20211101163701-50045581ed74
github.com/yusufpapurcu/wmi v1.2.2 // indirect
go.opentelemetry.io/otel v1.2.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.25.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.2.0
Expand Down
11 changes: 11 additions & 0 deletions collector/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7
github.com/go-logr/logr v0.4.0 h1:K7/B1jt6fIBQVd4Owv2MqGQClcgf0R266+7C/QjRcLc=
github.com/go-logr/logr v0.4.0/go.mod h1:z6/tIYblkpsD+a4lm/fGIIU9mZ+XfAiaFtq7xTgseGU=
github.com/go-ole/go-ole v1.2.4/go.mod h1:XCwSNxSkXRo4vlyPy93sltvi/qJq0jqQhjqQNIwKuxM=
github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY=
github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0=
github.com/go-openapi/jsonpointer v0.19.2/go.mod h1:3akKfEdA7DF1sugOqz1dVQHBcuDBPKZGEoHC/NkiQRg=
github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg=
github.com/go-openapi/jsonreference v0.19.2/go.mod h1:jMjeRr2HHw6nAVajTXJ4eiUwohSTlpa0o73RUL1owJc=
Expand Down Expand Up @@ -439,6 +441,8 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/sagikazarmark/crypt v0.4.0/go.mod h1:ALv2SRj7GxYV4HO9elxH9nS6M9gW+xDNxqmyJ6RfDFM=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/shirou/gopsutil v3.21.11+incompatible h1:+1+c1VGhc88SSonWP6foOcLhvnKlUeu/erjjvaPEYiI=
github.com/shirou/gopsutil v3.21.11+incompatible/go.mod h1:5b4v6he4MtMOwMlS0TUMTu2PcXUg8+E1lC7eC3UO/RA=
github.com/shirou/w32 v0.0.0-20160930032740-bb4de0191aa4/go.mod h1:qsXQc7+bwAM3Q1u/4XEfrquwF8Lw7D7y5cD8CuHnfIc=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
Expand Down Expand Up @@ -471,6 +475,10 @@ github.com/stretchr/testify v1.7.1 h1:5TQK59W5E3v0r2duFAb7P95B6hEeOyEnHRa8MjYSMT
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/tklauser/go-sysconf v0.3.10 h1:IJ1AZGZRWbY8T5Vfk04D9WOA5WSejdflXxP03OUqALw=
github.com/tklauser/go-sysconf v0.3.10/go.mod h1:C8XykCvCb+Gn0oNCWPIlcb0RuglQTYaQ2hGm7jmxEFk=
github.com/tklauser/numcpus v0.4.0 h1:E53Dm1HjH1/R2/aoCtXtPgzmElmn51aOkhCFSuZq//o=
github.com/tklauser/numcpus v0.4.0/go.mod h1:1+UI3pD8NW14VMwdgJNJ1ESk2UnwhAnz5hMwiKKqXCQ=
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=
github.com/vishvananda/netlink v1.1.0/go.mod h1:cTgwzPIzzgDAYoQrMm0EdrjRUBkTqKYppBueQtXaqoE=
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
Expand All @@ -481,6 +489,8 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yusufpapurcu/wmi v1.2.2 h1:KBNDSne4vP5mbSWnJbO+51IMOXJB67QiYCSBrubbPRg=
github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0=
go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs=
go.etcd.io/etcd/client/pkg/v3 v3.5.1/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g=
go.etcd.io/etcd/client/v2 v2.305.1/go.mod h1:pMEacxZW7o8pg4CrFE7pquyCJJzZvkvdD2RibOCCCGs=
Expand Down Expand Up @@ -681,6 +691,7 @@ golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190922100055-0a153f010e69/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190924154521-2837fb4f24fe/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
Expand Down
3 changes: 2 additions & 1 deletion collector/observability/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ type Config struct {
}

// PrometheusConfig holds the settings of the Prometheus self-telemetry
// exporter.
type PrometheusConfig struct {
	// Port is decoded from the "port" key, e.g. ":9501" in the sample
	// collector configuration.
	Port string `mapstructure:"port,omitempty"`

	// ExtraMetrics is decoded from the "extra_metrics" key and lists the
	// selectors of optional self-metrics to register.
	ExtraMetrics []string `mapstructure:"extra_metrics"`
}

type OtlpGrpcConfig struct {
Expand Down
66 changes: 61 additions & 5 deletions collector/observability/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,14 @@ import (
"context"
"errors"
"fmt"
"net/http"
"os"
"sync"
"time"

"github.com/shirou/gopsutil/process"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/exporters/stdout/stdoutmetric"
Expand All @@ -17,9 +24,6 @@ import (
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.7.0"
"go.uber.org/zap"
"net/http"
"os"
"time"
)

const (
Expand All @@ -32,6 +36,21 @@ const (
PrometheusKindExporter = "prometheus"
)

// Package-level state for the agent's resource self-metrics.
var (
	// agentCPUTimeSeconds holds the observer registered under
	// agentCPUTimeSecondsMetric.
	agentCPUTimeSeconds metric.Float64CounterObserver

	// agentMemUsedBytes holds the observer registered under
	// agentMemoryUsedBytesMetric.
	agentMemUsedBytes metric.Float64GaugeObserver

	// selfTelemetryOnce ensures the two observers above are registered at
	// most once.
	selfTelemetryOnce sync.Once
)

// Selector and instrument names used by the optional self-metrics.
const (
	// resourcePerformance is the extra-metrics selector that enables the
	// agent CPU and memory instruments.
	resourcePerformance = "resource"

	// agentCPUTimeSecondsMetric is the name of the agent CPU time instrument.
	agentCPUTimeSecondsMetric = "kindling_telemetry_agent_cpu_time_seconds"

	// agentMemoryUsedBytesMetric is the name of the agent memory instrument.
	agentMemoryUsedBytesMetric = "kindling_telemetry_agent_memory_used_bytes"
)

// otelLoggerHandler reports errors raised by OpenTelemetry through a zap
// logger.
type otelLoggerHandler struct {
// logger receives a warning for each error passed to Handle.
logger *zap.Logger
}
Expand All @@ -40,6 +59,39 @@ func (h *otelLoggerHandler) Handle(err error) {
h.logger.Warn("Opentelemetry-go encountered an error: ", zap.Error(err))
}

// RegisterExtraMetrics enables the optional self-metrics named in selectors
// on the meter provider mp.
//
// The only selector acted upon is resourcePerformance ("resource"), which
// registers the agent CPU and memory instruments; any other value is ignored.
// Because this function has no error return, a registration failure is
// reported through otel.Handle instead of being dropped.
func RegisterExtraMetrics(selectors []string, mp metric.MeterProvider) {
	for _, selector := range selectors {
		switch selector {
		case resourcePerformance:
			if err := registerAgentResourcePerformanceMetrics(mp); err != nil {
				otel.Handle(fmt.Errorf("registering extra metrics %q: %w", selector, err))
			}
		}
	}
}

// registerAgentResourcePerformanceMetrics registers, on the "kindling" meter
// of mp, two asynchronous instruments describing the current process:
//
//   - agentCPUTimeSecondsMetric: CPU time, with attribute type=user|system
//   - agentMemoryUsedBytesMetric: memory, with attribute type=rss|vms
//
// Registration is guarded by selfTelemetryOnce: only the first call does any
// work, and every later call is a no-op that returns nil.
//
// It returns an error when the handle for the current process cannot be
// created or when either instrument cannot be registered. Failures to read the
// process statistics during a collection are reported through otel.Handle and
// the affected observation is skipped.
func registerAgentResourcePerformanceMetrics(mp metric.MeterProvider) (err error) {
	selfTelemetryOnce.Do(func() {
		pid := os.Getpid()
		proc, procErr := process.NewProcess(int32(pid))
		if procErr != nil {
			err = fmt.Errorf("creating process handle for pid %d: %w", pid, procErr)
			return
		}
		meter := mp.Meter("kindling")

		agentCPUTimeSeconds, err = meter.NewFloat64CounterObserver(agentCPUTimeSecondsMetric,
			func(ctx context.Context, result metric.Float64ObserverResult) {
				cpuTime, timesErr := proc.Times()
				if timesErr != nil {
					// cpuTime is not usable on error; skip this collection.
					otel.Handle(fmt.Errorf("reading agent cpu times: %w", timesErr))
					return
				}
				result.Observe(cpuTime.User, attribute.String("type", "user"))
				result.Observe(cpuTime.System, attribute.String("type", "system"))
			})
		if err != nil {
			err = fmt.Errorf("registering %s: %w", agentCPUTimeSecondsMetric, err)
			return
		}

		agentMemUsedBytes, err = meter.NewFloat64GaugeObserver(agentMemoryUsedBytesMetric,
			func(ctx context.Context, result metric.Float64ObserverResult) {
				mem, memErr := proc.MemoryInfo()
				if memErr != nil {
					// mem is not usable on error; skip this collection.
					otel.Handle(fmt.Errorf("reading agent memory info: %w", memErr))
					return
				}
				result.Observe(float64(mem.RSS), attribute.String("type", "rss"))
				result.Observe(float64(mem.VMS), attribute.String("type", "vms"))
			})
		if err != nil {
			err = fmt.Errorf("registering %s: %w", agentMemoryUsedBytesMetric, err)
		}
	})
	// Return the named result so a failure inside the Once body reaches the
	// caller; the original unconditional "return nil" discarded it.
	return err
}

func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, error) {
otel.SetErrorHandler(&otelLoggerHandler{logger: logger})
hostName, err := os.Hostname()
Expand All @@ -55,7 +107,7 @@ func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, er
}

serviceName := KindlingServiceNamePrefix + "-" + clusterId
rs, err := resource.New(context.Background(),
rs, _ := resource.New(context.Background(),
resource.WithAttributes(
semconv.ServiceNameKey.String(serviceName),
semconv.ServiceInstanceIDKey.String(hostName),
Expand All @@ -69,6 +121,7 @@ func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, er
selector.NewWithInexpensiveDistribution(),
aggregation.CumulativeTemporalitySelector(),
),
controller.WithCollectPeriod(0), // In pull mode, let Prometheus decide the collection period.
controller.WithResource(rs),
)

Expand All @@ -86,7 +139,10 @@ func InitTelemetry(logger *zap.Logger, config *Config) (metric.MeterProvider, er
logger.Warn("error starting self-telemetry server: ", zap.Error(err))
}
}()
return exp.MeterProvider(), nil

mp := exp.MeterProvider()
RegisterExtraMetrics(config.PromCfg.ExtraMetrics, mp)
return mp, nil
} else {
var collectPeriod time.Duration

Expand Down
3 changes: 3 additions & 0 deletions deploy/agent/kindling-collector-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ observability:
export_kind: stdout
prometheus:
port: :9501
# Optional extra self-metrics, not enabled unless listed here.
# "resource" exposes the agent's own CPU and memory usage metrics.
# extra_metrics: ["resource"]
otlp:
collect_period: 15s
# Note: DO NOT add the prefix "http://"
Expand Down