Skip to content

Commit

Permalink
Prometheus metrics scraping from CNI metrics helper (#2603)
Browse files Browse the repository at this point in the history
* Prometheus metrics scraping from CNI metrics helper
  • Loading branch information
jayanthvn authored Nov 15, 2023
1 parent dcc2856 commit 2ac9e0a
Show file tree
Hide file tree
Showing 15 changed files with 505 additions and 427 deletions.
1 change: 1 addition & 0 deletions charts/cni-metrics-helper/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ The following table lists the configurable parameters for this chart and their d
| image.account | ECR repository account number | 602401143452 |
| image.domain | ECR repository domain | amazonaws.com |
| env.USE_CLOUDWATCH | Whether to export CNI metrics to CloudWatch | true |
| env.USE_PROMETHEUS | Whether to export CNI metrics to Prometheus | false |
| env.AWS_CLUSTER_ID | ID of the cluster to use when exporting metrics to CloudWatch | default |
| env.AWS_VPC_K8S_CNI_LOGLEVEL | Log verbosity level (i.e. FATAL, ERROR, WARN, INFO, DEBUG) | INFO |
| env.METRIC_UPDATE_INTERVAL | Interval at which to update CloudWatch metrics, in seconds. | |
Expand Down
1 change: 1 addition & 0 deletions charts/cni-metrics-helper/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ image:

env:
USE_CLOUDWATCH: "true"
USE_PROMETHEUS: "false"
AWS_CLUSTER_ID: ""
AWS_VPC_K8S_CNI_LOGLEVEL: "INFO"

Expand Down
13 changes: 11 additions & 2 deletions cmd/aws-k8s-agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,17 @@ import (
"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/eventrecorder"
"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/logger"
"github.com/aws/amazon-vpc-cni-k8s/pkg/version"
"github.com/aws/amazon-vpc-cni-k8s/utils"
metrics "github.com/aws/amazon-vpc-cni-k8s/utils/prometheusmetrics"
)

// Constants used by the aws-node agent entry point.
const (
appName = "aws-node"
// metricsPort is the port handed to metrics.ServeMetrics for the Prometheus
// metrics endpoint.
metricsPort = 61678

// envDisableMetrics names the environment variable (DISABLE_METRICS) that
// _main reads to decide whether the metrics endpoint on metricsPort (61678)
// is started.
envDisableMetrics = "DISABLE_METRICS"
)

func main() {
Expand Down Expand Up @@ -67,8 +74,10 @@ func _main() int {
// Pool manager
go ipamContext.StartNodeIPPoolManager()

// Prometheus metrics
go ipamContext.ServeMetrics()
// Serve Prometheus metrics unless DISABLE_METRICS is set. The variable is a
// "disable" switch looked up with false as its fallback argument, so the
// endpoint must start when the lookup yields false. The previous un-negated
// check started the server only when metrics had been explicitly disabled,
// leaving the endpoint off by default.
if !utils.GetBoolAsStringEnvVar(envDisableMetrics, false) {
	// Prometheus metrics
	go metrics.ServeMetrics(metricsPort)
}

// CNI introspection endpoints
go ipamContext.ServeIntrospection()
Expand Down
46 changes: 42 additions & 4 deletions cmd/cni-metrics-helper/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,34 @@ import (
"github.com/aws/amazon-vpc-cni-k8s/cmd/cni-metrics-helper/metrics"
"github.com/aws/amazon-vpc-cni-k8s/pkg/k8sapi"
"github.com/aws/amazon-vpc-cni-k8s/pkg/publisher"
"github.com/aws/amazon-vpc-cni-k8s/utils/prometheusmetrics"
)

// Constants used by the cni-metrics-helper entry point.
const (
appName = "cni-metrics-helper"

// metricsPort is the port handed to prometheusmetrics.ServeMetrics for the
// Prometheus metrics endpoint.
metricsPort = 61681

// envEnablePrometheusMetrics names the environment variable (USE_PROMETHEUS)
// that turns the metrics endpoint on 61681 on or off. main lower-cases the
// value and treats "yes"/"true" as enable and "no"/"false" as disable; any
// other value leaves the option unchanged.
envEnablePrometheusMetrics = "USE_PROMETHEUS"
)

var (
// prometheusRegistered records whether prometheusRegister has already called
// prometheusmetrics.PrometheusRegister, so that registration runs at most
// once. It is a plain bool; no locking is applied in the code shown here.
prometheusRegistered = false
)

type options struct {
submitCW bool
help bool
submitCW bool
help bool
submitPrometheus bool
}

// prometheusRegister calls prometheusmetrics.PrometheusRegister the first time
// it is invoked and does nothing on later calls, using the package-level
// prometheusRegistered flag to remember that registration already happened.
func prometheusRegister() {
	if prometheusRegistered {
		return
	}
	prometheusmetrics.PrometheusRegister()
	prometheusRegistered = true
}

func main() {
Expand All @@ -52,6 +71,7 @@ func main() {
flags := pflag.NewFlagSet("", pflag.ExitOnError)
flags.AddGoFlagSet(flag.CommandLine)
flags.BoolVar(&options.submitCW, "cloudwatch", true, "a bool")
flags.BoolVar(&options.submitPrometheus, "prometheus metrics", false, "a bool")

flags.Usage = func() {
_, _ = fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
Expand All @@ -77,6 +97,7 @@ func main() {

cwENV, found := os.LookupEnv("USE_CLOUDWATCH")
if found {
cwENV = strings.ToLower(cwENV)
if strings.Compare(cwENV, "yes") == 0 || strings.Compare(cwENV, "true") == 0 {
options.submitCW = true
}
Expand All @@ -85,6 +106,18 @@ func main() {
}
}

// USE_PROMETHEUS overrides the flag value: "yes"/"true" enables Prometheus
// export (and registers the collectors), "no"/"false" disables it, and any
// other value leaves the option as it was. Matching is case-insensitive.
prometheusENV, found := os.LookupEnv(envEnablePrometheusMetrics)
if found {
	prometheusENV = strings.ToLower(prometheusENV)
	switch prometheusENV {
	case "yes", "true":
		options.submitPrometheus = true
		prometheusRegister()
	case "no", "false":
		options.submitPrometheus = false
	}
}

metricUpdateIntervalEnv, found := os.LookupEnv("METRIC_UPDATE_INTERVAL")
if !found {
metricUpdateIntervalEnv = "30"
Expand All @@ -103,7 +136,7 @@ func main() {
// should be name/identifier for the cluster if specified
clusterID, _ := os.LookupEnv("AWS_CLUSTER_ID")

log.Infof("Starting CNIMetricsHelper. Sending metrics to CloudWatch: %v, LogLevel %s, metricUpdateInterval %d", options.submitCW, logConfig.LogLevel, metricUpdateInterval)
log.Infof("Starting CNIMetricsHelper. Sending metrics to CloudWatch: %v, Prometheus: %v, LogLevel %s, metricUpdateInterval %d", options.submitCW, options.submitPrometheus, logConfig.LogLevel, metricUpdateInterval)

clientSet, err := k8sapi.GetKubeClientSet()
if err != nil {
Expand All @@ -129,8 +162,13 @@ func main() {
defer cw.Stop()
}

if options.submitPrometheus {
	// Register the collectors at the point of use, not only while parsing
	// USE_PROMETHEUS: the option can also be switched on by the command-line
	// flag, in which case the environment branch never runs and the server
	// would start with nothing registered. prometheusRegister is a no-op
	// after its first call, so a second invocation here is harmless.
	prometheusRegister()

	// Start prometheus server
	go prometheusmetrics.ServeMetrics(metricsPort)
}

podWatcher := metrics.NewDefaultPodWatcher(k8sClient, log)
var cniMetric = metrics.CNIMetricsNew(clientSet, cw, options.submitCW, log, podWatcher)
var cniMetric = metrics.CNIMetricsNew(clientSet, cw, options.submitCW, options.submitPrometheus, log, podWatcher)

// metric loop
for range time.Tick(time.Duration(metricUpdateInterval) * time.Second) {
Expand Down
32 changes: 19 additions & 13 deletions cmd/cni-metrics-helper/metrics/cni_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,24 +165,26 @@ var InterestingCNIMetrics = map[string]metricsConvert{

// CNIMetricsTarget defines data structure for kube-state-metric target
type CNIMetricsTarget struct {
interestingMetrics map[string]metricsConvert
cwMetricsPublisher publisher.Publisher
kubeClient kubernetes.Interface
podWatcher *defaultPodWatcher
submitCW bool
log logger.Logger
interestingMetrics map[string]metricsConvert
cwMetricsPublisher publisher.Publisher
kubeClient kubernetes.Interface
podWatcher *defaultPodWatcher
submitCW bool
submitPrometheusMetrics bool
log logger.Logger
}

// CNIMetricsNew creates a new metricsTarget
func CNIMetricsNew(k8sClient kubernetes.Interface, cw publisher.Publisher, submitCW bool, l logger.Logger,
func CNIMetricsNew(k8sClient kubernetes.Interface, cw publisher.Publisher, submitCW bool, submitPrometheus bool, l logger.Logger,
watcher *defaultPodWatcher) *CNIMetricsTarget {
return &CNIMetricsTarget{
interestingMetrics: InterestingCNIMetrics,
cwMetricsPublisher: cw,
kubeClient: k8sClient,
podWatcher: watcher,
submitCW: submitCW,
log: l,
interestingMetrics: InterestingCNIMetrics,
cwMetricsPublisher: cw,
kubeClient: k8sClient,
podWatcher: watcher,
submitCW: submitCW,
submitPrometheusMetrics: submitPrometheus,
log: l,
}
}

Expand Down Expand Up @@ -220,3 +222,7 @@ func (t *CNIMetricsTarget) submitCloudWatch() bool {
// getLogger returns the logger stored on the target.
func (t *CNIMetricsTarget) getLogger() logger.Logger {
return t.log
}

// submitPrometheus reports whether Prometheus export was requested for this
// target, i.e. the value stored in the submitPrometheusMetrics field.
func (t *CNIMetricsTarget) submitPrometheus() bool {
return t.submitPrometheusMetrics
}
2 changes: 1 addition & 1 deletion cmd/cni-metrics-helper/metrics/cni_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func TestCNIMetricsNew(t *testing.T) {
ctx := context.Background()
_, _ = m.clientset.CoreV1().Pods("kube-system").Create(ctx, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Name: "aws-node-1"}}, metav1.CreateOptions{})
//cniMetric := CNIMetricsNew(m.clientset, m.mockPublisher, m.discoverController, false, log)
cniMetric := CNIMetricsNew(m.clientset, m.mockPublisher, false, testLog, m.podWatcher)
cniMetric := CNIMetricsNew(m.clientset, m.mockPublisher, false, false, testLog, m.podWatcher)
assert.NotNil(t, cniMetric)
assert.NotNil(t, cniMetric.getCWMetricsPublisher())
assert.NotEmpty(t, cniMetric.getInterestingMetrics())
Expand Down
74 changes: 49 additions & 25 deletions cmd/cni-metrics-helper/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (

"github.com/aws/amazon-vpc-cni-k8s/pkg/publisher"
"github.com/aws/amazon-vpc-cni-k8s/pkg/utils/logger"
"github.com/aws/amazon-vpc-cni-k8s/utils/prometheusmetrics"
"github.com/prometheus/client_golang/prometheus"
)

type metricMatcher func(metric *dto.Metric) bool
Expand All @@ -38,6 +40,7 @@ type metricsTarget interface {
getCWMetricsPublisher() publisher.Publisher
getTargetList(ctx context.Context) ([]string, error)
submitCloudWatch() bool
submitPrometheus() bool
getLogger() logger.Logger
}

Expand Down Expand Up @@ -319,35 +322,50 @@ func produceCloudWatchMetrics(t metricsTarget, families map[string]*dto.MetricFa
for _, action := range convertMetrics.actions {
switch metricType {
case dto.MetricType_COUNTER:
if t.submitCloudWatch() {
dataPoint := &cloudwatch.MetricDatum{
MetricName: aws.String(action.cwMetricName),
Unit: aws.String(cloudwatch.StandardUnitCount),
Value: aws.Float64(action.data.curSingleDataPoint),
}
cw.Publish(dataPoint)
dataPoint := &cloudwatch.MetricDatum{
MetricName: aws.String(action.cwMetricName),
Unit: aws.String(cloudwatch.StandardUnitCount),
Value: aws.Float64(action.data.curSingleDataPoint),
}
cw.Publish(dataPoint)
case dto.MetricType_GAUGE:
if t.submitCloudWatch() {
dataPoint := &cloudwatch.MetricDatum{
MetricName: aws.String(action.cwMetricName),
Unit: aws.String(cloudwatch.StandardUnitCount),
Value: aws.Float64(action.data.curSingleDataPoint),
}
cw.Publish(dataPoint)
dataPoint := &cloudwatch.MetricDatum{
MetricName: aws.String(action.cwMetricName),
Unit: aws.String(cloudwatch.StandardUnitCount),
Value: aws.Float64(action.data.curSingleDataPoint),
}
cw.Publish(dataPoint)
case dto.MetricType_SUMMARY:
if t.submitCloudWatch() {
dataPoint := &cloudwatch.MetricDatum{
MetricName: aws.String(action.cwMetricName),
Unit: aws.String(cloudwatch.StandardUnitCount),
Value: aws.Float64(action.data.curSingleDataPoint),
}
cw.Publish(dataPoint)
dataPoint := &cloudwatch.MetricDatum{
MetricName: aws.String(action.cwMetricName),
Unit: aws.String(cloudwatch.StandardUnitCount),
Value: aws.Float64(action.data.curSingleDataPoint),
}
cw.Publish(dataPoint)
case dto.MetricType_HISTOGRAM:
if t.submitCloudWatch() {
produceHistogram(action, cw)
produceHistogram(action, cw)
}
}
}
}

// Prometheus export supports only gauge metrics for now.

func producePrometheusMetrics(t metricsTarget, families map[string]*dto.MetricFamily, convertDef map[string]metricsConvert) {
prometheusCNIMetrics := prometheusmetrics.GetSupportedPrometheusCNIMetricsMapping()
if len(prometheusCNIMetrics) == 0 {
t.getLogger().Infof("Skipping since prometheus mapping is missing")
return
}
for key, family := range families {
convertMetrics := convertDef[key]
metricType := family.GetType()
for _, action := range convertMetrics.actions {
switch metricType {
case dto.MetricType_GAUGE:
metrics, ok := prometheusCNIMetrics[family.GetName()]
if ok {
metrics.(prometheus.Gauge).Set(action.data.curSingleDataPoint)
}
}
}
Expand Down Expand Up @@ -424,6 +442,12 @@ func Handler(ctx context.Context, t metricsTarget) {
t.getLogger().Infof("Skipping 1st poll after reset, error: %v", err)
}

cw := t.getCWMetricsPublisher()
produceCloudWatchMetrics(t, families, interestingMetrics, cw)
if t.submitCloudWatch() {
cw := t.getCWMetricsPublisher()
produceCloudWatchMetrics(t, families, interestingMetrics, cw)
}

if t.submitPrometheus() {
producePrometheusMetrics(t, families, interestingMetrics)
}
}
4 changes: 4 additions & 0 deletions cmd/cni-metrics-helper/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ func (target *testMetricsTarget) submitCloudWatch() bool {
return false
}

// submitPrometheus always reports false for the test target, so code that
// gates on it (such as Handler) does not run the Prometheus export path.
func (target *testMetricsTarget) submitPrometheus() bool {
return false
}

func TestAPIServerMetric(t *testing.T) {
testTarget := newTestMetricsTarget("cni_test1.data", InterestingCNIMetrics)
ctx := context.Background()
Expand Down
Loading

0 comments on commit 2ac9e0a

Please sign in to comment.