-
Notifications
You must be signed in to change notification settings - Fork 584
/
Copy pathmetrics.go
107 lines (95 loc) · 3.7 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics provides a way to capture request metrics.
package metrics
import (
"net/url"
"strconv"
"strings"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/request"
"github.com/prometheus/client_golang/prometheus"
"sigs.k8s.io/controller-runtime/pkg/metrics"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors"
)
// Subsystem and metric names used when declaring the AWS request collectors.
const (
	metricAWSSubsystem       = "aws"
	metricRequestCountKey    = "api_requests_total"
	metricRequestDurationKey = "api_request_duration_seconds"
	metricAPICallRetries     = "api_call_retries"
)

// Label names attached to the AWS request collectors.
const (
	metricControllerLabel = "controller"
	metricServiceLabel    = "service"
	metricRegionLabel     = "region"
	metricOperationLabel  = "operation"
	metricStatusCodeLabel = "status_code"
	metricErrorCodeLabel  = "error_code"
)
// awsRequestCount is a counter of AWS requests, labelled by controller,
// service, region, operation, HTTP status code and error code.
var awsRequestCount = prometheus.NewCounterVec(
	prometheus.CounterOpts{
		Subsystem: metricAWSSubsystem,
		Name:      metricRequestCountKey,
		Help:      "Total number of AWS requests",
	},
	[]string{
		metricControllerLabel,
		metricServiceLabel,
		metricRegionLabel,
		metricOperationLabel,
		metricStatusCodeLabel,
		metricErrorCodeLabel,
	},
)

// awsRequestDurationSeconds is a histogram of AWS request latency, labelled by
// controller, service, region and operation. No Buckets are specified here.
var awsRequestDurationSeconds = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Subsystem: metricAWSSubsystem,
		Name:      metricRequestDurationKey,
		Help:      "Latency of HTTP requests to AWS",
	},
	[]string{
		metricControllerLabel,
		metricServiceLabel,
		metricRegionLabel,
		metricOperationLabel,
	},
)

// awsCallRetries is a histogram of the number of retries per AWS API call,
// labelled by controller, service, region and operation, with one bucket per
// retry count from 0 through 10.
var awsCallRetries = prometheus.NewHistogramVec(
	prometheus.HistogramOpts{
		Subsystem: metricAWSSubsystem,
		Name:      metricAPICallRetries,
		Help:      "Number of retries made against an AWS API",
		Buckets:   []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
	},
	[]string{
		metricControllerLabel,
		metricServiceLabel,
		metricRegionLabel,
		metricOperationLabel,
	},
)
// init registers the AWS request collectors with the controller-runtime
// metrics registry, in the order count, duration, retries.
func init() {
	metrics.Registry.MustRegister(
		awsRequestCount,
		awsRequestDurationSeconds,
		awsCallRetries,
	)
}
// CaptureRequestMetrics returns a function that records metrics for a single
// AWS SDK request on behalf of the named controller.
//
// For each request it receives, the returned function increments the request
// counter and observes both the time elapsed since the request's AttemptTime
// and the request's RetryCount. The status code label is "0" when no HTTP
// response is present. The error code label is empty when the request has no
// error, and "internal" when the error carries no AWS error code.
func CaptureRequestMetrics(controller string) func(r *request.Request) {
	return func(r *request.Request) {
		// Measure first so label computation is not included in the latency.
		elapsed := time.Since(r.AttemptTime)

		opName := r.Operation.Name
		awsRegion := aws.StringValue(r.Config.Region)
		svc := endpointToService(r.ClientInfo.Endpoint)

		status := "0"
		if resp := r.HTTPResponse; resp != nil {
			status = strconv.Itoa(resp.StatusCode)
		}

		var errCode string
		if r.Error != nil {
			if code, found := awserrors.Code(r.Error); found {
				errCode = code
			} else {
				errCode = "internal"
			}
		}

		// The two histograms share the same four labels, in this order.
		common := []string{controller, svc, awsRegion, opName}

		awsRequestCount.WithLabelValues(controller, svc, awsRegion, opName, status, errCode).Inc()
		awsRequestDurationSeconds.WithLabelValues(common...).Observe(elapsed.Seconds())
		awsCallRetries.WithLabelValues(common...).Observe(float64(r.RetryCount))
	}
}
// endpointToService derives a short service name from an AWS endpoint URL.
//
// The name is the first dot-separated label of the URL's host, so
// "https://ec2.us-east-1.amazonaws.com" yields "ec2". When the endpoint cannot
// be parsed, or parsing yields no usable first host label (for example an
// endpoint given without a scheme, whose host is empty), the endpoint is
// returned unchanged instead of an empty name.
func endpointToService(endpoint string) string {
	endpointURL, err := url.Parse(endpoint)
	if err != nil {
		return endpoint
	}

	// strings.Split always yields at least one element, so indexing [0] is
	// safe; that element is empty when the host is empty or starts with a dot.
	service := strings.Split(endpointURL.Host, ".")[0]
	if service == "" {
		return endpoint
	}
	return service
}