From e0031164ab6814fbf1af2662107b845c1e43a5a3 Mon Sep 17 00:00:00 2001 From: Robert Grandl Date: Thu, 11 Jul 2024 12:27:29 -0700 Subject: [PATCH] Reduce number of buckets for auto-generated metrics (#780) For the auto-generated histogram metrics, we use a large number of buckets. The largest bucket captures values up to 50 quintillion. However, having a large number of buckets results in a significant size for exported metrics, even if they carry 0 data (as most buckets will have 0 values). This leads to: (1) higher costs for the user (e.g., Google Cloud monitoring charges you based on the volume of metrics exported), and (2) increased system load due to handling large metrics data. This PR reduces the number of buckets to capture values up to 5 billion (5e9), which seems reasonable in practice. E.g., auto-generated metrics using these buckets: 1) http_request_latency_micros - even if this is 10 minutes, the value is still 6e7 2) http_request_bytes_received - http request size of more than 5 GB seem rare 3) http_request_bytes_returned - http response size of more than 5 GB seem rare 4) serviceweaver_method_latency_micros - component method execution of more than 1.3 hours seem rare 5) serviceweaver_method_bytes_request - request size of more than 5 GB seem rare 6) serviceweaver_method_bytes_reply - response size of more than 5 GB seem rare --- godeps.txt | 8 ++++--- http.go | 7 +++--- internal/metrics/generated.go | 43 +++++++++++++++++++++++++++++++++++ internal/metrics/stats.go | 32 ++++++++++++++++++++------ internal/status/dashboard.go | 4 ++-- runtime/codegen/metrics.go | 26 ++++++++------------- runtime/protomsg/handler.go | 7 +++--- 7 files changed, 92 insertions(+), 35 deletions(-) create mode 100644 internal/metrics/generated.go diff --git a/godeps.txt b/godeps.txt index 22770f7c1..7a180436c 100644 --- a/godeps.txt +++ b/godeps.txt @@ -4,6 +4,7 @@ github.com/ServiceWeaver/weaver errors fmt github.com/ServiceWeaver/weaver/internal/control + github.com/ServiceWeaver/weaver/internal/metrics github.com/ServiceWeaver/weaver/internal/reflection github.com/ServiceWeaver/weaver/internal/weaver github.com/ServiceWeaver/weaver/metrics @@ -315,8 +316,7 @@ github.com/ServiceWeaver/weaver/internal/heap container/heap github.com/ServiceWeaver/weaver/internal/metrics context - github.com/ServiceWeaver/weaver/runtime/codegen - github.com/ServiceWeaver/weaver/runtime/logging + fmt github.com/ServiceWeaver/weaver/runtime/metrics strings sync @@ -386,8 +386,8 @@ github.com/ServiceWeaver/weaver/internal/status flag fmt github.com/ServiceWeaver/weaver/internal/files + github.com/ServiceWeaver/weaver/internal/metrics github.com/ServiceWeaver/weaver/internal/traceio - github.com/ServiceWeaver/weaver/runtime/codegen github.com/ServiceWeaver/weaver/runtime/colors github.com/ServiceWeaver/weaver/runtime/logging github.com/ServiceWeaver/weaver/runtime/metrics @@ -742,6 +742,7 @@ github.com/ServiceWeaver/weaver/runtime/codegen errors fmt github.com/ServiceWeaver/weaver/internal/config + github.com/ServiceWeaver/weaver/internal/metrics github.com/ServiceWeaver/weaver/metrics github.com/ServiceWeaver/weaver/runtime github.com/ServiceWeaver/weaver/runtime/protos @@ -875,6 +876,7 @@ github.com/ServiceWeaver/weaver/runtime/protomsg encoding/binary errors fmt + github.com/ServiceWeaver/weaver/internal/metrics github.com/ServiceWeaver/weaver/metrics github.com/ServiceWeaver/weaver/runtime/codegen google.golang.org/protobuf/proto diff --git a/http.go b/http.go index 8e344b0f5..d74bd8a81 100644 --- a/http.go +++ b/http.go @@ -21,6 +21,7 @@ import ( "sync/atomic" "time" + imetrics "github.com/ServiceWeaver/weaver/internal/metrics" "github.com/ServiceWeaver/weaver/metrics" "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" ) @@ -58,17 +59,17 @@ var ( httpRequestLatencyMicros = metrics.NewHistogramMap[httpLabels]( "serviceweaver_http_request_latency_micros", "Duration, in microseconds, of HTTP request execution", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) httpRequestBytesReceived = metrics.NewHistogramMap[httpLabels]( "serviceweaver_http_request_bytes_received", "Number of bytes received by HTTP request handlers", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) httpRequestBytesReturned = metrics.NewHistogramMap[httpLabels]( "serviceweaver_http_request_bytes_returned", "Number of bytes returned by HTTP request handlers", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) ) diff --git a/internal/metrics/generated.go b/internal/metrics/generated.go new file mode 100644 index 000000000..f3df39b17 --- /dev/null +++ b/internal/metrics/generated.go @@ -0,0 +1,43 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +// Names of automatically populated metrics. +const ( + MethodCountsName = "serviceweaver_method_count" + MethodErrorsName = "serviceweaver_method_error_count" + MethodLatenciesName = "serviceweaver_method_latency_micros" + MethodBytesRequestName = "serviceweaver_method_bytes_request" + MethodBytesReplyName = "serviceweaver_method_bytes_reply" +) + +// GeneratedBuckets provides rounded bucket boundaries for histograms +// that will only store non-negative values. +// +// Note that these buckets are intended to be used only by the metrics generated +// by the weaver runtime. +var GeneratedBuckets = []float64{ + // Adjacent buckets differ from each other by 2x or 2.5x. + 1, 2, 5, + 10, 20, 50, + 100, 200, 500, + 1000, 2000, 5000, + 10000, 20000, 50000, + 100000, 200000, 500000, + 1000000, 2000000, 5000000, + 10000000, 20000000, 50000000, + 100000000, 200000000, 500000000, + 1000000000, 2000000000, 5000000000, // i.e., 5e9 +} diff --git a/internal/metrics/stats.go b/internal/metrics/stats.go index a278c084a..f547e3ac0 100644 --- a/internal/metrics/stats.go +++ b/internal/metrics/stats.go @@ -16,12 +16,11 @@ package metrics import ( "context" + "fmt" "strings" "sync" "time" - "github.com/ServiceWeaver/weaver/runtime/codegen" - "github.com/ServiceWeaver/weaver/runtime/logging" "github.com/ServiceWeaver/weaver/runtime/metrics" ) @@ -154,7 +153,7 @@ func (s *StatsProcessor) getSnapshot(snapshot []*metrics.MetricSnapshot) { var comp, method string for k, v := range m.Labels { if k == "component" { - comp = logging.ShortenComponent(v) + comp = shortenComponent(v) } else if k == "method" { method = v } @@ -176,13 +175,13 @@ func (s *StatsProcessor) getSnapshot(snapshot []*metrics.MetricSnapshot) { // Aggregate stats within a bucket, based on metric values from different // replicas for the method. switch m.Name { - case codegen.MethodCountsName: + case MethodCountsName: bucket.calls += m.Value - case codegen.MethodBytesReplyName: + case MethodBytesReplyName: bucket.kbSent += m.Value / 1024 // B to KB - case codegen.MethodBytesRequestName: + case MethodBytesRequestName: bucket.kbRecvd += m.Value / 1024 // B to KB - case codegen.MethodLatenciesName: + case MethodLatenciesName: bucket.latencyMs += m.Value / 1000 // µs to ms var count uint64 @@ -293,3 +292,22 @@ func (s *statsMethod) computeStatsStatusz(startTime time.Time) methodStatuszInfo } return result } + +// shortenComponent shortens the given component name to be of the format +// .. (Recall that the full component name is of the format +// //...//.) +// +// TODO(rgrandl): To avoid cyclic dependencies, we have created a local copy of the +// logging.ShortenComponent method. Consider moving logging.ShortenComponent to a +// different package instead. +func shortenComponent(component string) string { + parts := strings.Split(component, "/") + switch len(parts) { + case 0: // should never happen + return "nil" + case 1: + return parts[0] + default: + return fmt.Sprintf("%s.%s", parts[len(parts)-2], parts[len(parts)-1]) + } +} diff --git a/internal/status/dashboard.go b/internal/status/dashboard.go index 7706b9509..519b2b6da 100644 --- a/internal/status/dashboard.go +++ b/internal/status/dashboard.go @@ -28,8 +28,8 @@ import ( "strings" "time" + metrics2 "github.com/ServiceWeaver/weaver/internal/metrics" "github.com/ServiceWeaver/weaver/internal/traceio" - "github.com/ServiceWeaver/weaver/runtime/codegen" "github.com/ServiceWeaver/weaver/runtime/logging" "github.com/ServiceWeaver/weaver/runtime/metrics" "github.com/ServiceWeaver/weaver/runtime/perfetto" @@ -267,7 +267,7 @@ func computeTraffic(status *Status, metrics []*protos.MetricSnapshot) []edge { } byPair := map[pair]int{} for _, metric := range metrics { - if metric.Name != codegen.MethodCountsName { + if metric.Name != metrics2.MethodCountsName { continue } call := pair{ diff --git a/runtime/codegen/metrics.go b/runtime/codegen/metrics.go index f3d19cd61..1647cb3eb 100644 --- a/runtime/codegen/metrics.go +++ b/runtime/codegen/metrics.go @@ -17,42 +17,34 @@ package codegen import ( "time" + imetrics "github.com/ServiceWeaver/weaver/internal/metrics" "github.com/ServiceWeaver/weaver/metrics" ) -// Names of automatically populated metrics. -const ( - MethodCountsName = "serviceweaver_method_count" - MethodErrorsName = "serviceweaver_method_error_count" - MethodLatenciesName = "serviceweaver_method_latency_micros" - MethodBytesRequestName = "serviceweaver_method_bytes_request" - MethodBytesReplyName = "serviceweaver_method_bytes_reply" -) - var ( // The following metrics are automatically populated for the user. methodCounts = metrics.NewCounterMap[MethodLabels]( - MethodCountsName, + imetrics.MethodCountsName, "Count of Service Weaver component method invocations", ) methodErrors = metrics.NewCounterMap[MethodLabels]( - MethodErrorsName, + imetrics.MethodErrorsName, "Count of Service Weaver component method invocations that result in an error", ) methodLatencies = metrics.NewHistogramMap[MethodLabels]( - MethodLatenciesName, + imetrics.MethodLatenciesName, "Duration, in microseconds, of Service Weaver component method execution", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) methodBytesRequest = metrics.NewHistogramMap[MethodLabels]( - MethodBytesRequestName, + imetrics.MethodBytesRequestName, "Number of bytes in Service Weaver component method requests", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) methodBytesReply = metrics.NewHistogramMap[MethodLabels]( - MethodBytesReplyName, + imetrics.MethodBytesReplyName, "Number of bytes in Service Weaver component method replies", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) ) diff --git a/runtime/protomsg/handler.go b/runtime/protomsg/handler.go index 2dc8f5d79..56bacd478 100644 --- a/runtime/protomsg/handler.go +++ b/runtime/protomsg/handler.go @@ -24,6 +24,7 @@ import ( "runtime/debug" "time" + imetrics "github.com/ServiceWeaver/weaver/internal/metrics" "github.com/ServiceWeaver/weaver/metrics" "google.golang.org/protobuf/proto" ) @@ -40,17 +41,17 @@ var ( httpRequestLatencyMicros = metrics.NewHistogramMap[handlerLabels]( "serviceweaver_system_http_request_latency_micros", "Duration, in microseconds, of Service Weaver HTTP request execution", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) httpRequestBytesReceived = metrics.NewHistogramMap[handlerLabels]( "serviceweaver_system_http_request_bytes_received", "Number of bytes received by Service Weaver HTTP request handlers", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) httpRequestBytesReturned = metrics.NewHistogramMap[handlerLabels]( "serviceweaver_system_http_request_bytes_returned", "Number of bytes returned by Service Weaver HTTP request handlers", - metrics.NonNegativeBuckets, + imetrics.GeneratedBuckets, ) )