From b6381b78b9cbb004c3659f4e96042ec8a99635ef Mon Sep 17 00:00:00 2001 From: Ashvitha Date: Thu, 27 Apr 2023 16:01:10 -0400 Subject: [PATCH 001/100] Move hcp client to subpackage hcpclient (#16800) --- agent/consul/server.go | 3 ++- agent/consul/server_test.go | 9 ++++----- agent/hcp/bootstrap/bootstrap.go | 8 ++++---- agent/hcp/{ => client}/client.go | 8 ++++---- agent/hcp/{ => client}/mock_Client.go | 2 +- agent/hcp/deps.go | 5 +++-- agent/hcp/discover/discover.go | 4 ++-- agent/hcp/manager.go | 5 +++-- agent/hcp/manager_test.go | 25 +++++++++++++------------ 9 files changed, 36 insertions(+), 33 deletions(-) rename agent/hcp/{ => client}/client.go (99%) rename agent/hcp/{ => client}/mock_Client.go (99%) diff --git a/agent/consul/server.go b/agent/consul/server.go index e5140fcc2a3a..b6edb58a8f1a 100644 --- a/agent/consul/server.go +++ b/agent/consul/server.go @@ -59,6 +59,7 @@ import ( agentgrpc "github.com/hashicorp/consul/agent/grpc-internal" "github.com/hashicorp/consul/agent/grpc-internal/services/subscribe" "github.com/hashicorp/consul/agent/hcp" + hcpclient "github.com/hashicorp/consul/agent/hcp/client" logdrop "github.com/hashicorp/consul/agent/log-drop" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/pool" @@ -1973,7 +1974,7 @@ func (s *Server) trackLeaderChanges() { // hcpServerStatus is the callback used by the HCP manager to emit status updates to the HashiCorp Cloud Platform when // enabled. 
func (s *Server) hcpServerStatus(deps Deps) hcp.StatusCallback { - return func(ctx context.Context) (status hcp.ServerStatus, err error) { + return func(ctx context.Context) (status hcpclient.ServerStatus, err error) { status.Name = s.config.NodeName status.ID = string(s.config.NodeID) status.Version = cslversion.GetHumanVersion() diff --git a/agent/consul/server_test.go b/agent/consul/server_test.go index c246428b2f29..1b8da8d1e2cb 100644 --- a/agent/consul/server_test.go +++ b/agent/consul/server_test.go @@ -27,8 +27,6 @@ import ( "golang.org/x/time/rate" "google.golang.org/grpc" - "github.com/hashicorp/consul/agent/hcp" - "github.com/hashicorp/consul-net-rpc/net/rpc" "github.com/hashicorp/consul/agent/connect" @@ -36,6 +34,7 @@ import ( rpcRate "github.com/hashicorp/consul/agent/consul/rate" external "github.com/hashicorp/consul/agent/grpc-external" grpcmiddleware "github.com/hashicorp/consul/agent/grpc-middleware" + hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/metadata" "github.com/hashicorp/consul/agent/rpc/middleware" "github.com/hashicorp/consul/agent/structs" @@ -2082,10 +2081,10 @@ func TestServer_hcpManager(t *testing.T) { _, conf1 := testServerConfig(t) conf1.BootstrapExpect = 1 conf1.RPCAdvertise = &net.TCPAddr{IP: []byte{127, 0, 0, 2}, Port: conf1.RPCAddr.Port} - hcp1 := hcp.NewMockClient(t) - hcp1.EXPECT().PushServerStatus(mock.Anything, mock.MatchedBy(func(status *hcp.ServerStatus) bool { + hcp1 := hcpclient.NewMockClient(t) + hcp1.EXPECT().PushServerStatus(mock.Anything, mock.MatchedBy(func(status *hcpclient.ServerStatus) bool { return status.ID == string(conf1.NodeID) - })).Run(func(ctx context.Context, status *hcp.ServerStatus) { + })).Run(func(ctx context.Context, status *hcpclient.ServerStatus) { require.Equal(t, status.LanAddress, "127.0.0.2") }).Call.Return(nil) diff --git a/agent/hcp/bootstrap/bootstrap.go b/agent/hcp/bootstrap/bootstrap.go index 55e1231f5cf4..20a485ea38f2 100644 --- 
a/agent/hcp/bootstrap/bootstrap.go +++ b/agent/hcp/bootstrap/bootstrap.go @@ -19,7 +19,7 @@ import ( "time" "github.com/hashicorp/consul/agent/config" - "github.com/hashicorp/consul/agent/hcp" + hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/lib" "github.com/hashicorp/consul/lib/retry" ) @@ -128,9 +128,9 @@ func doHCPBootstrap(ctx context.Context, rc *config.RuntimeConfig, ui UI) (strin Jitter: retry.NewJitter(50), } - var bsCfg *hcp.BootstrapConfig + var bsCfg *hcpclient.BootstrapConfig - client, err := hcp.NewClient(rc.Cloud) + client, err := hcpclient.NewClient(rc.Cloud) if err != nil { return "", err } @@ -191,7 +191,7 @@ func doHCPBootstrap(ctx context.Context, rc *config.RuntimeConfig, ui UI) (strin return cfgJSON, nil } -func persistTLSCerts(dataDir string, bsCfg *hcp.BootstrapConfig) error { +func persistTLSCerts(dataDir string, bsCfg *hcpclient.BootstrapConfig) error { dir := filepath.Join(dataDir, subDir) if bsCfg.TLSCert == "" || bsCfg.TLSCertKey == "" { diff --git a/agent/hcp/client.go b/agent/hcp/client/client.go similarity index 99% rename from agent/hcp/client.go rename to agent/hcp/client/client.go index e48bb0f65f72..f4e20746a07c 100644 --- a/agent/hcp/client.go +++ b/agent/hcp/client/client.go @@ -1,7 +1,7 @@ // Copyright (c) HashiCorp, Inc. 
// SPDX-License-Identifier: MPL-2.0 -package hcp +package client import ( "context" @@ -11,13 +11,13 @@ import ( httptransport "github.com/go-openapi/runtime/client" "github.com/go-openapi/strfmt" + + "github.com/hashicorp/consul/agent/hcp/config" + "github.com/hashicorp/consul/version" hcpgnm "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/client/global_network_manager_service" gnmmod "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/models" "github.com/hashicorp/hcp-sdk-go/httpclient" "github.com/hashicorp/hcp-sdk-go/resource" - - "github.com/hashicorp/consul/agent/hcp/config" - "github.com/hashicorp/consul/version" ) // Client interface exposes HCP operations that can be invoked by Consul diff --git a/agent/hcp/mock_Client.go b/agent/hcp/client/mock_Client.go similarity index 99% rename from agent/hcp/mock_Client.go rename to agent/hcp/client/mock_Client.go index 29bd27cbf1bc..27eb35a747cb 100644 --- a/agent/hcp/mock_Client.go +++ b/agent/hcp/client/mock_Client.go @@ -1,6 +1,6 @@ // Code generated by mockery v2.15.0. DO NOT EDIT. -package hcp +package client import ( context "context" diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 6575b8d67980..b4d67154fb45 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -4,6 +4,7 @@ package hcp import ( + hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" "github.com/hashicorp/go-hclog" @@ -11,12 +12,12 @@ import ( // Deps contains the interfaces that the rest of Consul core depends on for HCP integration. 
type Deps struct { - Client Client + Client hcpclient.Client Provider scada.Provider } func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { - d.Client, err = NewClient(cfg) + d.Client, err = hcpclient.NewClient(cfg) if err != nil { return } diff --git a/agent/hcp/discover/discover.go b/agent/hcp/discover/discover.go index 43bf0c771910..12024b7dd6a0 100644 --- a/agent/hcp/discover/discover.go +++ b/agent/hcp/discover/discover.go @@ -9,7 +9,7 @@ import ( "log" "time" - "github.com/hashicorp/consul/agent/hcp" + hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/hcp/config" ) @@ -32,7 +32,7 @@ func (p *Provider) Addrs(args map[string]string, l *log.Logger) ([]string, error return nil, err } - client, err := hcp.NewClient(cfg.CloudConfig) + client, err := hcpclient.NewClient(cfg.CloudConfig) if err != nil { return nil, err } diff --git a/agent/hcp/manager.go b/agent/hcp/manager.go index 9d5a2b44abc9..0dc9db95da29 100644 --- a/agent/hcp/manager.go +++ b/agent/hcp/manager.go @@ -8,6 +8,7 @@ import ( "sync" "time" + hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/lib" "github.com/hashicorp/go-hclog" ) @@ -18,7 +19,7 @@ var ( ) type ManagerConfig struct { - Client Client + Client hcpclient.Client StatusFn StatusCallback MinInterval time.Duration @@ -47,7 +48,7 @@ func (cfg *ManagerConfig) nextHeartbeat() time.Duration { return min + lib.RandomStagger(max-min) } -type StatusCallback func(context.Context) (ServerStatus, error) +type StatusCallback func(context.Context) (hcpclient.ServerStatus, error) type Manager struct { logger hclog.Logger diff --git a/agent/hcp/manager_test.go b/agent/hcp/manager_test.go index 41530b28af24..4a3bdf582c49 100644 --- a/agent/hcp/manager_test.go +++ b/agent/hcp/manager_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/go-hclog" 
"github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" @@ -15,12 +16,12 @@ import ( ) func TestManager_Run(t *testing.T) { - client := NewMockClient(t) - statusF := func(ctx context.Context) (ServerStatus, error) { - return ServerStatus{ID: t.Name()}, nil + client := hcpclient.NewMockClient(t) + statusF := func(ctx context.Context) (hcpclient.ServerStatus, error) { + return hcpclient.ServerStatus{ID: t.Name()}, nil } updateCh := make(chan struct{}, 1) - client.EXPECT().PushServerStatus(mock.Anything, &ServerStatus{ID: t.Name()}).Return(nil).Once() + client.EXPECT().PushServerStatus(mock.Anything, &hcpclient.ServerStatus{ID: t.Name()}).Return(nil).Once() mgr := NewManager(ManagerConfig{ Client: client, Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), @@ -43,14 +44,14 @@ func TestManager_Run(t *testing.T) { } func TestManager_SendUpdate(t *testing.T) { - client := NewMockClient(t) - statusF := func(ctx context.Context) (ServerStatus, error) { - return ServerStatus{ID: t.Name()}, nil + client := hcpclient.NewMockClient(t) + statusF := func(ctx context.Context) (hcpclient.ServerStatus, error) { + return hcpclient.ServerStatus{ID: t.Name()}, nil } updateCh := make(chan struct{}, 1) // Expect two calls, once during run startup and again when SendUpdate is called - client.EXPECT().PushServerStatus(mock.Anything, &ServerStatus{ID: t.Name()}).Return(nil).Twice() + client.EXPECT().PushServerStatus(mock.Anything, &hcpclient.ServerStatus{ID: t.Name()}).Return(nil).Twice() mgr := NewManager(ManagerConfig{ Client: client, Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), @@ -73,14 +74,14 @@ func TestManager_SendUpdate(t *testing.T) { } func TestManager_SendUpdate_Periodic(t *testing.T) { - client := NewMockClient(t) - statusF := func(ctx context.Context) (ServerStatus, error) { - return ServerStatus{ID: t.Name()}, nil + client := hcpclient.NewMockClient(t) + statusF := func(ctx context.Context) (hcpclient.ServerStatus, error) { + return 
hcpclient.ServerStatus{ID: t.Name()}, nil } updateCh := make(chan struct{}, 1) // Expect two calls, once during run startup and again when SendUpdate is called - client.EXPECT().PushServerStatus(mock.Anything, &ServerStatus{ID: t.Name()}).Return(nil).Twice() + client.EXPECT().PushServerStatus(mock.Anything, &hcpclient.ServerStatus{ID: t.Name()}).Return(nil).Twice() mgr := NewManager(ManagerConfig{ Client: client, Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), From 5b82711ef6c0257eb4480f8056bcc8d1f5da97fe Mon Sep 17 00:00:00 2001 From: Ashvitha Date: Mon, 8 May 2023 09:51:10 -0400 Subject: [PATCH 002/100] [HCP Observability] New MetricsClient (#17100) * Client configured with TLS using HCP config and retry/throttle * Add tests and godoc for metrics client * close body after request * run go mod tidy * Remove one abstraction to use the config from deps * Address PR feedback * remove clone * Extract CloudConfig and mock for future PR * Switch to hclog.FromContext --- agent/hcp/client/metrics_client.go | 152 ++++++++++++++++++++++++ agent/hcp/client/metrics_client_test.go | 107 +++++++++++++++++ agent/hcp/client/mock_CloudConfig.go | 40 +++++++ go.mod | 5 +- go.sum | 12 +- 5 files changed, 313 insertions(+), 3 deletions(-) create mode 100644 agent/hcp/client/metrics_client.go create mode 100644 agent/hcp/client/metrics_client_test.go create mode 100644 agent/hcp/client/mock_CloudConfig.go diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go new file mode 100644 index 000000000000..15bd71097f79 --- /dev/null +++ b/agent/hcp/client/metrics_client.go @@ -0,0 +1,152 @@ +package client + +import ( + "bytes" + "context" + "fmt" + "io" + "net/http" + "time" + + "github.com/hashicorp/go-cleanhttp" + "github.com/hashicorp/go-hclog" + "github.com/hashicorp/go-retryablehttp" + hcpcfg "github.com/hashicorp/hcp-sdk-go/config" + colmetricpb "go.opentelemetry.io/proto/otlp/collector/metrics/v1" + metricpb 
"go.opentelemetry.io/proto/otlp/metrics/v1" + "golang.org/x/oauth2" + "google.golang.org/protobuf/proto" +) + +const ( + // HTTP Client config + defaultStreamTimeout = 15 * time.Second + + // Retry config + // TODO: Eventually, we'd like to configure these values dynamically. + defaultRetryWaitMin = 1 * time.Second + defaultRetryWaitMax = 15 * time.Second + defaultRetryMax = 4 +) + +// MetricsClient exports Consul metrics in OTLP format to the HCP Telemetry Gateway. +type MetricsClient interface { + ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error +} + +// CloudConfig represents cloud config for TLS abstracted in an interface for easy testing. +type CloudConfig interface { + HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) +} + +// otlpClient is an implementation of MetricsClient with a retryable http client for retries and to honor throttle. +// It also holds default HTTP headers to add to export requests. +type otlpClient struct { + client *retryablehttp.Client + header *http.Header +} + +// NewMetricsClient returns a configured MetricsClient. +// The current implementation uses otlpClient to provide retry functionality. +func NewMetricsClient(cfg CloudConfig, ctx context.Context) (MetricsClient, error) { + if cfg == nil { + return nil, fmt.Errorf("failed to init telemetry client: provide valid cloudCfg (Cloud Configuration for TLS)") + } + + if ctx == nil { + return nil, fmt.Errorf("failed to init telemetry client: provide a valid context") + } + + logger := hclog.FromContext(ctx) + + c, err := newHTTPClient(cfg, logger) + if err != nil { + return nil, fmt.Errorf("failed to init telemetry client: %v", err) + } + + header := make(http.Header) + header.Set("Content-Type", "application/x-protobuf") + + return &otlpClient{ + client: c, + header: &header, + }, nil +} + +// newHTTPClient configures the retryable HTTP client. 
+func newHTTPClient(cloudCfg CloudConfig, logger hclog.Logger) (*retryablehttp.Client, error) { + hcpCfg, err := cloudCfg.HCPConfig() + if err != nil { + return nil, err + } + + tlsTransport := cleanhttp.DefaultPooledTransport() + tlsTransport.TLSClientConfig = hcpCfg.APITLSConfig() + + var transport http.RoundTripper = &oauth2.Transport{ + Base: tlsTransport, + Source: hcpCfg, + } + + client := &http.Client{ + Transport: transport, + Timeout: defaultStreamTimeout, + } + + retryClient := &retryablehttp.Client{ + HTTPClient: client, + Logger: logger.Named("hcp_telemetry_client"), + RetryWaitMin: defaultRetryWaitMin, + RetryWaitMax: defaultRetryWaitMax, + RetryMax: defaultRetryMax, + CheckRetry: retryablehttp.DefaultRetryPolicy, + Backoff: retryablehttp.DefaultBackoff, + } + + return retryClient, nil +} + +// ExportMetrics is the single method exposed by MetricsClient to export OTLP metrics to the desired HCP endpoint. +// The endpoint is configurable as the endpoint can change during periodic refresh of CCM telemetry config. +// By configuring the endpoint here, we can re-use the same client and override the endpoint when making a request. 
+func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { + pbRequest := &colmetricpb.ExportMetricsServiceRequest{ + ResourceMetrics: []*metricpb.ResourceMetrics{protoMetrics}, + } + + body, err := proto.Marshal(pbRequest) + if err != nil { + return fmt.Errorf("failed to export metrics: %v", err) + } + + req, err := retryablehttp.NewRequest(http.MethodPost, endpoint, bytes.NewBuffer(body)) + if err != nil { + return fmt.Errorf("failed to export metrics: %v", err) + } + req.Header = *o.header + + resp, err := o.client.Do(req.WithContext(ctx)) + if err != nil { + return fmt.Errorf("failed to export metrics: %v", err) + } + defer resp.Body.Close() + + var respData bytes.Buffer + if _, err := io.Copy(&respData, resp.Body); err != nil { + return fmt.Errorf("failed to export metrics: %v", err) + } + + if respData.Len() != 0 { + var respProto colmetricpb.ExportMetricsServiceResponse + if err := proto.Unmarshal(respData.Bytes(), &respProto); err != nil { + return fmt.Errorf("failed to export metrics: %v", err) + } + + if respProto.PartialSuccess != nil { + msg := respProto.PartialSuccess.GetErrorMessage() + return fmt.Errorf("failed to export metrics: partial success: %s", msg) + } + } + + return nil +} diff --git a/agent/hcp/client/metrics_client_test.go b/agent/hcp/client/metrics_client_test.go new file mode 100644 index 000000000000..7c64d731d0b1 --- /dev/null +++ b/agent/hcp/client/metrics_client_test.go @@ -0,0 +1,107 @@ +package client + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/require" + colpb "go.opentelemetry.io/proto/otlp/collector/metrics/v1" + metricpb "go.opentelemetry.io/proto/otlp/metrics/v1" + "google.golang.org/protobuf/proto" +) + +func TestNewMetricsClient(t *testing.T) { + for name, test := range map[string]struct { + wantErr string + cfg CloudConfig + ctx context.Context + }{ + "success": { + cfg: &MockCloudCfg{}, + ctx: 
context.Background(), + }, + "failsWithoutCloudCfg": { + wantErr: "failed to init telemetry client: provide valid cloudCfg (Cloud Configuration for TLS)", + cfg: nil, + ctx: context.Background(), + }, + "failsWithoutContext": { + wantErr: "failed to init telemetry client: provide a valid context", + cfg: MockCloudCfg{}, + ctx: nil, + }, + "failsHCPConfig": { + wantErr: "failed to init telemetry client", + cfg: MockErrCloudCfg{}, + ctx: context.Background(), + }, + } { + t.Run(name, func(t *testing.T) { + client, err := NewMetricsClient(test.cfg, test.ctx) + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.Nil(t, err) + require.NotNil(t, client) + }) + } +} + +func TestExportMetrics(t *testing.T) { + for name, test := range map[string]struct { + wantErr string + status int + }{ + "success": { + status: http.StatusOK, + }, + "failsWithNonRetryableError": { + status: http.StatusBadRequest, + wantErr: "failed to export metrics", + }, + } { + t.Run(name, func(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + require.Equal(t, r.Header.Get("Content-Type"), "application/x-protobuf") + + require.Equal(t, r.Header.Get("Authorization"), "Bearer test-token") + + body := colpb.ExportMetricsServiceResponse{} + + if test.wantErr != "" { + body.PartialSuccess = &colpb.ExportMetricsPartialSuccess{ + ErrorMessage: "partial failure", + } + } + bytes, err := proto.Marshal(&body) + + require.NoError(t, err) + + w.Header().Set("Content-Type", "application/x-protobuf") + w.WriteHeader(test.status) + w.Write(bytes) + })) + defer srv.Close() + + client, err := NewMetricsClient(MockCloudCfg{}, context.Background()) + require.NoError(t, err) + + ctx := context.Background() + metrics := &metricpb.ResourceMetrics{} + err = client.ExportMetrics(ctx, metrics, srv.URL) + + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), 
test.wantErr) + return + } + + require.NoError(t, err) + }) + } +} diff --git a/agent/hcp/client/mock_CloudConfig.go b/agent/hcp/client/mock_CloudConfig.go new file mode 100644 index 000000000000..ed6e3358f7db --- /dev/null +++ b/agent/hcp/client/mock_CloudConfig.go @@ -0,0 +1,40 @@ +package client + +import ( + "crypto/tls" + "errors" + "net/url" + + hcpcfg "github.com/hashicorp/hcp-sdk-go/config" + "golang.org/x/oauth2" +) + +type mockHCPCfg struct{} + +func (m *mockHCPCfg) Token() (*oauth2.Token, error) { + return &oauth2.Token{ + AccessToken: "test-token", + }, nil +} + +func (m *mockHCPCfg) APITLSConfig() *tls.Config { return nil } + +func (m *mockHCPCfg) SCADAAddress() string { return "" } + +func (m *mockHCPCfg) SCADATLSConfig() *tls.Config { return &tls.Config{} } + +func (m *mockHCPCfg) APIAddress() string { return "" } + +func (m *mockHCPCfg) PortalURL() *url.URL { return &url.URL{} } + +type MockCloudCfg struct{} + +func (m MockCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) { + return &mockHCPCfg{}, nil +} + +type MockErrCloudCfg struct{} + +func (m MockErrCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) { + return nil, errors.New("test bad HCP config") +} diff --git a/go.mod b/go.mod index d78707f8c9a6..b3f1a2faa353 100644 --- a/go.mod +++ b/go.mod @@ -53,6 +53,7 @@ require ( github.com/hashicorp/go-memdb v1.3.4 github.com/hashicorp/go-multierror v1.1.1 github.com/hashicorp/go-raftchunking v0.7.0 + github.com/hashicorp/go-retryablehttp v0.6.7 github.com/hashicorp/go-secure-stdlib/awsutil v0.1.6 github.com/hashicorp/go-sockaddr v1.0.2 github.com/hashicorp/go-syslog v1.0.0 @@ -95,6 +96,7 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 + go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 @@ -167,11 +169,11 @@ require ( 
github.com/googleapis/gax-go/v2 v2.1.0 // indirect github.com/googleapis/gnostic v0.2.0 // indirect github.com/gophercloud/gophercloud v0.3.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/go-msgpack v0.5.5 // indirect github.com/hashicorp/go-msgpack/v2 v2.0.0 // indirect github.com/hashicorp/go-plugin v1.4.5 // indirect - github.com/hashicorp/go-retryablehttp v0.6.7 // indirect github.com/hashicorp/go-rootcerts v1.0.2 // indirect github.com/hashicorp/go-secure-stdlib/mlock v0.1.1 // indirect github.com/hashicorp/go-secure-stdlib/parseutil v0.1.6 // indirect @@ -225,7 +227,6 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/proto/otlp v0.7.0 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 964f00fa91e0..2d08b9447dfe 100644 --- a/go.sum +++ b/go.sum @@ -193,8 +193,10 @@ github.com/cloudflare/cloudflare-go v0.10.2/go.mod h1:qhVI5MKwBGhdNU89ZRz2plgYut github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk= +github.com/cncf/udpa/go v0.0.0-20210930031921-04548b0d99d4/go.mod h1:6pvJx4me5XPnfI9Z40ddWsdw2W/uZgQLFXToKeRcDiI= github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20210805033703-aa0b78936158/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= +github.com/cncf/xds/go v0.0.0-20210922020428-25de7278fc84/go.mod 
h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211001041855-01bcc9b48dfe/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1 h1:zH8ljVhhq7yC0MIeUL/IviMtY8hx2mK8cN9wEYb8ggw= github.com/cncf/xds/go v0.0.0-20211011173535-cb28da3451f1/go.mod h1:eXthEFrGJvWHgFFCl3hGmgk+/aYT6PnTQLykKQRLhEs= @@ -387,6 +389,8 @@ github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/goji/httpauth v0.0.0-20160601135302-2da839ab0f4d/go.mod h1:nnjvkQ9ptGaCkuDUx6wNykzzlUixGxvkme+H/lnzb+A= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= +github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -507,6 +511,8 @@ github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/grpc-ecosystem/grpc-opentracing 
v0.0.0-20180507213350-8e809c8a8645/go.mod h1:6iZfnjpejD4L/4DwD7NryNaJyCQdzwWwH2MWhCA90Kw= github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI= github.com/hashicorp/consul-awsauth v0.0.0-20220713182709-05ac1c5c2706 h1:1ZEjnveDe20yFa6lSkfdQZm5BR/b271n0MsB5R2L3us= @@ -1068,8 +1074,9 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/proto/otlp v0.7.0 h1:rwOQPCuKAKmwGKq2aVNnYIibI6wnV7EvzgfTCzcdGg8= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= +go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= @@ -1220,6 +1227,7 @@ golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210628180205-a41e5a781914/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 
h1:lxqLZaMad/dJHMFZH0NiNpiEZI/nhgWhe4wgzpE+MuA= golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg= @@ -1528,6 +1536,7 @@ google.golang.org/genproto v0.0.0-20210828152312-66f60bf46e71/go.mod h1:eFjDcFEc google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210903162649-d08c68adba83/go.mod h1:eFjDcFEctNawg4eG61bRv87N7iHBWyVhJu7u1kqDUXY= google.golang.org/genproto v0.0.0-20210924002016-3dee208752a0/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= +google.golang.org/genproto v0.0.0-20211118181313-81c1377c94b1/go.mod h1:5CzLGKJ67TSI2B9POpiiyGha0AjJvZIUgRMt1dSmuhc= google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 h1:K1zaaMdYBXRyX+cwFnxj7M6zwDyumLQMZ5xqwGvjreQ= google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737/go.mod h1:2r/26NEF3bFmT3eC3aZreahSal0C3Shl8Gi6vyDYqOQ= google.golang.org/grpc v1.8.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= @@ -1561,6 +1570,7 @@ google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnD google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.41.0/go.mod h1:U3l9uK9J0sini8mHphKoXyaqDA/8VyGnDee1zzIUK6k= +google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.49.0 h1:WTLtQzmQori5FUH25Pq4WT22oCsv8USpQ+F6rqtsmxw= google.golang.org/grpc v1.49.0/go.mod h1:ZgQEeidpAuNRZ8iRrlBKXZQP1ghovWIVhdJRyCDK+GI= google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.1.0/go.mod h1:6Kw0yEErY5E/yWrBtf03jp27GLLJujG4z/JK95pnjjw= From 8a1631145271a7448ffb442e2fe982f64ccce639 Mon Sep 17 00:00:00 2001 From: Ashvitha Date: Mon, 8 May 2023 09:51:40 -0400 Subject: [PATCH 003/100] [HCP Observability] New MetricsClient (#17100) * Client configured with TLS using HCP 
config and retry/throttle * Add tests and godoc for metrics client * close body after request * run go mod tidy * Remove one abstraction to use the config from deps * Address PR feedback * remove clone * Extract CloudConfig and mock for future PR * Switch to hclog.FromContext From c7945b35c2abeaf2fa2641a2be0d95582368a8c6 Mon Sep 17 00:00:00 2001 From: Ashvitha Date: Mon, 8 May 2023 09:52:01 -0400 Subject: [PATCH 004/100] [HCP Observability] New MetricsClient (#17100) * Client configured with TLS using HCP config and retry/throttle * Add tests and godoc for metrics client * close body after request * run go mod tidy * Remove one abstraction to use the config from deps * Address PR feedback * remove clone * Extract CloudConfig and mock for future PR * Switch to hclog.FromContext From e4f076186f9a256c3ac86360a79e33714b0dad1b Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 11 Apr 2023 01:39:41 -0400 Subject: [PATCH 005/100] Client configured with TLS using HCP config and retry/throttle --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 2d08b9447dfe..abf354726196 100644 --- a/go.sum +++ b/go.sum @@ -510,6 +510,7 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 
8bbc289690ec56799a9aa7aa100a44224a17ad5a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:26:56 -0400 Subject: [PATCH 006/100] run go mod tidy --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index abf354726196..2d08b9447dfe 100644 --- a/go.sum +++ b/go.sum @@ -510,7 +510,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From f6182b4cc3cd25372ca19e07ae0b0a379114b32a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 01:17:14 -0400 Subject: [PATCH 007/100] Remove one abstraction to use the config from deps --- agent/hcp/client/metrics_client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index 15bd71097f79..ba50f28626e6 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,7 +129,6 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } - defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From 009f08e75fb7ab30f19be048be8b26956134c128 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 
Apr 2023 13:47:35 -0400 Subject: [PATCH 008/100] Address PR feedback --- agent/hcp/client/metrics_client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index ba50f28626e6..15bd71097f79 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,6 +129,7 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } + defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From 41ba7ee52c636bfc4ea18e0e1c1791f756d92e1a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 11 Apr 2023 01:39:41 -0400 Subject: [PATCH 009/100] Client configured with TLS using HCP config and retry/throttle --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 2d08b9447dfe..abf354726196 100644 --- a/go.sum +++ b/go.sum @@ -510,6 +510,7 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 8fd34e463c83c064f63997d2713d767b1fa5f9fd Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:26:56 -0400 Subject: [PATCH 010/100] run go mod tidy --- go.sum | 1 - 1 
file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index abf354726196..2d08b9447dfe 100644 --- a/go.sum +++ b/go.sum @@ -510,7 +510,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 894cef471487797fe2e6948e0b2bd5cfec992702 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 24 Apr 2023 10:57:59 -0400 Subject: [PATCH 011/100] Create new OTELExporter which uses the MetricsClient Add transform because the conversion is in an /internal package --- agent/hcp/telemetry/otel_exporter.go | 53 +++++ agent/hcp/telemetry/otel_exporter_test.go | 133 +++++++++++ agent/hcp/telemetry/otlp_transform.go | 168 ++++++++++++++ agent/hcp/telemetry/otlp_transform_test.go | 258 +++++++++++++++++++++ go.mod | 11 +- go.sum | 22 +- 6 files changed, 640 insertions(+), 5 deletions(-) create mode 100644 agent/hcp/telemetry/otel_exporter.go create mode 100644 agent/hcp/telemetry/otel_exporter_test.go create mode 100644 agent/hcp/telemetry/otlp_transform.go create mode 100644 agent/hcp/telemetry/otlp_transform_test.go diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go new file mode 100644 index 000000000000..365c316717b1 --- /dev/null +++ b/agent/hcp/telemetry/otel_exporter.go @@ -0,0 +1,53 @@ +package telemetry + 
+import ( + "context" + + hcpclient "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/go-multierror" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/aggregation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" +) + +// OTELExporter is a custom implementation of a OTEL Metrics SDK metrics.Exporter. +// The exporter is used by a OTEL Metrics SDK PeriodicReader to export aggregated metrics. +// This allows us to use a custom client - HCP authenticated MetricsClient. +type OTELExporter struct { + client hcpclient.MetricsClient + endpoint string +} + +// Temporality returns the Cumulative temporality for metrics aggregation. +// Telemetry Gateway stores metrics in Prometheus format, so use Cummulative aggregation as default. +func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporality { + return metricdata.CumulativeTemporality +} + +// Aggregation returns the Aggregation to use for an instrument kind. +func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggregation { + switch kind { + case metric.InstrumentKindObservableGauge: + return aggregation.LastValue{} + case metric.InstrumentKindHistogram: + return aggregation.ExplicitBucketHistogram{ + Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, + NoMinMax: false, + } + } + // for metric.InstrumentKindCounter and others, default to sum. + return aggregation.Sum{} +} + +// Export serializes and transmits metric data to a receiver. +func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { + otlpMetrics, merr := transformOTLP(metrics) + err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) + return multierror.Append(merr, err) +} + +// ForceFlush does nothing, as the MetricsClient client holds no state. 
+func (e *OTELExporter) ForceFlush(ctx context.Context) error { return ctx.Err() } + +// Shutdown does nothing, as the MetricsClient is a HTTP client that requires no graceful shutdown. +func (e *OTELExporter) Shutdown(ctx context.Context) error { return ctx.Err() } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go new file mode 100644 index 000000000000..07e959593a4d --- /dev/null +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -0,0 +1,133 @@ +package telemetry + +import ( + "context" + "fmt" + "testing" + + "github.com/hashicorp/consul/agent/hcp/client" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/aggregation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" + metricpb "go.opentelemetry.io/proto/otlp/metrics/v1" +) + +func TestTemporality(t *testing.T) { + exp := &OTELExporter{} + require.Equal(t, metricdata.CumulativeTemporality, exp.Temporality(metric.InstrumentKindCounter)) +} + +func TestAggregation(t *testing.T) { + for name, test := range map[string]struct { + kind metric.InstrumentKind + expAgg aggregation.Aggregation + }{ + "gauge": { + kind: metric.InstrumentKindObservableGauge, + expAgg: aggregation.LastValue{}, + }, + "counter": { + kind: metric.InstrumentKindCounter, + expAgg: aggregation.Sum{}, + }, + "histogram": { + kind: metric.InstrumentKindHistogram, + expAgg: aggregation.ExplicitBucketHistogram{Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, NoMinMax: false}, + }, + } { + t.Run(name, func(t *testing.T) { + exp := &OTELExporter{} + require.Equal(t, test.expAgg, exp.Aggregation(test.kind)) + }) + } +} + +type mockErrMetricsClient struct{} + +func (m *mockErrMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { + return fmt.Errorf("failed to export metrics") +} + +type 
mockMetricsClient struct{} + +func (m *mockMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { + return nil +} + +func TestExport(t *testing.T) { + for name, test := range map[string]struct { + wantErr string + metrics *metricdata.ResourceMetrics + client client.MetricsClient + }{ + "errorWithExportFailure": { + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + }, + wantErr: "failed to export metrics", + }, + "errorWithTransformFailure": { + wantErr: "unknown aggregation: metricdata.Gauge[int64]", + client: &mockMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + // unsupported, only float64 supported + Data: metricdata.Gauge[int64]{}, + }, + }, + }, + }, + }, + }, + "multierrorTransformExportFailure": { + wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + // unsupported, only float64 supported + Data: metricdata.Gauge[int64]{}, + }, + }, + }, + }, + }, + }, + } { + t.Run(name, func(t *testing.T) { + exp := &OTELExporter{ + client: test.client, + } + + err := exp.Export(context.Background(), test.metrics) + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + }) + } +} + +func TestForceFlush(t *testing.T) { + exp := &OTELExporter{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + require.Error(t, exp.ForceFlush(ctx)) +} + +func TestShutdown(t *testing.T) { + exp := &OTELExporter{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + require.Error(t, exp.Shutdown(ctx)) +} diff --git a/agent/hcp/telemetry/otlp_transform.go 
b/agent/hcp/telemetry/otlp_transform.go new file mode 100644 index 000000000000..4a81bf96706d --- /dev/null +++ b/agent/hcp/telemetry/otlp_transform.go @@ -0,0 +1,168 @@ +package telemetry + +import ( + "fmt" + + "github.com/hashicorp/go-multierror" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + cpb "go.opentelemetry.io/proto/otlp/common/v1" + mpb "go.opentelemetry.io/proto/otlp/metrics/v1" + rpb "go.opentelemetry.io/proto/otlp/resource/v1" +) + +// TransformOTLP returns an OTLP ResourceMetrics generated from OTEL metrics. If rm +// contains invalid ScopeMetrics, an error will be returned along with an OTLP +// ResourceMetrics that contains partial OTLP ScopeMetrics. +func transformOTLP(rm *metricdata.ResourceMetrics) (*mpb.ResourceMetrics, error) { + sms, err := scopeMetrics(rm.ScopeMetrics) + return &mpb.ResourceMetrics{ + Resource: &rpb.Resource{ + Attributes: attributes(rm.Resource.Iter()), + }, + ScopeMetrics: sms, + }, err +} + +// scopeMetrics returns a slice of OTLP ScopeMetrics. +func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) { + var merr *multierror.Error + out := make([]*mpb.ScopeMetrics, 0, len(scopeMetrics)) + for _, sm := range scopeMetrics { + ms, err := metrics(sm.Metrics) + if err != nil { + merr = multierror.Append(merr, err) + } + + out = append(out, &mpb.ScopeMetrics{ + Scope: &cpb.InstrumentationScope{ + Name: sm.Scope.Name, + Version: sm.Scope.Version, + }, + Metrics: ms, + }) + } + return out, merr +} + +// metrics returns a slice of OTLP Metric generated from OTEL metrics sdk ones. 
+func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { + var merr *multierror.Error + out := make([]*mpb.Metric, 0, len(metrics)) + for _, m := range metrics { + o, err := metricType(m) + if err != nil { + merr = multierror.Append(merr, err) + continue + } + out = append(out, o) + } + return out, merr +} + +// metricType identifies the instrument type and converts it to OTLP format. +// only float64 values are accepted since the go metrics sink only receives float64 values. +func metricType(m metricdata.Metrics) (*mpb.Metric, error) { + var err error + out := &mpb.Metric{ + Name: m.Name, + Description: m.Description, + Unit: string(m.Unit), + } + switch a := m.Data.(type) { + case metricdata.Gauge[float64]: + out.Data = &mpb.Metric_Gauge{ + Gauge: &mpb.Gauge{ + DataPoints: dataPoints(a.DataPoints), + }, + } + case metricdata.Sum[float64]: + if a.Temporality != metricdata.CumulativeTemporality { + return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + } + out.Data = &mpb.Metric_Sum{ + Sum: &mpb.Sum{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + IsMonotonic: a.IsMonotonic, + DataPoints: dataPoints(a.DataPoints), + }, + } + case metricdata.Histogram[float64]: + if a.Temporality != metricdata.CumulativeTemporality { + return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + } + out.Data = &mpb.Metric_Histogram{ + Histogram: &mpb.Histogram{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + DataPoints: histogramDataPoints(a.DataPoints), + }, + } + default: + return out, fmt.Errorf("%s: %T", "unknown aggregation", a) + } + return out, err +} + +// DataPoints returns a slice of OTLP NumberDataPoint generated from OTEL metrics sdk ones. 
+func dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoint { + out := make([]*mpb.NumberDataPoint, 0, len(dataPoints)) + for _, dp := range dataPoints { + ndp := &mpb.NumberDataPoint{ + Attributes: attributes(dp.Attributes.Iter()), + StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), + TimeUnixNano: uint64(dp.Time.UnixNano()), + } + + ndp.Value = &mpb.NumberDataPoint_AsDouble{ + AsDouble: dp.Value, + } + out = append(out, ndp) + } + return out +} + +// HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. +func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint[float64]) []*mpb.HistogramDataPoint { + out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) + for _, dp := range dataPoints { + sum := dp.Sum + hdp := &mpb.HistogramDataPoint{ + Attributes: attributes(dp.Attributes.Iter()), + StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), + TimeUnixNano: uint64(dp.Time.UnixNano()), + Count: dp.Count, + Sum: &sum, + BucketCounts: dp.BucketCounts, + ExplicitBounds: dp.Bounds, + } + if v, ok := dp.Min.Value(); ok { + hdp.Min = &v + } + if v, ok := dp.Max.Value(); ok { + hdp.Max = &v + } + out = append(out, hdp) + } + return out +} + +// attributes transforms items of an attribute iterator into OTLP key-values. +// Currently, labels are only key-value pairs. 
+func attributes(iter attribute.Iterator) []*cpb.KeyValue { + l := iter.Len() + if iter.Len() == 0 { + return nil + } + + out := make([]*cpb.KeyValue, 0, l) + for iter.Next() { + kv := iter.Attribute() + av := &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{ + StringValue: kv.Value.AsString(), + }, + } + out = append(out, &cpb.KeyValue{Key: string(kv.Key), Value: av}) + } + return out +} diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go new file mode 100644 index 000000000000..6afc030f984f --- /dev/null +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -0,0 +1,258 @@ +package telemetry + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/instrumentation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.17.0" + cpb "go.opentelemetry.io/proto/otlp/common/v1" + mpb "go.opentelemetry.io/proto/otlp/metrics/v1" + rpb "go.opentelemetry.io/proto/otlp/resource/v1" +) + +var ( + + // Common attributes for test cases. 
+ start = time.Date(2000, time.January, 01, 0, 0, 0, 0, time.FixedZone("GMT", 0)) + end = start.Add(30 * time.Second) + + alice = attribute.NewSet(attribute.String("user", "alice")) + bob = attribute.NewSet(attribute.String("user", "bob")) + + pbAlice = &cpb.KeyValue{Key: "user", Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "alice"}, + }} + pbBob = &cpb.KeyValue{Key: "user", Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "bob"}, + }} + + // DataPoint test case : Histogram Datapoints (Histogram) + minA, maxA, sumA = 2.0, 4.0, 90.0 + minB, maxB, sumB = 4.0, 150.0, 234.0 + otelHDP = []metricdata.HistogramDataPoint[float64]{{ + Attributes: alice, + StartTime: start, + Time: end, + Count: 30, + Bounds: []float64{1, 5}, + BucketCounts: []uint64{0, 30, 0}, + Min: metricdata.NewExtrema(minA), + Max: metricdata.NewExtrema(maxA), + Sum: sumA, + }, { + Attributes: bob, + StartTime: start, + Time: end, + Count: 3, + Bounds: []float64{1, 5}, + BucketCounts: []uint64{0, 1, 2}, + Min: metricdata.NewExtrema(minB), + Max: metricdata.NewExtrema(maxB), + Sum: sumB, + }} + + otlpHDP = []*mpb.HistogramDataPoint{{ + Attributes: []*cpb.KeyValue{pbAlice}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Count: 30, + Sum: &sumA, + ExplicitBounds: []float64{1, 5}, + BucketCounts: []uint64{0, 30, 0}, + Min: &minA, + Max: &maxA, + }, { + Attributes: []*cpb.KeyValue{pbBob}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Count: 3, + Sum: &sumB, + ExplicitBounds: []float64{1, 5}, + BucketCounts: []uint64{0, 1, 2}, + Min: &minB, + Max: &maxB, + }} + // DataPoint test case : Number Datapoints (Gauge / Counter) + otelDP = []metricdata.DataPoint[float64]{ + {Attributes: alice, StartTime: start, Time: end, Value: 1.0}, + {Attributes: bob, StartTime: start, Time: end, Value: 2.0}, + } + + otlpDP = []*mpb.NumberDataPoint{ + { + Attributes: []*cpb.KeyValue{pbAlice}, + 
StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Value: &mpb.NumberDataPoint_AsDouble{AsDouble: 1.0}, + }, + { + Attributes: []*cpb.KeyValue{pbBob}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Value: &mpb.NumberDataPoint_AsDouble{AsDouble: 2.0}, + }, + } + + // Metrics Test Case + // - 3 invalid metrics and 3 Valid to test filtering + // - 1 invalid metric type + // - 2 invalid cummulative temporalities (only cummulative supported) + // - 3 types (Gauge, Counter, and Histogram) supported + otelMetrics = []metricdata.Metrics{ + { + Name: "float64-gauge", + Description: "Gauge with float64 values", + Unit: "1", + Data: metricdata.Gauge[float64]{DataPoints: otelDP}, + }, + { + Name: "float64-sum", + Description: "Sum with float64 values", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: false, + DataPoints: otelDP, + }, + }, + { + Name: "float64-histogram", + Description: "Histogram", + Unit: "1", + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: otelHDP, + }, + }, + { + Name: "invalid-sum", + Description: "Sum with invalid temporality", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.DeltaTemporality, + IsMonotonic: false, + DataPoints: otelDP, + }, + }, + { + Name: "invalid-histogram", + Description: "Invalid histogram", + Unit: "1", + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.DeltaTemporality, + DataPoints: otelHDP, + }, + }, + { + Name: "unknown", + Description: "Unknown aggregation", + Unit: "1", + Data: metricdata.Histogram[int64]{}, + }, + } + + otlpMetrics = []*mpb.Metric{ + { + Name: "float64-gauge", + Description: "Gauge with float64 values", + Unit: "1", + Data: &mpb.Metric_Gauge{Gauge: &mpb.Gauge{DataPoints: otlpDP}}, + }, + { + Name: "float64-sum", + Description: "Sum with float64 values", + Unit: "1", + Data: 
&mpb.Metric_Sum{Sum: &mpb.Sum{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + IsMonotonic: false, + DataPoints: otlpDP, + }}, + }, + { + Name: "float64-histogram", + Description: "Histogram", + Unit: "1", + Data: &mpb.Metric_Histogram{Histogram: &mpb.Histogram{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + DataPoints: otlpHDP, + }}, + }, + } + + // ScopeMetrics Test Cases + otelScopeMetrics = []metricdata.ScopeMetrics{{ + Scope: instrumentation.Scope{ + Name: "test/code/path", + Version: "v0.1.0", + }, + Metrics: otelMetrics, + }} + + otlpScopeMetrics = []*mpb.ScopeMetrics{{ + Scope: &cpb.InstrumentationScope{ + Name: "test/code/path", + Version: "v0.1.0", + }, + Metrics: otlpMetrics, + }} + + // ResourceMetrics Test Cases + otelResourceMetrics = &metricdata.ResourceMetrics{ + Resource: resource.NewSchemaless( + semconv.ServiceName("test server"), + semconv.ServiceVersion("v0.1.0"), + ), + ScopeMetrics: otelScopeMetrics, + } + + otlpResourceMetrics = &mpb.ResourceMetrics{ + Resource: &rpb.Resource{ + Attributes: []*cpb.KeyValue{ + { + Key: "service.name", + Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "test server"}, + }, + }, + { + Key: "service.version", + Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "v0.1.0"}, + }, + }, + }, + }, + ScopeMetrics: otlpScopeMetrics, + } +) + +// TestTransformOTLP runs tests from the "bottom-up" of the metricdata data types. 
+func TestTransformOTLP(t *testing.T) { + // Histogram DataPoint Test Case (Histograms) + assert.Equal(t, otlpHDP, histogramDataPoints(otelHDP)) + + // Number DataPoint Test Case (Counters / Gauges) + require.Equal(t, otlpDP, dataPoints(otelDP)) + + // Metrics Test Case + m, err := metrics(otelMetrics) + require.Equal(t, otlpMetrics, m) + require.Equal(t, len(otlpMetrics), 3) + require.Contains(t, err.Error(), "3 errors occurred") + + // Scope Metrics Test Case + sm, err := scopeMetrics(otelScopeMetrics) + require.Equal(t, otlpScopeMetrics, sm) + require.Contains(t, err.Error(), "3 errors occurred") + + // Resource Metrics Test Case + rm, err := transformOTLP(otelResourceMetrics) + require.Equal(t, otlpResourceMetrics, rm) + require.Contains(t, err.Error(), "3 errors occurred") +} diff --git a/go.mod b/go.mod index b3f1a2faa353..81b82cbb5270 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/go-openapi/runtime v0.24.1 github.com/go-openapi/strfmt v0.21.3 github.com/golang/protobuf v1.5.2 - github.com/google/go-cmp v0.5.8 + github.com/google/go-cmp v0.5.9 github.com/google/gofuzz v1.2.0 github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 github.com/google/tcpproxy v0.0.0-20180808230851-dfa16c61dad2 @@ -96,13 +96,16 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 + go.opentelemetry.io/otel v1.15.0-rc.2 + go.opentelemetry.io/otel/sdk v1.15.0-rc.2 + go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.5.0 + golang.org/x/sys v0.6.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -151,6 +154,8 @@ require ( github.com/dimchansky/utfbom v1.1.0 // 
indirect github.com/envoyproxy/protoc-gen-validate v0.1.0 // indirect github.com/form3tech-oss/jwt-go v3.2.2+incompatible // indirect + github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/analysis v0.21.2 // indirect github.com/go-openapi/errors v0.20.2 // indirect @@ -227,6 +232,8 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect + go.opentelemetry.io/otel/metric v1.15.0-rc.2 // indirect + go.opentelemetry.io/otel/trace v1.15.0-rc.2 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 2d08b9447dfe..78d33e0f5a7b 100644 --- a/go.sum +++ b/go.sum @@ -313,6 +313,11 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= +github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/analysis v0.21.2 h1:hXFrOYFHUAMQdu6zwAiKKJHJQ8kqZs1ux/ru1P1wLJU= @@ -447,8 +452,9 @@ github.com/google/go-cmp 
v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-querystring v0.0.0-20170111101155-53e6ce116135/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= @@ -1074,6 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/otel v1.15.0-rc.2 h1:ujPMbp/CAdjMTF9E93fr6X2W7Ub2MulkQAmXcTaNkJI= +go.opentelemetry.io/otel v1.15.0-rc.2/go.mod h1:AMBt1VNvSP4KmoaUOQcTWUVj6Te4ZRmD/v3tu0vfytI= +go.opentelemetry.io/otel/metric v1.15.0-rc.2 h1:MA7asjRHRgVdbvqYLQozHkxViPFZJ21qzi9IOKZVTnQ= +go.opentelemetry.io/otel/metric v1.15.0-rc.2/go.mod h1:/e8iKMaajFDmOXPoQsFrtr+B3XADzJS+HHePsQh2gGs= +go.opentelemetry.io/otel/sdk v1.15.0-rc.2 h1:4t4aauPUWLswNEUXAi5s3X94/kg7+NQY+6CeR9F6nEk= +go.opentelemetry.io/otel/sdk v1.15.0-rc.2/go.mod h1:/dv7lLx8SqyUgUxQeFeUUGvHbl45K4dssbaYjj/ObTQ= +go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2 h1:0hZK6hT73+T5BNuFW6AsBwfARsgeThwAQ7JagJ1hbYE= +go.opentelemetry.io/otel/sdk/metric 
v0.38.0-rc.2/go.mod h1:GUyzM862531kqKgJkhS/LIeekJ652QyjN94dzN1zmgQ= +go.opentelemetry.io/otel/trace v1.15.0-rc.2 h1:LizBnU5zjqWkHUlZu0tC2k7qICXqPCJqoDlWdc07uw8= +go.opentelemetry.io/otel/trace v1.15.0-rc.2/go.mod h1:JBu+wEp/Ra+YOzZlvPjE6raGRlrUJE/R8aW7Wucsn/o= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1338,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From b50057cd7d53f2cc45788df8a802c5f949994cf9 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:32:40 -0400 Subject: [PATCH 012/100] Fix lint error --- agent/hcp/telemetry/otlp_transform.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 4a81bf96706d..777911bdda9b 100644 
--- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -26,7 +26,7 @@ func transformOTLP(rm *metricdata.ResourceMetrics) (*mpb.ResourceMetrics, error) // scopeMetrics returns a slice of OTLP ScopeMetrics. func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) { - var merr *multierror.Error + var merr error out := make([]*mpb.ScopeMetrics, 0, len(scopeMetrics)) for _, sm := range scopeMetrics { ms, err := metrics(sm.Metrics) @@ -47,7 +47,7 @@ func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, // metrics returns a slice of OTLP Metric generated from OTEL metrics sdk ones. func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { - var merr *multierror.Error + var merr error out := make([]*mpb.Metric, 0, len(metrics)) for _, m := range metrics { o, err := metricType(m) @@ -63,11 +63,10 @@ func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { // metricType identifies the instrument type and converts it to OTLP format. // only float64 values are accepted since the go metrics sink only receives float64 values. func metricType(m metricdata.Metrics) (*mpb.Metric, error) { - var err error out := &mpb.Metric{ Name: m.Name, Description: m.Description, - Unit: string(m.Unit), + Unit: m.Unit, } switch a := m.Data.(type) { case metricdata.Gauge[float64]: @@ -100,7 +99,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { default: return out, fmt.Errorf("%s: %T", "unknown aggregation", a) } - return out, err + return out, nil } // DataPoints returns a slice of OTLP NumberDataPoint generated from OTEL metrics sdk ones. 
From 0c01542a6429de978f8b129872a52f0d17ef0e4e Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 17:55:06 -0400 Subject: [PATCH 013/100] early return when there are no metrics --- agent/hcp/telemetry/otel_exporter.go | 4 +++ agent/hcp/telemetry/otel_exporter_test.go | 34 +++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 365c316717b1..2845a1ec3eb0 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -41,6 +41,10 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre // Export serializes and transmits metric data to a receiver. func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { + if len(metrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 { + return nil + } + otlpMetrics, merr := transformOTLP(metrics) err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) return multierror.Append(merr, err) diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 07e959593a4d..38cf35c0a621 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -62,10 +62,35 @@ func TestExport(t *testing.T) { metrics *metricdata.ResourceMetrics client client.MetricsClient }{ + "earlyReturnWithoutScopeMetrics": { + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{ + {Metrics: []metricdata.Metrics{}}, + }, + }, + }, + "earlyReturnWithoutMetrics": { + client: &mockErrMetricsClient{}, + metrics: &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: []metricdata.ScopeMetrics{}, + }, + }, "errorWithExportFailure": { client: &mockErrMetricsClient{}, metrics: &metricdata.ResourceMetrics{ Resource: resource.Empty(), + 
ScopeMetrics: []metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "consul.raft.commitTime", + }, + }, + }, + }, }, wantErr: "failed to export metrics", }, @@ -110,8 +135,13 @@ func TestExport(t *testing.T) { } err := exp.Export(context.Background(), test.metrics) - require.Error(t, err) - require.Contains(t, err.Error(), test.wantErr) + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.NoError(t, err) }) } } From da20fe319a80560ff5c234d4b92eb316530177db Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 14:25:32 -0400 Subject: [PATCH 014/100] Add NewOTELExporter() function --- agent/hcp/telemetry/otel_exporter.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 2845a1ec3eb0..34f828c72bfb 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -18,6 +18,14 @@ type OTELExporter struct { endpoint string } +// NewOTELExporter returns a configured OTELExporter +func NewOTELExporter(client hcpclient.MetricsClient, endpoint string) *OTELExporter { + return &OTELExporter{ + client: client, + endpoint: endpoint, + } +} + // Temporality returns the Cumulative temporality for metrics aggregation. // Telemetry Gateway stores metrics in Prometheus format, so use Cummulative aggregation as default. 
func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporality { From 749b1c83a390c83cd129eeef76cb9fa330f4bd1a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:22:27 -0400 Subject: [PATCH 015/100] Downgrade to metrics SDK version: v1.15.0-rc.1 --- agent/hcp/telemetry/otel_exporter.go | 4 ++-- agent/hcp/telemetry/otel_exporter_test.go | 12 ++++++------ agent/hcp/telemetry/otlp_transform.go | 4 ++-- agent/hcp/telemetry/otlp_transform_test.go | 8 ++++---- go.mod | 10 +++++----- go.sum | 20 ++++++++++---------- 6 files changed, 29 insertions(+), 29 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 34f828c72bfb..b857cacfccf7 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -48,12 +48,12 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre } // Export serializes and transmits metric data to a receiver. -func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { +func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { if len(metrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 { return nil } - otlpMetrics, merr := transformOTLP(metrics) + otlpMetrics, merr := transformOTLP(&metrics) err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) return multierror.Append(merr, err) } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 38cf35c0a621..89943f09c6b2 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -59,12 +59,12 @@ func (m *mockMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *met func TestExport(t *testing.T) { for name, test := range map[string]struct { wantErr string - metrics *metricdata.ResourceMetrics + metrics metricdata.ResourceMetrics client client.MetricsClient 
}{ "earlyReturnWithoutScopeMetrics": { client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ {Metrics: []metricdata.Metrics{}}, @@ -73,14 +73,14 @@ func TestExport(t *testing.T) { }, "earlyReturnWithoutMetrics": { client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{}, }, }, "errorWithExportFailure": { client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ { @@ -97,7 +97,7 @@ func TestExport(t *testing.T) { "errorWithTransformFailure": { wantErr: "unknown aggregation: metricdata.Gauge[int64]", client: &mockMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ { @@ -114,7 +114,7 @@ func TestExport(t *testing.T) { "multierrorTransformExportFailure": { wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", client: &mockErrMetricsClient{}, - metrics: &metricdata.ResourceMetrics{ + metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ { diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 777911bdda9b..4ea3ab52907f 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -86,7 +86,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { DataPoints: dataPoints(a.DataPoints), }, } - case metricdata.Histogram[float64]: + case metricdata.Histogram: if a.Temporality != metricdata.CumulativeTemporality { return out, fmt.Errorf("%s: %T", "unsupported temporality", a) } @@ -121,7 +121,7 @@ func 
dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoi } // HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. -func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint[float64]) []*mpb.HistogramDataPoint { +func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { sum := dp.Sum diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 6afc030f984f..0b26cc0e6a60 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -35,7 +35,7 @@ var ( // DataPoint test case : Histogram Datapoints (Histogram) minA, maxA, sumA = 2.0, 4.0, 90.0 minB, maxB, sumB = 4.0, 150.0, 234.0 - otelHDP = []metricdata.HistogramDataPoint[float64]{{ + otelHDP = []metricdata.HistogramDataPoint{{ Attributes: alice, StartTime: start, Time: end, @@ -125,7 +125,7 @@ var ( Name: "float64-histogram", Description: "Histogram", Unit: "1", - Data: metricdata.Histogram[float64]{ + Data: metricdata.Histogram{ Temporality: metricdata.CumulativeTemporality, DataPoints: otelHDP, }, @@ -144,7 +144,7 @@ var ( Name: "invalid-histogram", Description: "Invalid histogram", Unit: "1", - Data: metricdata.Histogram[float64]{ + Data: metricdata.Histogram{ Temporality: metricdata.DeltaTemporality, DataPoints: otelHDP, }, @@ -153,7 +153,7 @@ var ( Name: "unknown", Description: "Unknown aggregation", Unit: "1", - Data: metricdata.Histogram[int64]{}, + Data: metricdata.Sum[int64]{}, }, } diff --git a/go.mod b/go.mod index 81b82cbb5270..02af412dff9f 100644 --- a/go.mod +++ b/go.mod @@ -96,9 +96,9 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 - go.opentelemetry.io/otel v1.15.0-rc.2 - go.opentelemetry.io/otel/sdk v1.15.0-rc.2 - go.opentelemetry.io/otel/sdk/metric 
v0.38.0-rc.2 + go.opentelemetry.io/otel v1.15.0-rc.1 + go.opentelemetry.io/otel/sdk v1.15.0-rc.1 + go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d @@ -232,8 +232,8 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/metric v1.15.0-rc.2 // indirect - go.opentelemetry.io/otel/trace v1.15.0-rc.2 // indirect + go.opentelemetry.io/otel/metric v1.15.0-rc.1 // indirect + go.opentelemetry.io/otel/trace v1.15.0-rc.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 78d33e0f5a7b..485bcb486b95 100644 --- a/go.sum +++ b/go.sum @@ -1080,16 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.15.0-rc.2 h1:ujPMbp/CAdjMTF9E93fr6X2W7Ub2MulkQAmXcTaNkJI= -go.opentelemetry.io/otel v1.15.0-rc.2/go.mod h1:AMBt1VNvSP4KmoaUOQcTWUVj6Te4ZRmD/v3tu0vfytI= -go.opentelemetry.io/otel/metric v1.15.0-rc.2 h1:MA7asjRHRgVdbvqYLQozHkxViPFZJ21qzi9IOKZVTnQ= -go.opentelemetry.io/otel/metric v1.15.0-rc.2/go.mod h1:/e8iKMaajFDmOXPoQsFrtr+B3XADzJS+HHePsQh2gGs= -go.opentelemetry.io/otel/sdk v1.15.0-rc.2 h1:4t4aauPUWLswNEUXAi5s3X94/kg7+NQY+6CeR9F6nEk= -go.opentelemetry.io/otel/sdk v1.15.0-rc.2/go.mod h1:/dv7lLx8SqyUgUxQeFeUUGvHbl45K4dssbaYjj/ObTQ= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2 h1:0hZK6hT73+T5BNuFW6AsBwfARsgeThwAQ7JagJ1hbYE= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.2/go.mod 
h1:GUyzM862531kqKgJkhS/LIeekJ652QyjN94dzN1zmgQ= -go.opentelemetry.io/otel/trace v1.15.0-rc.2 h1:LizBnU5zjqWkHUlZu0tC2k7qICXqPCJqoDlWdc07uw8= -go.opentelemetry.io/otel/trace v1.15.0-rc.2/go.mod h1:JBu+wEp/Ra+YOzZlvPjE6raGRlrUJE/R8aW7Wucsn/o= +go.opentelemetry.io/otel v1.15.0-rc.1 h1:KgZyVIfe3rPjWZHAZE0A9sH5U4tjyh1VeP+BFIgq944= +go.opentelemetry.io/otel v1.15.0-rc.1/go.mod h1:IZXh/uN07z/0si8lWvFW2FkwzAmSGE4DhF4quJIsLnY= +go.opentelemetry.io/otel/metric v1.15.0-rc.1 h1:ueivGgoyP2c58JZvmJriF35k238mVyRtlODD6BRgowU= +go.opentelemetry.io/otel/metric v1.15.0-rc.1/go.mod h1:bpPBxLwoWWmiK+Hmb6ZaG0zDLIi59lK7M+GjgZ5PN+4= +go.opentelemetry.io/otel/sdk v1.15.0-rc.1 h1:WtWiH5l19vwpdGIx9/Wou9l7a/butFoAOuJktWnlOro= +go.opentelemetry.io/otel/sdk v1.15.0-rc.1/go.mod h1:93NwQ8NqCb/QSUF7URdJur5Fvfm9rztE+2eJLpsKdWc= +go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 h1:PYmYfBlAZeg4y4VNjrKY24yrD2Jzb47x7Dz6rAI9lXg= +go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1/go.mod h1:LH4ApPwmFOe8mvPS7a56gW4WT9IhtwuEA/mN8FsVDX0= +go.opentelemetry.io/otel/trace v1.15.0-rc.1 h1:xK6jLm8h2KFhdItNvzAuNvnoWjRPU9u7whXNNBMxjtc= +go.opentelemetry.io/otel/trace v1.15.0-rc.1/go.mod h1:2cLx8hBNS4rUWB+JA9PuCGggQl+KJioCaoV2CKewY4s= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= From 383b366050b13b8dce4175da3d75044f2ca92e48 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:38:40 -0400 Subject: [PATCH 016/100] Fix imports --- agent/hcp/telemetry/otel_exporter.go | 1 + agent/hcp/telemetry/otel_exporter_test.go | 1 + 2 files changed, 2 insertions(+) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index b857cacfccf7..35be58ed7915 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -4,6 
+4,7 @@ import ( "context" hcpclient "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 89943f09c6b2..cff7802a3a55 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/hashicorp/consul/agent/hcp/client" + "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" From 1d222b144e167c1d232d9294cb415d48914ccef1 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 9 May 2023 17:20:32 -0400 Subject: [PATCH 017/100] fix small nits with comments and url.URL --- agent/hcp/telemetry/doc.go | 12 ++++++++++++ agent/hcp/telemetry/otel_exporter.go | 23 +++++++++++++---------- agent/hcp/telemetry/otel_exporter_test.go | 4 ++-- 3 files changed, 27 insertions(+), 12 deletions(-) create mode 100644 agent/hcp/telemetry/doc.go diff --git a/agent/hcp/telemetry/doc.go b/agent/hcp/telemetry/doc.go new file mode 100644 index 000000000000..4ef18f39bd30 --- /dev/null +++ b/agent/hcp/telemetry/doc.go @@ -0,0 +1,12 @@ +// Package telemetry implements functionality to collect, aggregate, convert and export +// telemetry data in OpenTelemetry Protocol (OTLP) format. +// +// The entrypoint is the OpenTelemetry (OTEL) go-metrics sink which: +// - Receives metric data. +// - Aggregates metric data using the OTEL Go Metrics SDK. +// - Exports metric data using a configurable OTEL exporter. +// +// The package also provides an OTEL exporter implementation to be used within the sink, which: +// - Transforms metric data from the Metrics SDK OTEL representation to OTLP format. +// - Exports OTLP metric data to an external endpoint using a configurable client. 
+package telemetry diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 35be58ed7915..b0bd81e585a7 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -2,28 +2,29 @@ package telemetry import ( "context" - - hcpclient "github.com/hashicorp/consul/agent/hcp/client" + "net/url" "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" "go.opentelemetry.io/otel/sdk/metric/metricdata" + + hcpclient "github.com/hashicorp/consul/agent/hcp/client" ) // OTELExporter is a custom implementation of a OTEL Metrics SDK metrics.Exporter. // The exporter is used by a OTEL Metrics SDK PeriodicReader to export aggregated metrics. // This allows us to use a custom client - HCP authenticated MetricsClient. type OTELExporter struct { - client hcpclient.MetricsClient - endpoint string + client hcpclient.MetricsClient + url url.URL } // NewOTELExporter returns a configured OTELExporter -func NewOTELExporter(client hcpclient.MetricsClient, endpoint string) *OTELExporter { +func NewOTELExporter(client hcpclient.MetricsClient, url url.URL) *OTELExporter { return &OTELExporter{ - client: client, - endpoint: endpoint, + client: client, + url: url, } } @@ -34,6 +35,8 @@ func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporali } // Aggregation returns the Aggregation to use for an instrument kind. +// The default implementation provided by the OTEL Metrics SDK library DefaultAggregationSelector panics. +// This custom version replicates that logic, but removes the panic. 
func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggregation { switch kind { case metric.InstrumentKindObservableGauge: @@ -55,12 +58,12 @@ func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMe } otlpMetrics, merr := transformOTLP(&metrics) - err := e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint) + err := e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) return multierror.Append(merr, err) } -// ForceFlush does nothing, as the MetricsClient client holds no state. +// ForceFlush is a no-op, as the MetricsClient client holds no state. func (e *OTELExporter) ForceFlush(ctx context.Context) error { return ctx.Err() } -// Shutdown does nothing, as the MetricsClient is a HTTP client that requires no graceful shutdown. +// Shutdown is a no-op, as the MetricsClient is a HTTP client that requires no graceful shutdown. func (e *OTELExporter) Shutdown(ctx context.Context) error { return ctx.Err() } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index cff7802a3a55..6434e54f023b 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -5,14 +5,14 @@ import ( "fmt" "testing" - "github.com/hashicorp/consul/agent/hcp/client" - "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" "go.opentelemetry.io/otel/sdk/metric/metricdata" "go.opentelemetry.io/otel/sdk/resource" metricpb "go.opentelemetry.io/proto/otlp/metrics/v1" + + "github.com/hashicorp/consul/agent/hcp/client" ) func TestTemporality(t *testing.T) { From 5564bcebcd8cc59971c344aec47aff458bda9857 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 13:14:01 -0400 Subject: [PATCH 018/100] Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile --- agent/hcp/telemetry/otel_exporter_test.go | 41 +++++++++++++++------- 
agent/hcp/telemetry/otlp_transform_test.go | 1 + 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 6434e54f023b..756f20af792f 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -16,11 +16,13 @@ import ( ) func TestTemporality(t *testing.T) { + t.Parallel() exp := &OTELExporter{} require.Equal(t, metricdata.CumulativeTemporality, exp.Temporality(metric.InstrumentKindCounter)) } func TestAggregation(t *testing.T) { + t.Parallel() for name, test := range map[string]struct { kind metric.InstrumentKind expAgg aggregation.Aggregation @@ -38,33 +40,34 @@ func TestAggregation(t *testing.T) { expAgg: aggregation.ExplicitBucketHistogram{Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, NoMinMax: false}, }, } { + test := test t.Run(name, func(t *testing.T) { + t.Parallel() exp := &OTELExporter{} require.Equal(t, test.expAgg, exp.Aggregation(test.kind)) }) } } -type mockErrMetricsClient struct{} - -func (m *mockErrMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { - return fmt.Errorf("failed to export metrics") +type mockMetricsClient struct { + exportErr error } -type mockMetricsClient struct{} - func (m *mockMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { - return nil + return m.exportErr } func TestExport(t *testing.T) { + t.Parallel() for name, test := range map[string]struct { wantErr string metrics metricdata.ResourceMetrics client client.MetricsClient }{ "earlyReturnWithoutScopeMetrics": { - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: nil, + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ @@ -73,14 +76,18 @@ func TestExport(t *testing.T) { }, }, 
"earlyReturnWithoutMetrics": { - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: nil, + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{}, }, }, "errorWithExportFailure": { - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: fmt.Errorf("failed to export metrics."), + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ @@ -114,7 +121,9 @@ func TestExport(t *testing.T) { }, "multierrorTransformExportFailure": { wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", - client: &mockErrMetricsClient{}, + client: &mockMetricsClient{ + exportErr: fmt.Errorf("failed to export metrics"), + }, metrics: metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: []metricdata.ScopeMetrics{ @@ -130,7 +139,9 @@ func TestExport(t *testing.T) { }, }, } { + test := test t.Run(name, func(t *testing.T) { + t.Parallel() exp := &OTELExporter{ client: test.client, } @@ -148,17 +159,21 @@ func TestExport(t *testing.T) { } func TestForceFlush(t *testing.T) { + t.Parallel() exp := &OTELExporter{} ctx, cancel := context.WithCancel(context.Background()) cancel() - require.Error(t, exp.ForceFlush(ctx)) + err := exp.ForceFlush(ctx) + require.ErrorIs(t, err, context.Canceled) } func TestShutdown(t *testing.T) { + t.Parallel() exp := &OTELExporter{} ctx, cancel := context.WithCancel(context.Background()) cancel() - require.Error(t, exp.Shutdown(ctx)) + err := exp.Shutdown(ctx) + require.ErrorIs(t, err, context.Canceled) } diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 0b26cc0e6a60..5d7639292498 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -234,6 +234,7 @@ var ( // TestTransformOTLP runs tests from the "bottom-up" of the metricdata data types. 
func TestTransformOTLP(t *testing.T) { + t.Parallel() // Histogram DataPoint Test Case (Histograms) assert.Equal(t, otlpHDP, histogramDataPoints(otelHDP)) From 424a065b7e6e037de0b53b2727edd732e0455e19 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 13:53:30 -0400 Subject: [PATCH 019/100] Cleanup error handling and clarify empty metrics case --- agent/hcp/telemetry/otel_exporter.go | 10 ++--- agent/hcp/telemetry/otel_exporter_test.go | 43 ++------------------ agent/hcp/telemetry/otlp_transform.go | 47 +++++++++------------- agent/hcp/telemetry/otlp_transform_test.go | 15 +++---- 4 files changed, 33 insertions(+), 82 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index b0bd81e585a7..da5e774e59d6 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -4,7 +4,6 @@ import ( "context" "net/url" - "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/metric/aggregation" "go.opentelemetry.io/otel/sdk/metric/metricdata" @@ -53,13 +52,12 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre // Export serializes and transmits metric data to a receiver. func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { - if len(metrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 { + otlpMetrics := transformOTLP(&metrics) + emptyMetrics := len(otlpMetrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 + if emptyMetrics { return nil } - - otlpMetrics, merr := transformOTLP(&metrics) - err := e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) - return multierror.Append(merr, err) + return e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) } // ForceFlush is a no-op, as the MetricsClient client holds no state. 
diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 756f20af792f..bf8ffb5aea0c 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -69,10 +69,8 @@ func TestExport(t *testing.T) { exportErr: nil, }, metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - {Metrics: []metricdata.Metrics{}}, - }, + Resource: resource.Empty(), + ScopeMetrics: nil, }, }, "earlyReturnWithoutMetrics": { @@ -95,6 +93,7 @@ func TestExport(t *testing.T) { Metrics: []metricdata.Metrics{ { Name: "consul.raft.commitTime", + Data: metricdata.Gauge[float64]{}, }, }, }, @@ -102,42 +101,6 @@ func TestExport(t *testing.T) { }, wantErr: "failed to export metrics", }, - "errorWithTransformFailure": { - wantErr: "unknown aggregation: metricdata.Gauge[int64]", - client: &mockMetricsClient{}, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - { - Metrics: []metricdata.Metrics{ - { - // unsupported, only float64 supported - Data: metricdata.Gauge[int64]{}, - }, - }, - }, - }, - }, - }, - "multierrorTransformExportFailure": { - wantErr: "2 errors occurred:\n\t* unknown aggregation: metricdata.Gauge[int64]\n\t* failed to export metrics", - client: &mockMetricsClient{ - exportErr: fmt.Errorf("failed to export metrics"), - }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - { - Metrics: []metricdata.Metrics{ - { - // unsupported, only float64 supported - Data: metricdata.Gauge[int64]{}, - }, - }, - }, - }, - }, - }, } { test := test t.Run(name, func(t *testing.T) { diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 4ea3ab52907f..f4547c56813b 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -3,7 +3,6 @@ package telemetry import ( 
"fmt" - "github.com/hashicorp/go-multierror" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/metric/metricdata" cpb "go.opentelemetry.io/proto/otlp/common/v1" @@ -14,26 +13,21 @@ import ( // TransformOTLP returns an OTLP ResourceMetrics generated from OTEL metrics. If rm // contains invalid ScopeMetrics, an error will be returned along with an OTLP // ResourceMetrics that contains partial OTLP ScopeMetrics. -func transformOTLP(rm *metricdata.ResourceMetrics) (*mpb.ResourceMetrics, error) { - sms, err := scopeMetrics(rm.ScopeMetrics) +func transformOTLP(rm *metricdata.ResourceMetrics) *mpb.ResourceMetrics { + sms := scopeMetricsToPB(rm.ScopeMetrics) return &mpb.ResourceMetrics{ Resource: &rpb.Resource{ - Attributes: attributes(rm.Resource.Iter()), + Attributes: attributesToPB(rm.Resource.Iter()), }, ScopeMetrics: sms, - }, err + } } // scopeMetrics returns a slice of OTLP ScopeMetrics. -func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) { - var merr error +func scopeMetricsToPB(scopeMetrics []metricdata.ScopeMetrics) []*mpb.ScopeMetrics { out := make([]*mpb.ScopeMetrics, 0, len(scopeMetrics)) for _, sm := range scopeMetrics { - ms, err := metrics(sm.Metrics) - if err != nil { - merr = multierror.Append(merr, err) - } - + ms := metricsToPB(sm.Metrics) out = append(out, &mpb.ScopeMetrics{ Scope: &cpb.InstrumentationScope{ Name: sm.Scope.Name, @@ -42,27 +36,26 @@ func scopeMetrics(scopeMetrics []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, Metrics: ms, }) } - return out, merr + return out } // metrics returns a slice of OTLP Metric generated from OTEL metrics sdk ones. 
-func metrics(metrics []metricdata.Metrics) ([]*mpb.Metric, error) { - var merr error +func metricsToPB(metrics []metricdata.Metrics) []*mpb.Metric { out := make([]*mpb.Metric, 0, len(metrics)) for _, m := range metrics { - o, err := metricType(m) + o, err := metricTypeToPB(m) if err != nil { - merr = multierror.Append(merr, err) + // TODO: Emit metric when a transformation occurs. continue } out = append(out, o) } - return out, merr + return out } // metricType identifies the instrument type and converts it to OTLP format. // only float64 values are accepted since the go metrics sink only receives float64 values. -func metricType(m metricdata.Metrics) (*mpb.Metric, error) { +func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { out := &mpb.Metric{ Name: m.Name, Description: m.Description, @@ -72,7 +65,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { case metricdata.Gauge[float64]: out.Data = &mpb.Metric_Gauge{ Gauge: &mpb.Gauge{ - DataPoints: dataPoints(a.DataPoints), + DataPoints: dataPointsToPB(a.DataPoints), }, } case metricdata.Sum[float64]: @@ -83,7 +76,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { Sum: &mpb.Sum{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, IsMonotonic: a.IsMonotonic, - DataPoints: dataPoints(a.DataPoints), + DataPoints: dataPointsToPB(a.DataPoints), }, } case metricdata.Histogram: @@ -93,7 +86,7 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { out.Data = &mpb.Metric_Histogram{ Histogram: &mpb.Histogram{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, - DataPoints: histogramDataPoints(a.DataPoints), + DataPoints: histogramDataPointsToPB(a.DataPoints), }, } default: @@ -103,11 +96,11 @@ func metricType(m metricdata.Metrics) (*mpb.Metric, error) { } // DataPoints returns a slice of OTLP NumberDataPoint generated from OTEL metrics sdk ones. 
-func dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoint { +func dataPointsToPB(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoint { out := make([]*mpb.NumberDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { ndp := &mpb.NumberDataPoint{ - Attributes: attributes(dp.Attributes.Iter()), + Attributes: attributesToPB(dp.Attributes.Iter()), StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), TimeUnixNano: uint64(dp.Time.UnixNano()), } @@ -121,12 +114,12 @@ func dataPoints(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoi } // HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. -func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { +func histogramDataPointsToPB(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { sum := dp.Sum hdp := &mpb.HistogramDataPoint{ - Attributes: attributes(dp.Attributes.Iter()), + Attributes: attributesToPB(dp.Attributes.Iter()), StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), TimeUnixNano: uint64(dp.Time.UnixNano()), Count: dp.Count, @@ -147,7 +140,7 @@ func histogramDataPoints(dataPoints []metricdata.HistogramDataPoint) []*mpb.Hist // attributes transforms items of an attribute iterator into OTLP key-values. // Currently, labels are only key-value pairs. 
-func attributes(iter attribute.Iterator) []*cpb.KeyValue { +func attributesToPB(iter attribute.Iterator) []*cpb.KeyValue { l := iter.Len() if iter.Len() == 0 { return nil diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 5d7639292498..1f2aae4a5bc9 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -236,24 +236,21 @@ var ( func TestTransformOTLP(t *testing.T) { t.Parallel() // Histogram DataPoint Test Case (Histograms) - assert.Equal(t, otlpHDP, histogramDataPoints(otelHDP)) + assert.Equal(t, otlpHDP, histogramDataPointsToPB(otelHDP)) // Number DataPoint Test Case (Counters / Gauges) - require.Equal(t, otlpDP, dataPoints(otelDP)) + require.Equal(t, otlpDP, dataPointsToPB(otelDP)) // Metrics Test Case - m, err := metrics(otelMetrics) + m := metricsToPB(otelMetrics) require.Equal(t, otlpMetrics, m) require.Equal(t, len(otlpMetrics), 3) - require.Contains(t, err.Error(), "3 errors occurred") // Scope Metrics Test Case - sm, err := scopeMetrics(otelScopeMetrics) + sm := scopeMetricsToPB(otelScopeMetrics) require.Equal(t, otlpScopeMetrics, sm) - require.Contains(t, err.Error(), "3 errors occurred") - // Resource Metrics Test Case - rm, err := transformOTLP(otelResourceMetrics) + // // Resource Metrics Test Case + rm := transformOTLP(otelResourceMetrics) require.Equal(t, otlpResourceMetrics, rm) - require.Contains(t, err.Error(), "3 errors occurred") } From 470a11df09346212346e37ed9ad28abef794dcfe Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 14:00:48 -0400 Subject: [PATCH 020/100] Fix input/expected naming in otel_transform_test.go --- agent/hcp/telemetry/otel_exporter_test.go | 42 +++++++-------- agent/hcp/telemetry/otlp_transform.go | 2 +- agent/hcp/telemetry/otlp_transform_test.go | 62 +++++++++++----------- 3 files changed, 52 insertions(+), 54 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter_test.go 
b/agent/hcp/telemetry/otel_exporter_test.go index bf8ffb5aea0c..6008121998db 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -65,40 +65,31 @@ func TestExport(t *testing.T) { client client.MetricsClient }{ "earlyReturnWithoutScopeMetrics": { - client: &mockMetricsClient{ - exportErr: nil, - }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: nil, - }, + client: &mockMetricsClient{}, + metrics: mutateMetrics(nil), }, "earlyReturnWithoutMetrics": { - client: &mockMetricsClient{ - exportErr: nil, - }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{}, + client: &mockMetricsClient{}, + metrics: mutateMetrics([]metricdata.ScopeMetrics{ + {Metrics: []metricdata.Metrics{}}, }, + ), }, "errorWithExportFailure": { client: &mockMetricsClient{ exportErr: fmt.Errorf("failed to export metrics."), }, - metrics: metricdata.ResourceMetrics{ - Resource: resource.Empty(), - ScopeMetrics: []metricdata.ScopeMetrics{ - { - Metrics: []metricdata.Metrics{ - { - Name: "consul.raft.commitTime", - Data: metricdata.Gauge[float64]{}, - }, + metrics: mutateMetrics([]metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "consul.raft.commitTime", + Data: metricdata.Gauge[float64]{}, }, }, }, }, + ), wantErr: "failed to export metrics", }, } { @@ -140,3 +131,10 @@ func TestShutdown(t *testing.T) { err := exp.Shutdown(ctx) require.ErrorIs(t, err, context.Canceled) } + +func mutateMetrics(m []metricdata.ScopeMetrics) metricdata.ResourceMetrics { + return metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: m, + } +} diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index f4547c56813b..231646a7300b 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -45,7 +45,7 @@ func metricsToPB(metrics []metricdata.Metrics) 
[]*mpb.Metric { for _, m := range metrics { o, err := metricTypeToPB(m) if err != nil { - // TODO: Emit metric when a transformation occurs. + // TODO: Emit metric when a transformation failure occurs. continue } out = append(out, o) diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 1f2aae4a5bc9..c331cb002396 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -35,7 +35,7 @@ var ( // DataPoint test case : Histogram Datapoints (Histogram) minA, maxA, sumA = 2.0, 4.0, 90.0 minB, maxB, sumB = 4.0, 150.0, 234.0 - otelHDP = []metricdata.HistogramDataPoint{{ + inputHDP = []metricdata.HistogramDataPoint{{ Attributes: alice, StartTime: start, Time: end, @@ -57,7 +57,7 @@ var ( Sum: sumB, }} - otlpHDP = []*mpb.HistogramDataPoint{{ + expectedHDP = []*mpb.HistogramDataPoint{{ Attributes: []*cpb.KeyValue{pbAlice}, StartTimeUnixNano: uint64(start.UnixNano()), TimeUnixNano: uint64(end.UnixNano()), @@ -79,12 +79,12 @@ var ( Max: &maxB, }} // DataPoint test case : Number Datapoints (Gauge / Counter) - otelDP = []metricdata.DataPoint[float64]{ + inputDP = []metricdata.DataPoint[float64]{ {Attributes: alice, StartTime: start, Time: end, Value: 1.0}, {Attributes: bob, StartTime: start, Time: end, Value: 2.0}, } - otlpDP = []*mpb.NumberDataPoint{ + expectedDP = []*mpb.NumberDataPoint{ { Attributes: []*cpb.KeyValue{pbAlice}, StartTimeUnixNano: uint64(start.UnixNano()), @@ -104,12 +104,12 @@ var ( // - 1 invalid metric type // - 2 invalid cummulative temporalities (only cummulative supported) // - 3 types (Gauge, Counter, and Histogram) supported - otelMetrics = []metricdata.Metrics{ + inputMetrics = []metricdata.Metrics{ { Name: "float64-gauge", Description: "Gauge with float64 values", Unit: "1", - Data: metricdata.Gauge[float64]{DataPoints: otelDP}, + Data: metricdata.Gauge[float64]{DataPoints: inputDP}, }, { Name: "float64-sum", @@ -118,7 +118,7 @@ var ( Data: 
metricdata.Sum[float64]{ Temporality: metricdata.CumulativeTemporality, IsMonotonic: false, - DataPoints: otelDP, + DataPoints: inputDP, }, }, { @@ -127,7 +127,7 @@ var ( Unit: "1", Data: metricdata.Histogram{ Temporality: metricdata.CumulativeTemporality, - DataPoints: otelHDP, + DataPoints: inputHDP, }, }, { @@ -137,7 +137,7 @@ var ( Data: metricdata.Sum[float64]{ Temporality: metricdata.DeltaTemporality, IsMonotonic: false, - DataPoints: otelDP, + DataPoints: inputDP, }, }, { @@ -146,7 +146,7 @@ var ( Unit: "1", Data: metricdata.Histogram{ Temporality: metricdata.DeltaTemporality, - DataPoints: otelHDP, + DataPoints: inputHDP, }, }, { @@ -157,12 +157,12 @@ var ( }, } - otlpMetrics = []*mpb.Metric{ + expectedMetrics = []*mpb.Metric{ { Name: "float64-gauge", Description: "Gauge with float64 values", Unit: "1", - Data: &mpb.Metric_Gauge{Gauge: &mpb.Gauge{DataPoints: otlpDP}}, + Data: &mpb.Metric_Gauge{Gauge: &mpb.Gauge{DataPoints: expectedDP}}, }, { Name: "float64-sum", @@ -171,7 +171,7 @@ var ( Data: &mpb.Metric_Sum{Sum: &mpb.Sum{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, IsMonotonic: false, - DataPoints: otlpDP, + DataPoints: expectedDP, }}, }, { @@ -180,38 +180,38 @@ var ( Unit: "1", Data: &mpb.Metric_Histogram{Histogram: &mpb.Histogram{ AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, - DataPoints: otlpHDP, + DataPoints: expectedHDP, }}, }, } // ScopeMetrics Test Cases - otelScopeMetrics = []metricdata.ScopeMetrics{{ + inputScopeMetrics = []metricdata.ScopeMetrics{{ Scope: instrumentation.Scope{ Name: "test/code/path", Version: "v0.1.0", }, - Metrics: otelMetrics, + Metrics: inputMetrics, }} - otlpScopeMetrics = []*mpb.ScopeMetrics{{ + expectedScopeMetrics = []*mpb.ScopeMetrics{{ Scope: &cpb.InstrumentationScope{ Name: "test/code/path", Version: "v0.1.0", }, - Metrics: otlpMetrics, + Metrics: expectedMetrics, }} // ResourceMetrics Test Cases - otelResourceMetrics = 
&metricdata.ResourceMetrics{ + inputResourceMetrics = &metricdata.ResourceMetrics{ Resource: resource.NewSchemaless( semconv.ServiceName("test server"), semconv.ServiceVersion("v0.1.0"), ), - ScopeMetrics: otelScopeMetrics, + ScopeMetrics: inputScopeMetrics, } - otlpResourceMetrics = &mpb.ResourceMetrics{ + expectedResourceMetrics = &mpb.ResourceMetrics{ Resource: &rpb.Resource{ Attributes: []*cpb.KeyValue{ { @@ -228,7 +228,7 @@ var ( }, }, }, - ScopeMetrics: otlpScopeMetrics, + ScopeMetrics: expectedScopeMetrics, } ) @@ -236,21 +236,21 @@ var ( func TestTransformOTLP(t *testing.T) { t.Parallel() // Histogram DataPoint Test Case (Histograms) - assert.Equal(t, otlpHDP, histogramDataPointsToPB(otelHDP)) + assert.Equal(t, expectedHDP, histogramDataPointsToPB(inputHDP)) // Number DataPoint Test Case (Counters / Gauges) - require.Equal(t, otlpDP, dataPointsToPB(otelDP)) + require.Equal(t, expectedDP, dataPointsToPB(inputDP)) // Metrics Test Case - m := metricsToPB(otelMetrics) - require.Equal(t, otlpMetrics, m) - require.Equal(t, len(otlpMetrics), 3) + m := metricsToPB(inputMetrics) + require.Equal(t, expectedMetrics, m) + require.Equal(t, len(expectedMetrics), 3) // Scope Metrics Test Case - sm := scopeMetricsToPB(otelScopeMetrics) - require.Equal(t, otlpScopeMetrics, sm) + sm := scopeMetricsToPB(inputScopeMetrics) + require.Equal(t, expectedScopeMetrics, sm) // // Resource Metrics Test Case - rm := transformOTLP(otelResourceMetrics) - require.Equal(t, otlpResourceMetrics, rm) + rm := transformOTLP(inputResourceMetrics) + require.Equal(t, expectedResourceMetrics, rm) } From be0b01b79631cb209fa36af9bd37264fe3a31265 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 14:15:32 -0400 Subject: [PATCH 021/100] add comment for metric tracking --- agent/hcp/telemetry/otel_exporter.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 
da5e774e59d6..39c5118411d9 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -61,7 +61,13 @@ func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMe } // ForceFlush is a no-op, as the MetricsClient client holds no state. -func (e *OTELExporter) ForceFlush(ctx context.Context) error { return ctx.Err() } +func (e *OTELExporter) ForceFlush(ctx context.Context) error { + // TODO: Emit metric when this operation occurs. + return ctx.Err() +} // Shutdown is a no-op, as the MetricsClient is a HTTP client that requires no graceful shutdown. -func (e *OTELExporter) Shutdown(ctx context.Context) error { return ctx.Err() } +func (e *OTELExporter) Shutdown(ctx context.Context) error { + // TODO: Emit metric when this operation occurs. + return ctx.Err() +} From 325bb4d3a778455364f6f9ccbe5de0e674fcafa4 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 14:37:27 -0400 Subject: [PATCH 022/100] Add a general isEmpty method --- agent/hcp/telemetry/otel_exporter.go | 3 +-- agent/hcp/telemetry/otlp_transform.go | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 39c5118411d9..4c774f5acb0d 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -53,8 +53,7 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre // Export serializes and transmits metric data to a receiver. 
func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { otlpMetrics := transformOTLP(&metrics) - emptyMetrics := len(otlpMetrics.ScopeMetrics) == 0 || len(metrics.ScopeMetrics[0].Metrics) == 0 - if emptyMetrics { + if isEmpty(otlpMetrics) { return nil } return e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 231646a7300b..d91af179c0c2 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -10,6 +10,25 @@ import ( rpb "go.opentelemetry.io/proto/otlp/resource/v1" ) +// isEmpty verifies if the given OTLP protobuf metrics contains metric data. +// isEmpty returns true if no ScopeMetrics exist or all metrics within ScopeMetrics are empty. +func isEmpty(rm *mpb.ResourceMetrics) bool { + // No ScopeMetrics + if len(rm.ScopeMetrics) == 0 { + return true + } + + // If any inner metrics contain data, return false. + for _, v := range rm.ScopeMetrics { + if len(v.Metrics) != 0 { + return false + } + } + + // All inner metrics are empty. + return true +} + // TransformOTLP returns an OTLP ResourceMetrics generated from OTEL metrics. If rm // contains invalid ScopeMetrics, an error will be returned along with an OTLP // ResourceMetrics that contains partial OTLP ScopeMetrics. 
From a0352ac883e1e4d0153b0c8c0d67c1dd2118a679 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 10 May 2023 16:34:52 -0400 Subject: [PATCH 023/100] Add clear error types --- agent/hcp/telemetry/otlp_transform.go | 12 +++- agent/hcp/telemetry/otlp_transform_test.go | 69 ++++++++++++++-------- 2 files changed, 53 insertions(+), 28 deletions(-) diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index d91af179c0c2..37b071e2d564 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -1,6 +1,7 @@ package telemetry import ( + "errors" "fmt" "go.opentelemetry.io/otel/attribute" @@ -10,6 +11,11 @@ import ( rpb "go.opentelemetry.io/proto/otlp/resource/v1" ) +var ( + aggregationErr = errors.New("unsupported aggregation") + temporalityErr = errors.New("unsupported temporality") +) + // isEmpty verifies if the given OTLP protobuf metrics contains metric data. // isEmpty returns true if no ScopeMetrics exist or all metrics within ScopeMetrics are empty. 
func isEmpty(rm *mpb.ResourceMetrics) bool { @@ -89,7 +95,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { } case metricdata.Sum[float64]: if a.Temporality != metricdata.CumulativeTemporality { - return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + return out, fmt.Errorf("error: %w: %T", temporalityErr, a) } out.Data = &mpb.Metric_Sum{ Sum: &mpb.Sum{ @@ -100,7 +106,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { } case metricdata.Histogram: if a.Temporality != metricdata.CumulativeTemporality { - return out, fmt.Errorf("%s: %T", "unsupported temporality", a) + return out, fmt.Errorf("error: %w: %T", temporalityErr, a) } out.Data = &mpb.Metric_Histogram{ Histogram: &mpb.Histogram{ @@ -109,7 +115,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { }, } default: - return out, fmt.Errorf("%s: %T", "unknown aggregation", a) + return out, fmt.Errorf("error: %w: %T", aggregationErr, a) } return out, nil } diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index c331cb002396..7b622967e9a2 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -99,6 +99,34 @@ var ( }, } + invalidSumTemporality = metricdata.Metrics{ + Name: "invalid-sum", + Description: "Sum with invalid temporality", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.DeltaTemporality, + IsMonotonic: false, + DataPoints: inputDP, + }, + } + + invalidSumAgg = metricdata.Metrics{ + Name: "unknown", + Description: "Unknown aggregation", + Unit: "1", + Data: metricdata.Sum[int64]{}, + } + + invalidHistTemporality = metricdata.Metrics{ + Name: "invalid-histogram", + Description: "Invalid histogram", + Unit: "1", + Data: metricdata.Histogram{ + Temporality: metricdata.DeltaTemporality, + DataPoints: inputHDP, + }, + } + // Metrics Test Case // - 3 invalid metrics and 3 Valid to test filtering // - 1 invalid metric type 
@@ -130,31 +158,9 @@ var ( DataPoints: inputHDP, }, }, - { - Name: "invalid-sum", - Description: "Sum with invalid temporality", - Unit: "1", - Data: metricdata.Sum[float64]{ - Temporality: metricdata.DeltaTemporality, - IsMonotonic: false, - DataPoints: inputDP, - }, - }, - { - Name: "invalid-histogram", - Description: "Invalid histogram", - Unit: "1", - Data: metricdata.Histogram{ - Temporality: metricdata.DeltaTemporality, - DataPoints: inputHDP, - }, - }, - { - Name: "unknown", - Description: "Unknown aggregation", - Unit: "1", - Data: metricdata.Sum[int64]{}, - }, + invalidSumTemporality, + invalidHistTemporality, + invalidSumAgg, } expectedMetrics = []*mpb.Metric{ @@ -241,6 +247,19 @@ func TestTransformOTLP(t *testing.T) { // Number DataPoint Test Case (Counters / Gauges) require.Equal(t, expectedDP, dataPointsToPB(inputDP)) + // MetricType Error Test Cases + _, err := metricTypeToPB(invalidHistTemporality) + require.Error(t, err) + require.ErrorIs(t, err, temporalityErr) + + _, err = metricTypeToPB(invalidSumTemporality) + require.Error(t, err) + require.ErrorIs(t, err, temporalityErr) + + _, err = metricTypeToPB(invalidSumAgg) + require.Error(t, err) + require.ErrorIs(t, err, aggregationErr) + // Metrics Test Case m := metricsToPB(inputMetrics) require.Equal(t, expectedMetrics, m) From 2d356f677aed84eea40c342d7f629561a15c5b70 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 00:00:55 -0400 Subject: [PATCH 024/100] update to latest version 1.15.0 of OTEL --- agent/hcp/telemetry/otel_exporter.go | 4 ++-- agent/hcp/telemetry/otel_exporter_test.go | 6 ++--- agent/hcp/telemetry/otlp_transform.go | 4 ++-- agent/hcp/telemetry/otlp_transform_test.go | 6 ++--- go.mod | 14 +++++------ go.sum | 28 +++++++++++----------- 6 files changed, 31 insertions(+), 31 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 4c774f5acb0d..02f6a07f8371 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ 
b/agent/hcp/telemetry/otel_exporter.go @@ -51,8 +51,8 @@ func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggre } // Export serializes and transmits metric data to a receiver. -func (e *OTELExporter) Export(ctx context.Context, metrics metricdata.ResourceMetrics) error { - otlpMetrics := transformOTLP(&metrics) +func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { + otlpMetrics := transformOTLP(metrics) if isEmpty(otlpMetrics) { return nil } diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 6008121998db..0e3d3fcc1e4f 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -61,7 +61,7 @@ func TestExport(t *testing.T) { t.Parallel() for name, test := range map[string]struct { wantErr string - metrics metricdata.ResourceMetrics + metrics *metricdata.ResourceMetrics client client.MetricsClient }{ "earlyReturnWithoutScopeMetrics": { @@ -132,8 +132,8 @@ func TestShutdown(t *testing.T) { require.ErrorIs(t, err, context.Canceled) } -func mutateMetrics(m []metricdata.ScopeMetrics) metricdata.ResourceMetrics { - return metricdata.ResourceMetrics{ +func mutateMetrics(m []metricdata.ScopeMetrics) *metricdata.ResourceMetrics { + return &metricdata.ResourceMetrics{ Resource: resource.Empty(), ScopeMetrics: m, } diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go index 37b071e2d564..7ba1650ffd05 100644 --- a/agent/hcp/telemetry/otlp_transform.go +++ b/agent/hcp/telemetry/otlp_transform.go @@ -104,7 +104,7 @@ func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { DataPoints: dataPointsToPB(a.DataPoints), }, } - case metricdata.Histogram: + case metricdata.Histogram[float64]: if a.Temporality != metricdata.CumulativeTemporality { return out, fmt.Errorf("error: %w: %T", temporalityErr, a) } @@ -139,7 +139,7 @@ func dataPointsToPB(dataPoints 
[]metricdata.DataPoint[float64]) []*mpb.NumberDat } // HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. -func histogramDataPointsToPB(dataPoints []metricdata.HistogramDataPoint) []*mpb.HistogramDataPoint { +func histogramDataPointsToPB(dataPoints []metricdata.HistogramDataPoint[float64]) []*mpb.HistogramDataPoint { out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) for _, dp := range dataPoints { sum := dp.Sum diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go index 7b622967e9a2..1c22e9a5cd75 100644 --- a/agent/hcp/telemetry/otlp_transform_test.go +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -35,7 +35,7 @@ var ( // DataPoint test case : Histogram Datapoints (Histogram) minA, maxA, sumA = 2.0, 4.0, 90.0 minB, maxB, sumB = 4.0, 150.0, 234.0 - inputHDP = []metricdata.HistogramDataPoint{{ + inputHDP = []metricdata.HistogramDataPoint[float64]{{ Attributes: alice, StartTime: start, Time: end, @@ -121,7 +121,7 @@ var ( Name: "invalid-histogram", Description: "Invalid histogram", Unit: "1", - Data: metricdata.Histogram{ + Data: metricdata.Histogram[float64]{ Temporality: metricdata.DeltaTemporality, DataPoints: inputHDP, }, @@ -153,7 +153,7 @@ var ( Name: "float64-histogram", Description: "Histogram", Unit: "1", - Data: metricdata.Histogram{ + Data: metricdata.Histogram[float64]{ Temporality: metricdata.CumulativeTemporality, DataPoints: inputHDP, }, diff --git a/go.mod b/go.mod index 02af412dff9f..ffafa5157295 100644 --- a/go.mod +++ b/go.mod @@ -96,16 +96,16 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 - go.opentelemetry.io/otel v1.15.0-rc.1 - go.opentelemetry.io/otel/sdk v1.15.0-rc.1 - go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 + go.opentelemetry.io/otel v1.15.1 + go.opentelemetry.io/otel/sdk v1.15.1 + go.opentelemetry.io/otel/sdk/metric v0.38.1 go.opentelemetry.io/proto/otlp v0.19.0 
go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.6.0 + golang.org/x/sys v0.7.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -154,7 +154,7 @@ require ( github.com/dimchansky/utfbom v1.1.0 // indirect github.com/envoyproxy/protoc-gen-validate v0.1.0 // indirect github.com/form3tech-oss/jwt-go v3.2.2+incompatible // indirect - github.com/go-logr/logr v1.2.3 // indirect + github.com/go-logr/logr v1.2.4 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/analysis v0.21.2 // indirect @@ -232,8 +232,8 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/metric v1.15.0-rc.1 // indirect - go.opentelemetry.io/otel/trace v1.15.0-rc.1 // indirect + go.opentelemetry.io/otel/metric v0.38.1 // indirect + go.opentelemetry.io/otel/trace v1.15.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 485bcb486b95..4412cb4947e8 100644 --- a/go.sum +++ b/go.sum @@ -314,8 +314,8 @@ github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= -github.com/go-logr/logr v1.2.3 h1:2DntVwHkVopvECVRSlL5PSo9eG+cAkDCuckLubN+rq0= -github.com/go-logr/logr v1.2.3/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= 
+github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= @@ -1080,16 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.15.0-rc.1 h1:KgZyVIfe3rPjWZHAZE0A9sH5U4tjyh1VeP+BFIgq944= -go.opentelemetry.io/otel v1.15.0-rc.1/go.mod h1:IZXh/uN07z/0si8lWvFW2FkwzAmSGE4DhF4quJIsLnY= -go.opentelemetry.io/otel/metric v1.15.0-rc.1 h1:ueivGgoyP2c58JZvmJriF35k238mVyRtlODD6BRgowU= -go.opentelemetry.io/otel/metric v1.15.0-rc.1/go.mod h1:bpPBxLwoWWmiK+Hmb6ZaG0zDLIi59lK7M+GjgZ5PN+4= -go.opentelemetry.io/otel/sdk v1.15.0-rc.1 h1:WtWiH5l19vwpdGIx9/Wou9l7a/butFoAOuJktWnlOro= -go.opentelemetry.io/otel/sdk v1.15.0-rc.1/go.mod h1:93NwQ8NqCb/QSUF7URdJur5Fvfm9rztE+2eJLpsKdWc= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1 h1:PYmYfBlAZeg4y4VNjrKY24yrD2Jzb47x7Dz6rAI9lXg= -go.opentelemetry.io/otel/sdk/metric v0.38.0-rc.1/go.mod h1:LH4ApPwmFOe8mvPS7a56gW4WT9IhtwuEA/mN8FsVDX0= -go.opentelemetry.io/otel/trace v1.15.0-rc.1 h1:xK6jLm8h2KFhdItNvzAuNvnoWjRPU9u7whXNNBMxjtc= -go.opentelemetry.io/otel/trace v1.15.0-rc.1/go.mod h1:2cLx8hBNS4rUWB+JA9PuCGggQl+KJioCaoV2CKewY4s= +go.opentelemetry.io/otel v1.15.1 h1:3Iwq3lfRByPaws0f6bU3naAqOR1n5IeDWd9390kWHa8= +go.opentelemetry.io/otel v1.15.1/go.mod h1:mHHGEHVDLal6YrKMmk9LqC4a3sF5g+fHfrttQIB1NTc= +go.opentelemetry.io/otel/metric v0.38.1 h1:2MM7m6wPw9B8Qv8iHygoAgkbejed59uUR6ezR5T3X2s= 
+go.opentelemetry.io/otel/metric v0.38.1/go.mod h1:FwqNHD3I/5iX9pfrRGZIlYICrJv0rHEUl2Ln5vdIVnQ= +go.opentelemetry.io/otel/sdk v1.15.1 h1:5FKR+skgpzvhPQHIEfcwMYjCBr14LWzs3uSqKiQzETI= +go.opentelemetry.io/otel/sdk v1.15.1/go.mod h1:8rVtxQfrbmbHKfqzpQkT5EzZMcbMBwTzNAggbEAM0KA= +go.opentelemetry.io/otel/sdk/metric v0.38.1 h1:EkO5wI4NT/fUaoPMGc0fKV28JaWe7q4vfVpEVasGb+8= +go.opentelemetry.io/otel/sdk/metric v0.38.1/go.mod h1:Rn4kSXFF9ZQZ5lL1pxQjCbK4seiO+U7s0ncmIFJaj34= +go.opentelemetry.io/otel/trace v1.15.1 h1:uXLo6iHJEzDfrNC0L0mNjItIp06SyaBQxu5t3xMlngY= +go.opentelemetry.io/otel/trace v1.15.1/go.mod h1:IWdQG/5N1x7f6YUlmdLeJvH9yxtuJAfc4VW5Agv9r/8= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1354,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From 
22bb2ee90d9609ec1365caab1870fc45f6af41e0 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 11 Apr 2023 01:39:41 -0400 Subject: [PATCH 025/100] Client configured with TLS using HCP config and retry/throttle --- go.sum | 1 + 1 file changed, 1 insertion(+) diff --git a/go.sum b/go.sum index 4412cb4947e8..945a48559114 100644 --- a/go.sum +++ b/go.sum @@ -516,6 +516,7 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 22be78f224f62a9d7730f6c7f8882b56cf682966 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 25 Apr 2023 14:26:56 -0400 Subject: [PATCH 026/100] run go mod tidy --- go.sum | 1 - 1 file changed, 1 deletion(-) diff --git a/go.sum b/go.sum index 945a48559114..4412cb4947e8 100644 --- a/go.sum +++ b/go.sum @@ -516,7 +516,6 @@ github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgf github.com/grpc-ecosystem/grpc-gateway v1.8.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= -github.com/grpc-ecosystem/grpc-gateway v1.16.0 h1:gmcG1KaJ57LophUzW0Hy8NmPhnMZb4M0+kPpLofRdBo= 
github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= From 478181a983c133a885b6382e04aefd8b43551b61 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 01:17:14 -0400 Subject: [PATCH 027/100] Remove one abstraction to use the config from deps --- agent/hcp/client/metrics_client.go | 1 - 1 file changed, 1 deletion(-) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index 15bd71097f79..ba50f28626e6 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,7 +129,6 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } - defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From c2ffaab30d7df9f2e567753274043055de83f262 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 13:47:35 -0400 Subject: [PATCH 028/100] Address PR feedback --- agent/hcp/client/metrics_client.go | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index ba50f28626e6..15bd71097f79 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -129,6 +129,7 @@ func (o *otlpClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.R if err != nil { return fmt.Errorf("failed to export metrics: %v", err) } + defer resp.Body.Close() var respData bytes.Buffer if _, err := io.Copy(&respData, resp.Body); err != nil { From 864e6d7db283ced90e1c0fc65c5b8a5d6477aace Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 24 Apr 2023 10:57:15 -0400 Subject: [PATCH 029/100] Initialize 
OTELSink with sync.Map for all the instrument stores. --- agent/hcp/telemetry/otel_sink.go | 197 +++++++++++++++++++ agent/hcp/telemetry/otel_sink_test.go | 262 ++++++++++++++++++++++++++ 2 files changed, 459 insertions(+) create mode 100644 agent/hcp/telemetry/otel_sink.go create mode 100644 agent/hcp/telemetry/otel_sink_test.go diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go new file mode 100644 index 000000000000..deeccec01588 --- /dev/null +++ b/agent/hcp/telemetry/otel_sink.go @@ -0,0 +1,197 @@ +package telemetry + +import ( + "bytes" + "context" + "fmt" + "strings" + "sync" + "time" + + gometrics "github.com/armon/go-metrics" + "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/go-hclog" + + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/instrument" + otelsdk "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" +) + +const defaultExportInterval = 10 * time.Second + +// Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. 
+var gauges sync.Map
+
+// GaugeValue is the most recent value recorded for a gauge together with the
+// attributes it is reported with. Values are kept in the package-level gauges
+// store until the gauge callback observes them.
+type GaugeValue struct {
+	Value  float64
+	Labels []attribute.KeyValue
+}
+
+// OTELSinkOpts holds the settings used to build an OTELSink.
+// Logger, Reader, Endpoint and Ctx are mandatory; NewOTELSink rejects the
+// options when any of them is unset.
+type OTELSinkOpts struct {
+	Endpoint       string
+	Reader         otelsdk.Reader
+	Logger         hclog.Logger
+	ExportInterval time.Duration
+	Ctx            context.Context
+}
+
+// OTELSink is a go-metrics sink that records Consul metrics through
+// OpenTelemetry instruments. Instruments are created lazily, one per
+// flattened metric key, and cached in the per-kind sync.Map stores.
+type OTELSink struct {
+	spaceReplacer *strings.Replacer
+	logger        hclog.Logger
+	ctx           context.Context
+
+	meterProvider  *otelsdk.MeterProvider
+	meter          *otelmetric.Meter
+	exportInterval time.Duration
+
+	gaugeInstruments     sync.Map
+	counterInstruments   sync.Map
+	histogramInstruments sync.Map
+}
+
+// NewOTELReader returns a periodic reader that exports through the given
+// metrics client every defaultExportInterval.
+//
+// NOTE(review): only the exporter's client is set here; Export also reads
+// e.url, which is not populated by this constructor - confirm it is set elsewhere.
+func NewOTELReader(client client.MetricsClient) otelsdk.Reader {
+	exp := &OTELExporter{
+		client: client,
+	}
+	return otelsdk.NewPeriodicReader(exp, otelsdk.WithInterval(defaultExportInterval))
+}
+
+// NewOTELSink builds an OTELSink from opts. It returns an error when opts is
+// nil or when any of Logger, Reader, Endpoint or Ctx is missing.
+func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) {
+	if opts == nil || opts.Logger == nil || opts.Reader == nil || opts.Endpoint == "" || opts.Ctx == nil {
+		return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts")
+	}
+
+	// Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically.
+	res := resource.NewSchemaless()
+	meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader))
+	meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry")
+
+	return &OTELSink{
+		meterProvider: meterProvider,
+		meter:         &meter,
+		spaceReplacer: strings.NewReplacer(" ", "_"),
+		// The logger must be stored: the *WithLabels methods log through
+		// o.logger when an instrument cannot be created.
+		logger:         opts.Logger,
+		ctx:            opts.Ctx,
+		exportInterval: opts.ExportInterval,
+	}, nil
+}
+
+// SetGauge emits a Consul gauge metric.
+func (o *OTELSink) SetGauge(key []string, val float32) {
+	o.SetGaugeWithLabels(key, val, nil)
+}
+
+// AddSample emits a Consul histogram metric.
+func (o *OTELSink) AddSample(key []string, val float32) {
+	o.AddSampleWithLabels(key, val, nil)
+}
+
+// IncrCounter emits a Consul counter metric.
+func (o *OTELSink) IncrCounter(key []string, val float32) {
+	o.IncrCounterWithLabels(key, val, nil)
+}
+
+// SetGaugeWithLabels emits a Consul gauge metric that gets
+// registered by an OpenTelemetry observable Gauge instrument.
+// The value is written to the package-level gauges store and reported by the
+// instrument's callback the next time it runs.
+func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) {
+	k := o.flattenKey(key, labels)
+
+	// Set value in global Gauge store.
+	g := &GaugeValue{
+		Value:  float64(val),
+		Labels: toAttributes(labels),
+	}
+	gauges.Store(k, g)
+
+	// If instrument does not exist, create it and register callback to get last value in global Gauge store.
+	if _, ok := o.gaugeInstruments.Load(k); !ok {
+		inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(gaugeCallback(k)))
+		if err != nil {
+			// hclog takes key/value pairs, not printf verbs.
+			o.logger.Error("Failed to emit gauge", "key", k, "error", err)
+			return
+		}
+		o.gaugeInstruments.Store(k, &inst)
+	}
+}
+
+// AddSampleWithLabels emits a Consul sample metric that gets registered by an OpenTelemetry Histogram instrument.
+// The instrument is created on first use of a key and reused afterwards.
+func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) {
+	k := o.flattenKey(key, labels)
+	var inst *instrument.Float64Histogram
+	if cached, ok := o.histogramInstruments.Load(k); ok {
+		inst = cached.(*instrument.Float64Histogram)
+	} else {
+		hist, err := (*o.meter).Float64Histogram(k)
+		if err != nil {
+			o.logger.Error("Failed to emit sample", "key", k, "error", err)
+			return
+		}
+		inst = &hist
+		// Store the pointer: the cache-hit branch asserts *instrument.Float64Histogram,
+		// so storing the bare instrument value would make the next lookup panic.
+		o.histogramInstruments.Store(k, inst)
+	}
+
+	attrs := toAttributes(labels)
+	(*inst).Record(o.ctx, float64(val), attrs...)
+}
+
+// IncrCounterWithLabels emits a Consul counter metric that gets registered by an OpenTelemetry Counter instrument.
+func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { + k := o.flattenKey(key, labels) + var inst *instrument.Float64Counter + v, ok := o.histogramInstruments.Load(k) + if !ok { + v, err := (*o.meter).Float64Counter(k) + if err != nil { + o.logger.Error("Failed to emit gauge: %w", err) + return + } + inst = &v + o.histogramInstruments.Store(k, v) + } else { + inst = v.(*instrument.Float64Counter) + } + + attrs := toAttributes(labels) + (*inst).Add(o.ctx, float64(val), attrs...) +} + +// EmitKey unsupported. +func (o *OTELSink) EmitKey(key []string, val float32) {} + +// flattenKey key along with its labels. +func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { + buf := &bytes.Buffer{} + joined := strings.Join(parts, ".") + + o.spaceReplacer.WriteString(buf, joined) + + return buf.String() +} + +func toAttributes(labels []gometrics.Label) []attribute.KeyValue { + if len(labels) == 0 { + return nil + } + attrs := make([]attribute.KeyValue, len(labels)) + for i, label := range labels { + attrs[i] = attribute.KeyValue{ + Key: attribute.Key(label.Name), + Value: attribute.StringValue(label.Value), + } + } + + return attrs +} + +func gaugeCallback(key string) instrument.Float64Callback { + // Closures keep a reference to the key string, so we don't have to worry about it. + // These get garbage collected as the closure completes. + return func(_ context.Context, obs instrument.Float64Observer) error { + if val, ok := gauges.LoadAndDelete(key); ok { + v := val.(*GaugeValue) + obs.Observe(v.Value, v.Labels...) 
+ } + return nil + } +} diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go new file mode 100644 index 000000000000..440e5d3b1cf8 --- /dev/null +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -0,0 +1,262 @@ +package telemetry + +import ( + "context" + "io" + "testing" + + gometrics "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" +) + +var ( + attrs = attribute.NewSet(attribute.KeyValue{ + Key: attribute.Key("server.id"), + Value: attribute.StringValue("test"), + }) + + expectedMetrics = map[string]metricdata.Metrics{ + "consul.raft.leader": { + Name: "consul.raft.leader", + Description: "", + Unit: "", + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(0)), + }, + }, + }, + }, + "consul.autopilot.healthy": { + Name: "consul.autopilot.healthy", + Description: "", + Unit: "", + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: attrs, + Value: float64(float32(1.23)), + }, + }, + }, + }, + "consul.raft.state.leader": { + Name: "consul.raft.state.leader", + Description: "", + Unit: "", + Data: metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(23.23)), + }, + }, + }, + }, + "consul.raft.apply": { + Name: "consul.raft.apply", + Description: "", + Unit: "", + Data: metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: attrs, + Value: float64(float32(1.44)), + }, + }, + }, + }, + "consul.raft.leader.lastContact": { + Name: "consul.raft.leader.lastContact", + Description: "", + Unit: "", + Data: metricdata.Histogram[float64]{ + DataPoints: 
[]metricdata.HistogramDataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Count: 1, + Sum: float64(float32(45.32)), + Min: metricdata.NewExtrema(float64(float32(45.32))), + Max: metricdata.NewExtrema(float64(float32(45.32))), + }, + }, + }, + }, + "consul.raft.commitTime": { + Name: "consul.raft.commitTime", + Description: "", + Unit: "", + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ + { + Attributes: attrs, + Count: 1, + Sum: float64(float32(26.34)), + Min: metricdata.NewExtrema(float64(float32(26.34))), + Max: metricdata.NewExtrema(float64(float32(26.34))), + }, + }, + }, + }, + } +) + +func TestNewOTELSink(t *testing.T) { + for name, test := range map[string]struct { + wantErr string + opts *OTELSinkOpts + }{ + "failsWithEmptyLogger": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + opts: &OTELSinkOpts{ + Logger: nil, + Reader: metric.NewManualReader(), + Endpoint: "test.com", + }, + }, + "failsWithEmptyExporter": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + opts: &OTELSinkOpts{ + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: nil, + Endpoint: "test.com", + }, + }, + "failsWithInvalidEndpoint": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + opts: &OTELSinkOpts{ + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: metric.NewManualReader(), + Endpoint: "", + }, + }, + } { + t.Run(name, func(t *testing.T) { + sink, err := NewOTELSink(test.opts) + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.NotNil(t, sink) + }) + } +} + +func TestOTELSink(t *testing.T) { + // Manual reader outputs the aggregated metrics when reader.Collect is called. 
+ reader := metric.NewManualReader() + + ctx := context.Background() + opts := &OTELSinkOpts{ + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: reader, + Endpoint: "test.com", + Ctx: ctx, + } + + sink, err := NewOTELSink(opts) + require.NoError(t, err) + + labels := []gometrics.Label{ + { + Name: "server.id", + Value: "test", + }, + } + + sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + + sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) + + sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) + + var collected metricdata.ResourceMetrics + err = reader.Collect(ctx, &collected) + require.NoError(t, err) + + // Validate resource + require.Equal(t, resource.NewSchemaless(), collected.Resource) + + // Validate metrics + for _, actual := range collected.ScopeMetrics[0].Metrics { + name := actual.Name + expected, ok := expectedMetrics[name] + require.True(t, ok, "metric key %s should be in expectedMetrics map", name) + isSameMetrics(t, expected, actual) + } +} + +// compareMetrics verifies if two metricdata.Metric objects are equal by ignoring the time component. 
+func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata.Metrics) { + require.Equal(t, expected.Name, actual.Name, "different .Name field") + require.Equal(t, expected.Description, actual.Description, "different .Description field") + require.Equal(t, expected.Unit, actual.Unit, "different .Unit field") + + switch expectedData := expected.Data.(type) { + case metricdata.Gauge[float64]: + actualData, ok := actual.Data.(metricdata.Gauge[float64]) + require.True(t, ok, "different metric types: expected metricdata.Gauge[float64]") + + isSameData(t, expectedData.DataPoints, actualData.DataPoints) + case metricdata.Sum[float64]: + actualData, ok := actual.Data.(metricdata.Sum[float64]) + require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") + + isSameData(t, expectedData.DataPoints, actualData.DataPoints) + case metricdata.Histogram[float64]: + actualData, ok := actual.Data.(metricdata.Histogram[float64]) + require.True(t, ok, "different metric types: expected metricdata.Histogram[float64]") + + isSameHistogramData(t, expectedData.DataPoints, actualData.DataPoints) + } +} + +func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { + require.Equal(t, len(expected), len(actual), "different datapoints length") + + // Only verify the value and the attributes. 
+ for i, dp := range expected { + currActual := actual[i] + require.Equal(t, dp.Value, currActual.Value, "different datapoint value") + require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") + + iter := dp.Attributes.Iter() + for iter.Next() { + attr := iter.Attribute() + require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") + } + } +} + +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { + require.Equal(t, len(expected), len(actual), "different histogram datapoint length") + + // Only verify the value and the attributes. + for i, dp := range expected { + currActual := actual[i] + require.Equal(t, dp.Sum, currActual.Sum, "different histogram datapoint .Sum value") + require.Equal(t, dp.Max, currActual.Max, "different histogram datapoint .Max value") + require.Equal(t, dp.Min, currActual.Min, "different histogram datapoint .Min value") + require.Equal(t, dp.Count, currActual.Count, "different histogram datapoint .Count value") + + require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") + + iter := dp.Attributes.Iter() + for iter.Next() { + attr := iter.Attribute() + require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") + } + } +} From 05c418bef5f8025dd75bdc77337595cf0396a591 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 09:35:10 -0400 Subject: [PATCH 030/100] Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests. 
--- agent/hcp/telemetry/otel_sink.go | 21 +++++++++------------ agent/hcp/telemetry/otel_sink_test.go | 25 +++++++------------------ 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index deeccec01588..ef926dc32d41 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -19,8 +19,6 @@ import ( "go.opentelemetry.io/otel/sdk/resource" ) -const defaultExportInterval = 10 * time.Second - // Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. var gauges sync.Map = sync.Map{} @@ -30,11 +28,9 @@ type GaugeValue struct { } type OTELSinkOpts struct { - Endpoint string - Reader otelsdk.Reader - Logger hclog.Logger - ExportInterval time.Duration - Ctx context.Context + Reader otelsdk.Reader + Logger hclog.Logger + Ctx context.Context } type OTELSink struct { @@ -51,15 +47,16 @@ type OTELSink struct { histogramInstruments sync.Map } -func NewOTELReader(client client.MetricsClient) otelsdk.Reader { - exp := &OTELExporter{ - client: client, +func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { + exporter := &OTELExporter{ + client: client, + endpoint: endpoint, } - return otelsdk.NewPeriodicReader(exp, otelsdk.WithInterval(defaultExportInterval)) + return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { - if opts.Logger == nil || opts.Reader == nil || opts.Endpoint == "" || opts.Ctx == nil { + if opts.Logger == nil || opts.Reader == nil || opts.Ctx == nil { return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts") } diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 440e5d3b1cf8..5e1cfbc53b4f 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -116,25 +116,15 @@ func 
TestNewOTELSink(t *testing.T) { "failsWithEmptyLogger": { wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", opts: &OTELSinkOpts{ - Logger: nil, - Reader: metric.NewManualReader(), - Endpoint: "test.com", + Logger: nil, + Reader: metric.NewManualReader(), }, }, "failsWithEmptyExporter": { wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", opts: &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: nil, - Endpoint: "test.com", - }, - }, - "failsWithInvalidEndpoint": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", - opts: &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: metric.NewManualReader(), - Endpoint: "", + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: nil, }, }, } { @@ -157,10 +147,9 @@ func TestOTELSink(t *testing.T) { ctx := context.Background() opts := &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: reader, - Endpoint: "test.com", - Ctx: ctx, + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: reader, + Ctx: ctx, } sink, err := NewOTELSink(opts) From 72ae2057384e3d25b90bf15ba6e39d20d4357bc0 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 14:37:46 -0400 Subject: [PATCH 031/100] Switch to mutex instead of sync.Map to avoid type assertion --- agent/hcp/telemetry/otel_sink.go | 109 +++++++++++++++++++++---------- 1 file changed, 76 insertions(+), 33 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index ef926dc32d41..2fe208660bd0 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -8,10 +8,10 @@ import ( "sync" "time" - gometrics "github.com/armon/go-metrics" "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/go-hclog" + gometrics "github.com/armon/go-metrics" "go.opentelemetry.io/otel/attribute" otelmetric 
"go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/instrument" @@ -20,11 +20,37 @@ import ( ) // Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. -var gauges sync.Map = sync.Map{} +var gauges *GlobalGaugeStore type GaugeValue struct { - Value float64 - Labels []attribute.KeyValue + Value float64 + Attributes []attribute.KeyValue +} + +type GlobalGaugeStore struct { + store map[string]*GaugeValue + mutex sync.Mutex +} + +// LoadAndDelete will read a Gauge value and delete it. +// Within the Gauge callbacks we delete the value once we have registed it with OTEL to ensure +// we only emit a Gauge value once. +func (g *GlobalGaugeStore) LoadAndDelete(key string) (*GaugeValue, bool) { + g.mutex.Lock() + defer g.mutex.Unlock() + + gauge, ok := g.store[key] + + delete(g.store, key) + + return gauge, ok +} + +func (g *GlobalGaugeStore) Store(key string, gauge *GaugeValue) { + g.mutex.Lock() + defer g.mutex.Unlock() + + g.store[key] = gauge } type OTELSinkOpts struct { @@ -42,9 +68,11 @@ type OTELSink struct { meter *otelmetric.Meter exportInterval time.Duration - gaugeInstruments sync.Map - counterInstruments sync.Map - histogramInstruments sync.Map + gaugeInstruments map[string]*instrument.Float64ObservableGauge + counterInstruments map[string]*instrument.Float64Counter + histogramInstruments map[string]*instrument.Float64Histogram + + mutex sync.Mutex } func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { @@ -65,11 +93,21 @@ func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") + // Init global gauge store. 
+ gauges = &GlobalGaugeStore{ + store: make(map[string]*GaugeValue, 0), + mutex: sync.Mutex{}, + } + return &OTELSink{ - meterProvider: meterProvider, - meter: &meter, - spaceReplacer: strings.NewReplacer(" ", "_"), - ctx: opts.Ctx, + meterProvider: meterProvider, + meter: &meter, + spaceReplacer: strings.NewReplacer(" ", "_"), + ctx: opts.Ctx, + mutex: sync.Mutex{}, + gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), + counterInstruments: make(map[string]*instrument.Float64Counter, 0), + histogramInstruments: make(map[string]*instrument.Float64Histogram, 0), }, nil } @@ -95,37 +133,41 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // Set value in global Gauge store. g := &GaugeValue{ - Value: float64(val), - Labels: toAttributes(labels), + Value: float64(val), + Attributes: toAttributes(labels), } gauges.Store(k, g) - // If instrument does not exist, create it and register callback to get last value in global Gauge store. - if _, ok := o.gaugeInstruments.Load(k); !ok { + o.mutex.Lock() + defer o.mutex.Unlock() + + // If instrument does not exist, create it and register callback to emit last value in global Gauge store. + if _, ok := o.gaugeInstruments[k]; !ok { inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - o.gaugeInstruments.Store(k, &inst) + o.gaugeInstruments[k] = &inst } } // AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. 
func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key, labels) - var inst *instrument.Float64Histogram - v, ok := o.histogramInstruments.Load(k) + + o.mutex.Lock() + defer o.mutex.Unlock() + + inst, ok := o.histogramInstruments[k] if !ok { - v, err := (*o.meter).Float64Histogram(k) + histogram, err := (*o.meter).Float64Histogram(k) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - inst = &v - o.histogramInstruments.Store(k, v) - } else { - inst = v.(*instrument.Float64Histogram) + inst = &histogram + o.histogramInstruments[k] = inst } attrs := toAttributes(labels) @@ -135,18 +177,20 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key, labels) - var inst *instrument.Float64Counter - v, ok := o.histogramInstruments.Load(k) + + o.mutex.Lock() + defer o.mutex.Unlock() + + inst, ok := o.counterInstruments[k] if !ok { - v, err := (*o.meter).Float64Counter(k) + counter, err := (*o.meter).Float64Counter(k) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - inst = &v - o.histogramInstruments.Store(k, v) - } else { - inst = v.(*instrument.Float64Counter) + + inst = &counter + o.counterInstruments[k] = inst } attrs := toAttributes(labels) @@ -185,9 +229,8 @@ func gaugeCallback(key string) instrument.Float64Callback { // Closures keep a reference to the key string, so we don't have to worry about it. // These get garbage collected as the closure completes. return func(_ context.Context, obs instrument.Float64Observer) error { - if val, ok := gauges.LoadAndDelete(key); ok { - v := val.(*GaugeValue) - obs.Observe(v.Value, v.Labels...) 
+ if gauge, ok := gauges.LoadAndDelete(key); ok { + obs.Observe(gauge.Value, gauge.Attributes...) } return nil } From 83fba0a9bc530e6976ecb2f5f492af5a7c74f6fb Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 15:24:04 -0400 Subject: [PATCH 032/100] Add gauge store --- agent/hcp/telemetry/gauge_store.go | 75 +++++++++++++++++++++++++ agent/hcp/telemetry/gauge_store_test.go | 61 ++++++++++++++++++++ agent/hcp/telemetry/otel_sink.go | 57 +++---------------- 3 files changed, 144 insertions(+), 49 deletions(-) create mode 100644 agent/hcp/telemetry/gauge_store.go create mode 100644 agent/hcp/telemetry/gauge_store_test.go diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go new file mode 100644 index 000000000000..082a7edd78b3 --- /dev/null +++ b/agent/hcp/telemetry/gauge_store.go @@ -0,0 +1,75 @@ +package telemetry + +import ( + "sync" + + "go.opentelemetry.io/otel/attribute" +) + +// Global store for Gauge values as workaround for async OpenTelemetry Gauge instrument. +var once sync.Once +var globalGauges *gaugeStore + +type gaugeStore struct { + store map[string]*gaugeValue + mutex sync.Mutex +} + +// gaugeValues hold both the float64 value and the labels. +type gaugeValue struct { + Value float64 + Attributes []attribute.KeyValue +} + +// initGaugeStore initializes the global gauge store. +// initGaugeStore not thread-safe so it must only be init once. +func initGaugeStore() { + // Avoid double initialization with sync.Once + once.Do(func() { + if globalGauges != nil { + return + } + + globalGauges = &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + mutex: sync.Mutex{}, + } + }) +} + +// LoadAndDelete will read a Gauge value and delete it. +// Within the OTEL Gauge callbacks we must delete the value once we have read it +// to ensure we only emit a Gauge value once, as the callbacks continue to execute every collection cycle. +// The store must be initialized before using this method. 
+func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { + if g == nil { + return nil, false + } + + g.mutex.Lock() + defer g.mutex.Unlock() + + gauge, ok := g.store[key] + + delete(g.store, key) + + return gauge, ok +} + +// Store adds a gaugeValue to the global gauge store. +// The store must be initialized before using this method. +func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { + if g == nil { + return + } + + g.mutex.Lock() + defer g.mutex.Unlock() + + gv := &gaugeValue{ + Value: float64(value), + Attributes: labels, + } + + g.store[key] = gv +} diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go new file mode 100644 index 000000000000..8fd7aa9dfcca --- /dev/null +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -0,0 +1,61 @@ +package telemetry + +import ( + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" +) + +func TestGaugeStore(t *testing.T) { + initGaugeStore() + + attributes := []attribute.KeyValue{ + { + Key: attribute.Key("test_key"), + Value: attribute.StringValue("test_value"), + }, + } + + globalGauges.Store("test", float64(1.23), attributes) + + // Should store a new gauge. + val, ok := globalGauges.LoadAndDelete("test") + require.True(t, ok) + require.Equal(t, val.Value, float64(1.23)) + require.Equal(t, val.Attributes, attributes) + + // Gauge with key "test" have been deleted. + val, ok = globalGauges.LoadAndDelete("test") + require.False(t, ok) + + globalGauges.Store("duplicate", float64(1.5), nil) + globalGauges.Store("duplicate", float64(6.7), nil) + + // Gauge with key "duplicate" should hold the latest (last seen) value. 
+ val, ok = globalGauges.LoadAndDelete("duplicate") + require.True(t, ok) + require.Equal(t, val.Value, float64(6.7)) + + // Reset store + globalGauges = nil +} + +func TestGaugeStore_WithoutInit(t *testing.T) { + attributes := []attribute.KeyValue{ + { + Key: attribute.Key("test_key"), + Value: attribute.StringValue("test_value"), + }, + } + + // Should not store since store not init. + globalGauges.Store("test", float64(1.23), attributes) + val, ok := globalGauges.LoadAndDelete("test") + + require.False(t, ok) + require.Nil(t, val) + + // Reset store + globalGauges = nil +} diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 2fe208660bd0..eaae26704ae4 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -19,40 +19,6 @@ import ( "go.opentelemetry.io/otel/sdk/resource" ) -// Store for Gauge values as workaround for async OpenTelemetry Gauge instrument. -var gauges *GlobalGaugeStore - -type GaugeValue struct { - Value float64 - Attributes []attribute.KeyValue -} - -type GlobalGaugeStore struct { - store map[string]*GaugeValue - mutex sync.Mutex -} - -// LoadAndDelete will read a Gauge value and delete it. -// Within the Gauge callbacks we delete the value once we have registed it with OTEL to ensure -// we only emit a Gauge value once. 
-func (g *GlobalGaugeStore) LoadAndDelete(key string) (*GaugeValue, bool) { - g.mutex.Lock() - defer g.mutex.Unlock() - - gauge, ok := g.store[key] - - delete(g.store, key) - - return gauge, ok -} - -func (g *GlobalGaugeStore) Store(key string, gauge *GaugeValue) { - g.mutex.Lock() - defer g.mutex.Unlock() - - g.store[key] = gauge -} - type OTELSinkOpts struct { Reader otelsdk.Reader Logger hclog.Logger @@ -64,9 +30,8 @@ type OTELSink struct { logger hclog.Logger ctx context.Context - meterProvider *otelsdk.MeterProvider - meter *otelmetric.Meter - exportInterval time.Duration + meterProvider *otelsdk.MeterProvider + meter *otelmetric.Meter gaugeInstruments map[string]*instrument.Float64ObservableGauge counterInstruments map[string]*instrument.Float64Counter @@ -94,16 +59,14 @@ func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") // Init global gauge store. - gauges = &GlobalGaugeStore{ - store: make(map[string]*GaugeValue, 0), - mutex: sync.Mutex{}, - } + initGaugeStore() return &OTELSink{ - meterProvider: meterProvider, - meter: &meter, spaceReplacer: strings.NewReplacer(" ", "_"), + logger: opts.Logger.Named("otel_sink"), ctx: opts.Ctx, + meterProvider: meterProvider, + meter: &meter, mutex: sync.Mutex{}, gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), counterInstruments: make(map[string]*instrument.Float64Counter, 0), @@ -132,11 +95,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr k := o.flattenKey(key, labels) // Set value in global Gauge store. - g := &GaugeValue{ - Value: float64(val), - Attributes: toAttributes(labels), - } - gauges.Store(k, g) + globalGauges.Store(k, float64(val), toAttributes(labels)) o.mutex.Lock() defer o.mutex.Unlock() @@ -229,7 +188,7 @@ func gaugeCallback(key string) instrument.Float64Callback { // Closures keep a reference to the key string, so we don't have to worry about it. 
// These get garbage collected as the closure completes. return func(_ context.Context, obs instrument.Float64Observer) error { - if gauge, ok := gauges.LoadAndDelete(key); ok { + if gauge, ok := globalGauges.LoadAndDelete(key); ok { obs.Observe(gauge.Value, gauge.Attributes...) } return nil From 520ba9f52a1a102286ec635a79d1c2e3922fe917 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 15:30:47 -0400 Subject: [PATCH 033/100] Clarify comments --- agent/hcp/telemetry/otel_sink.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index eaae26704ae4..72e019d160e5 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -169,6 +169,7 @@ func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { return buf.String() } +// toAttributes converts go metrics Labels into OTEL format []attributes.KeyValue func toAttributes(labels []gometrics.Label) []attribute.KeyValue { if len(labels) == 0 { return nil @@ -184,9 +185,10 @@ func toAttributes(labels []gometrics.Label) []attribute.KeyValue { return attrs } +// gaugeCallback returns a callback which gets called when metrics are collected for export. +// the callback obtains the gauge value from the global gauges. func gaugeCallback(key string) instrument.Float64Callback { - // Closures keep a reference to the key string, so we don't have to worry about it. - // These get garbage collected as the closure completes. + // Closures keep a reference to the key string, that get garbage collected when code completes. return func(_ context.Context, obs instrument.Float64Observer) error { if gauge, ok := globalGauges.LoadAndDelete(key); ok { obs.Observe(gauge.Value, gauge.Attributes...) 
From 190ef2a5f97b80be4dbc5e276cdb6d4ed8afe940 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 18:00:30 -0400 Subject: [PATCH 034/100] return concrete sink type --- agent/hcp/telemetry/otel_sink.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 72e019d160e5..05711d14a3e6 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -48,7 +48,7 @@ func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } -func NewOTELSink(opts *OTELSinkOpts) (gometrics.MetricSink, error) { +func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { if opts.Logger == nil || opts.Reader == nil || opts.Ctx == nil { return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts") } From 659a7ddcdb4e4a43f8c000b6f6e4a292d35c8074 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 23:25:27 -0400 Subject: [PATCH 035/100] Fix lint errors --- agent/hcp/telemetry/gauge_store.go | 2 +- agent/hcp/telemetry/gauge_store_test.go | 1 + agent/hcp/telemetry/otel_sink.go | 8 ++++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 082a7edd78b3..b070534c3897 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -67,7 +67,7 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu defer g.mutex.Unlock() gv := &gaugeValue{ - Value: float64(value), + Value: value, Attributes: labels, } diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 8fd7aa9dfcca..62adf3c7af36 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -28,6 +28,7 @@ func TestGaugeStore(t *testing.T) { // Gauge with 
key "test" have been deleted. val, ok = globalGauges.LoadAndDelete("test") require.False(t, ok) + require.Nil(t, val) globalGauges.Store("duplicate", float64(1.5), nil) globalGauges.Store("duplicate", float64(6.7), nil) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 05711d14a3e6..d7b8071ab0ef 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -92,7 +92,7 @@ func (o *OTELSink) IncrCounter(key []string, val float32) { // AddSampleWithLabels emits a Consul gauge metric that gets // registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) // Set value in global Gauge store. globalGauges.Store(k, float64(val), toAttributes(labels)) @@ -113,7 +113,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) o.mutex.Lock() defer o.mutex.Unlock() @@ -135,7 +135,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) o.mutex.Lock() defer o.mutex.Unlock() @@ -160,7 +160,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom func (o *OTELSink) EmitKey(key []string, val float32) {} // flattenKey key along with its labels. 
-func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { +func (o *OTELSink) flattenKey(parts []string) string { buf := &bytes.Buffer{} joined := strings.Join(parts, ".") From 9659a87634ea7375a941f5a29a60f0a2a905678a Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 27 Apr 2023 15:58:56 -0400 Subject: [PATCH 036/100] Move gauge store to be within sink --- agent/hcp/telemetry/gauge_store.go | 42 +++++++++---------------- agent/hcp/telemetry/gauge_store_test.go | 28 +++-------------- agent/hcp/telemetry/otel_sink.go | 25 ++++++--------- 3 files changed, 28 insertions(+), 67 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index b070534c3897..7aa7edb23445 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -1,15 +1,13 @@ package telemetry import ( + "context" "sync" "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric/instrument" ) -// Global store for Gauge values as workaround for async OpenTelemetry Gauge instrument. -var once sync.Once -var globalGauges *gaugeStore - type gaugeStore struct { store map[string]*gaugeValue mutex sync.Mutex @@ -21,31 +19,11 @@ type gaugeValue struct { Attributes []attribute.KeyValue } -// initGaugeStore initializes the global gauge store. -// initGaugeStore not thread-safe so it must only be init once. -func initGaugeStore() { - // Avoid double initialization with sync.Once - once.Do(func() { - if globalGauges != nil { - return - } - - globalGauges = &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - mutex: sync.Mutex{}, - } - }) -} - // LoadAndDelete will read a Gauge value and delete it. // Within the OTEL Gauge callbacks we must delete the value once we have read it // to ensure we only emit a Gauge value once, as the callbacks continue to execute every collection cycle. // The store must be initialized before using this method. 
func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { - if g == nil { - return nil, false - } - g.mutex.Lock() defer g.mutex.Unlock() @@ -59,10 +37,6 @@ func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { // Store adds a gaugeValue to the global gauge store. // The store must be initialized before using this method. func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { - if g == nil { - return - } - g.mutex.Lock() defer g.mutex.Unlock() @@ -73,3 +47,15 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu g.store[key] = gv } + +// gaugeCallback returns a callback which gets called when metrics are collected for export. +// the callback obtains the gauge value from the global gauges. +func (g *gaugeStore) gaugeCallback(key string) instrument.Float64Callback { + // Closures keep a reference to the key string, that get garbage collected when code completes. + return func(_ context.Context, obs instrument.Float64Observer) error { + if gauge, ok := g.LoadAndDelete(key); ok { + obs.Observe(gauge.Value, gauge.Attributes...) 
+ } + return nil + } +} diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 62adf3c7af36..0703faedef86 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -1,6 +1,7 @@ package telemetry import ( + "sync" "testing" "github.com/stretchr/testify/require" @@ -8,7 +9,10 @@ import ( ) func TestGaugeStore(t *testing.T) { - initGaugeStore() + globalGauges := &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + mutex: sync.Mutex{}, + } attributes := []attribute.KeyValue{ { @@ -37,26 +41,4 @@ func TestGaugeStore(t *testing.T) { val, ok = globalGauges.LoadAndDelete("duplicate") require.True(t, ok) require.Equal(t, val.Value, float64(6.7)) - - // Reset store - globalGauges = nil -} - -func TestGaugeStore_WithoutInit(t *testing.T) { - attributes := []attribute.KeyValue{ - { - Key: attribute.Key("test_key"), - Value: attribute.StringValue("test_value"), - }, - } - - // Should not store since store not init. 
- globalGauges.Store("test", float64(1.23), attributes) - val, ok := globalGauges.LoadAndDelete("test") - - require.False(t, ok) - require.Nil(t, val) - - // Reset store - globalGauges = nil } diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index d7b8071ab0ef..fbda518a6e0f 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -33,6 +33,8 @@ type OTELSink struct { meterProvider *otelsdk.MeterProvider meter *otelmetric.Meter + gaugeStore *gaugeStore + gaugeInstruments map[string]*instrument.Float64ObservableGauge counterInstruments map[string]*instrument.Float64Counter histogramInstruments map[string]*instrument.Float64Histogram @@ -58,8 +60,10 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") - // Init global gauge store. - initGaugeStore() + gs := &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + mutex: sync.Mutex{}, + } return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), @@ -68,6 +72,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider: meterProvider, meter: &meter, mutex: sync.Mutex{}, + gaugeStore: gs, gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), counterInstruments: make(map[string]*instrument.Float64Counter, 0), histogramInstruments: make(map[string]*instrument.Float64Histogram, 0), @@ -95,14 +100,14 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr k := o.flattenKey(key) // Set value in global Gauge store. - globalGauges.Store(k, float64(val), toAttributes(labels)) + o.gaugeStore.Store(k, float64(val), toAttributes(labels)) o.mutex.Lock() defer o.mutex.Unlock() // If instrument does not exist, create it and register callback to emit last value in global Gauge store. 
if _, ok := o.gaugeInstruments[k]; !ok { - inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(gaugeCallback(k))) + inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return @@ -184,15 +189,3 @@ func toAttributes(labels []gometrics.Label) []attribute.KeyValue { return attrs } - -// gaugeCallback returns a callback which gets called when metrics are collected for export. -// the callback obtains the gauge value from the global gauges. -func gaugeCallback(key string) instrument.Float64Callback { - // Closures keep a reference to the key string, that get garbage collected when code completes. - return func(_ context.Context, obs instrument.Float64Observer) error { - if gauge, ok := globalGauges.LoadAndDelete(key); ok { - obs.Observe(gauge.Value, gauge.Attributes...) - } - return nil - } -} From 80e01c7200998b828eb8f36605f6cd64f7902061 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 14:30:44 -0400 Subject: [PATCH 037/100] Use context.TODO,rebase and clenaup opts handling --- agent/hcp/telemetry/otel_sink.go | 20 +++++++++----------- agent/hcp/telemetry/otel_sink_test.go | 7 +++---- 2 files changed, 12 insertions(+), 15 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index fbda518a6e0f..b0fd58010d2d 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -22,13 +22,11 @@ import ( type OTELSinkOpts struct { Reader otelsdk.Reader Logger hclog.Logger - Ctx context.Context } type OTELSink struct { spaceReplacer *strings.Replacer logger hclog.Logger - ctx context.Context meterProvider *otelsdk.MeterProvider meter *otelmetric.Meter @@ -43,16 +41,17 @@ type OTELSink struct { } func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { - exporter := &OTELExporter{ - 
client: client, - endpoint: endpoint, - } + exporter := NewOTELExporter(client, endpoint) return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { - if opts.Logger == nil || opts.Reader == nil || opts.Ctx == nil { - return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts") + if opts.Logger == nil { + return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Logger") + } + + if opts.Reader == nil { + return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Reader") } // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. @@ -68,7 +67,6 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), logger: opts.Logger.Named("otel_sink"), - ctx: opts.Ctx, meterProvider: meterProvider, meter: &meter, mutex: sync.Mutex{}, @@ -135,7 +133,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet } attrs := toAttributes(labels) - (*inst).Record(o.ctx, float64(val), attrs...) + (*inst).Record(context.TODO(), float64(val), attrs...) } // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. @@ -158,7 +156,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom } attrs := toAttributes(labels) - (*inst).Add(o.ctx, float64(val), attrs...) + (*inst).Add(context.TODO(), float64(val), attrs...) } // EmitKey unsupported. 
diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 5e1cfbc53b4f..4f631e678caf 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -114,14 +114,14 @@ func TestNewOTELSink(t *testing.T) { opts *OTELSinkOpts }{ "failsWithEmptyLogger": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Logger", opts: &OTELSinkOpts{ Logger: nil, Reader: metric.NewManualReader(), }, }, - "failsWithEmptyExporter": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + "failsWithEmptyReader": { + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Reader", opts: &OTELSinkOpts{ Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: nil, @@ -149,7 +149,6 @@ func TestOTELSink(t *testing.T) { opts := &OTELSinkOpts{ Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: reader, - Ctx: ctx, } sink, err := NewOTELSink(opts) From 7cbed580ff98d74527c95429301cd62762aec8dd Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:28:24 -0400 Subject: [PATCH 038/100] Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1 --- agent/hcp/telemetry/otel_sink_test.go | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 4f631e678caf..92a1bbc6316e 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -77,8 +77,8 @@ var ( Name: "consul.raft.leader.lastContact", Description: "", Unit: "", - Data: metricdata.Histogram[float64]{ - DataPoints: []metricdata.HistogramDataPoint[float64]{ + Data: metricdata.Histogram{ + DataPoints: []metricdata.HistogramDataPoint{ { Attributes: *attribute.EmptySet(), Count: 1, @@ -93,8 +93,8 @@ var ( Name: "consul.raft.commitTime", Description: "", Unit: "", - Data: 
metricdata.Histogram[float64]{ - DataPoints: []metricdata.HistogramDataPoint[float64]{ + Data: metricdata.Histogram{ + DataPoints: []metricdata.HistogramDataPoint{ { Attributes: attrs, Count: 1, @@ -203,9 +203,9 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") isSameData(t, expectedData.DataPoints, actualData.DataPoints) - case metricdata.Histogram[float64]: - actualData, ok := actual.Data.(metricdata.Histogram[float64]) - require.True(t, ok, "different metric types: expected metricdata.Histogram[float64]") + case metricdata.Histogram: + actualData, ok := actual.Data.(metricdata.Histogram) + require.True(t, ok, "different metric types: expected metricdata.Histogram") isSameHistogramData(t, expectedData.DataPoints, actualData.DataPoints) } @@ -228,7 +228,7 @@ func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual [ } } -func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint, actual []metricdata.HistogramDataPoint) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Only verify the value and the attributes. 
From 91fcfc7639d3329b8957472d4eaacf43121a4c7f Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:39:53 -0400 Subject: [PATCH 039/100] Fix imports --- agent/hcp/telemetry/otel_sink.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index b0fd58010d2d..124a5a1f5af1 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -9,9 +9,9 @@ import ( "time" "github.com/hashicorp/consul/agent/hcp/client" - "github.com/hashicorp/go-hclog" gometrics "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" "go.opentelemetry.io/otel/attribute" otelmetric "go.opentelemetry.io/otel/metric" "go.opentelemetry.io/otel/metric/instrument" From 48d69e3477882db49d500211a0c912149e8b8809 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 09:58:29 -0400 Subject: [PATCH 040/100] Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx --- agent/hcp/telemetry/gauge_store.go | 8 ++-- agent/hcp/telemetry/gauge_store_test.go | 2 - agent/hcp/telemetry/otel_sink.go | 49 ++++++++++++------------- agent/hcp/telemetry/otel_sink_test.go | 29 +++++++-------- go.mod | 2 +- 5 files changed, 42 insertions(+), 48 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 7aa7edb23445..972c2683263b 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -5,7 +5,7 @@ import ( "sync" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/metric/instrument" + "go.opentelemetry.io/otel/metric" ) type gaugeStore struct { @@ -50,11 +50,11 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu // gaugeCallback returns a callback which gets called when metrics are collected for export. // the callback obtains the gauge value from the global gauges. 
-func (g *gaugeStore) gaugeCallback(key string) instrument.Float64Callback { +func (g *gaugeStore) gaugeCallback(key string) metric.Float64Callback { // Closures keep a reference to the key string, that get garbage collected when code completes. - return func(_ context.Context, obs instrument.Float64Observer) error { + return func(_ context.Context, obs metric.Float64Observer) error { if gauge, ok := g.LoadAndDelete(key); ok { - obs.Observe(gauge.Value, gauge.Attributes...) + obs.Observe(gauge.Value, metric.WithAttributes(gauge.Attributes...)) } return nil } diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 0703faedef86..d4c4eb38d00e 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -1,7 +1,6 @@ package telemetry import ( - "sync" "testing" "github.com/stretchr/testify/require" @@ -11,7 +10,6 @@ import ( func TestGaugeStore(t *testing.T) { globalGauges := &gaugeStore{ store: make(map[string]*gaugeValue, 0), - mutex: sync.Mutex{}, } attributes := []attribute.KeyValue{ diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 124a5a1f5af1..ac184f45e864 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -4,24 +4,25 @@ import ( "bytes" "context" "fmt" + "net/url" "strings" "sync" "time" - "github.com/hashicorp/consul/agent/hcp/client" - gometrics "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" otelmetric "go.opentelemetry.io/otel/metric" - "go.opentelemetry.io/otel/metric/instrument" otelsdk "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" + + "github.com/hashicorp/consul/agent/hcp/client" ) type OTELSinkOpts struct { Reader otelsdk.Reader - Logger hclog.Logger + Ctx context.Context } type OTELSink struct { @@ -33,25 +34,25 @@ type OTELSink struct { gaugeStore *gaugeStore - 
gaugeInstruments map[string]*instrument.Float64ObservableGauge - counterInstruments map[string]*instrument.Float64Counter - histogramInstruments map[string]*instrument.Float64Histogram + gaugeInstruments map[string]metric.Float64ObservableGauge + counterInstruments map[string]metric.Float64Counter + histogramInstruments map[string]metric.Float64Histogram mutex sync.Mutex } -func NewOTELReader(client client.MetricsClient, endpoint string, exportInterval time.Duration) otelsdk.Reader { - exporter := NewOTELExporter(client, endpoint) +func NewOTELReader(client client.MetricsClient, url url.URL, exportInterval time.Duration) otelsdk.Reader { + exporter := NewOTELExporter(client, url) return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { - if opts.Logger == nil { - return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Logger") + if opts.Reader == nil { + return nil, fmt.Errorf("ferror: provide valid reader") } - if opts.Reader == nil { - return nil, fmt.Errorf("failed to init OTEL sink: provide valid OTELSinkOpts Reader") + if opts.Ctx == nil { + return nil, fmt.Errorf("ferror: provide valid context") } // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. 
@@ -61,19 +62,17 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { gs := &gaugeStore{ store: make(map[string]*gaugeValue, 0), - mutex: sync.Mutex{}, } return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), - logger: opts.Logger.Named("otel_sink"), + logger: hclog.FromContext(opts.Ctx).Named("otel_sink"), meterProvider: meterProvider, meter: &meter, - mutex: sync.Mutex{}, gaugeStore: gs, - gaugeInstruments: make(map[string]*instrument.Float64ObservableGauge, 0), - counterInstruments: make(map[string]*instrument.Float64Counter, 0), - histogramInstruments: make(map[string]*instrument.Float64Histogram, 0), + gaugeInstruments: make(map[string]metric.Float64ObservableGauge, 0), + counterInstruments: make(map[string]metric.Float64Counter, 0), + histogramInstruments: make(map[string]metric.Float64Histogram, 0), }, nil } @@ -105,12 +104,12 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // If instrument does not exist, create it and register callback to emit last value in global Gauge store. if _, ok := o.gaugeInstruments[k]; !ok { - inst, err := (*o.meter).Float64ObservableGauge(k, instrument.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) + inst, err := (*o.meter).Float64ObservableGauge(k, metric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return } - o.gaugeInstruments[k] = &inst + o.gaugeInstruments[k] = inst } } @@ -128,12 +127,12 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet o.logger.Error("Failed to emit gauge: %w", err) return } - inst = &histogram + inst = histogram o.histogramInstruments[k] = inst } attrs := toAttributes(labels) - (*inst).Record(context.TODO(), float64(val), attrs...) + inst.Record(context.TODO(), float64(val), metric.WithAttributes(attrs...)) } // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. 
@@ -151,12 +150,12 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom return } - inst = &counter + inst = counter o.counterInstruments[k] = inst } attrs := toAttributes(labels) - (*inst).Add(context.TODO(), float64(val), attrs...) + inst.Add(context.TODO(), float64(val), metric.WithAttributes(attrs...)) } // EmitKey unsupported. diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 92a1bbc6316e..252a53d033a7 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -2,11 +2,9 @@ package telemetry import ( "context" - "io" "testing" gometrics "github.com/armon/go-metrics" - "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/sdk/metric" @@ -20,7 +18,7 @@ var ( Value: attribute.StringValue("test"), }) - expectedMetrics = map[string]metricdata.Metrics{ + expectedSinkMetrics = map[string]metricdata.Metrics{ "consul.raft.leader": { Name: "consul.raft.leader", Description: "", @@ -77,8 +75,8 @@ var ( Name: "consul.raft.leader.lastContact", Description: "", Unit: "", - Data: metricdata.Histogram{ - DataPoints: []metricdata.HistogramDataPoint{ + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ { Attributes: *attribute.EmptySet(), Count: 1, @@ -93,8 +91,8 @@ var ( Name: "consul.raft.commitTime", Description: "", Unit: "", - Data: metricdata.Histogram{ - DataPoints: []metricdata.HistogramDataPoint{ + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ { Attributes: attrs, Count: 1, @@ -114,17 +112,16 @@ func TestNewOTELSink(t *testing.T) { opts *OTELSinkOpts }{ "failsWithEmptyLogger": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Logger", + wantErr: "ferror: provide valid context", opts: &OTELSinkOpts{ - Logger: nil, Reader: metric.NewManualReader(), }, }, 
"failsWithEmptyReader": { - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts Reader", + wantErr: "ferror: provide valid reader", opts: &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: nil, + Ctx: context.Background(), }, }, } { @@ -147,8 +144,8 @@ func TestOTELSink(t *testing.T) { ctx := context.Background() opts := &OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: reader, + Ctx: context.Background(), } sink, err := NewOTELSink(opts) @@ -180,7 +177,7 @@ func TestOTELSink(t *testing.T) { // Validate metrics for _, actual := range collected.ScopeMetrics[0].Metrics { name := actual.Name - expected, ok := expectedMetrics[name] + expected, ok := expectedSinkMetrics[name] require.True(t, ok, "metric key %s should be in expectedMetrics map", name) isSameMetrics(t, expected, actual) } @@ -203,8 +200,8 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") isSameData(t, expectedData.DataPoints, actualData.DataPoints) - case metricdata.Histogram: - actualData, ok := actual.Data.(metricdata.Histogram) + case metricdata.Histogram[float64]: + actualData, ok := actual.Data.(metricdata.Histogram[float64]) require.True(t, ok, "different metric types: expected metricdata.Histogram") isSameHistogramData(t, expectedData.DataPoints, actualData.DataPoints) @@ -228,7 +225,7 @@ func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual [ } } -func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint, actual []metricdata.HistogramDataPoint) { +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Only verify the value and the attributes. 
diff --git a/go.mod b/go.mod index ffafa5157295..a079e511b5d2 100644 --- a/go.mod +++ b/go.mod @@ -97,6 +97,7 @@ require ( github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 go.opentelemetry.io/otel v1.15.1 + go.opentelemetry.io/otel/metric v0.38.1 go.opentelemetry.io/otel/sdk v1.15.1 go.opentelemetry.io/otel/sdk/metric v0.38.1 go.opentelemetry.io/proto/otlp v0.19.0 @@ -232,7 +233,6 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/metric v0.38.1 // indirect go.opentelemetry.io/otel/trace v1.15.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect From 563330e35558b5fefc1435d113b802cb5897e7c5 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 10:36:37 -0400 Subject: [PATCH 041/100] Add lots of documentation to the OTELSink --- agent/hcp/telemetry/otel_sink.go | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index ac184f45e864..5bdeb1b96e30 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -25,27 +25,50 @@ type OTELSinkOpts struct { Ctx context.Context } +// OTELSink captures and aggregates telemetry data as per the OpenTelemetry (OTEL) specification. +// Metric data is exported in OpenTelemetry Protocol (OTLP) wire format. +// This should be used as a Go Metrics backend, as it implements the MetricsSink interface. type OTELSink struct { + // spaceReplacer cleans the flattened key by removing any spaces. spaceReplacer *strings.Replacer logger hclog.Logger + // meterProvider is an OTEL MeterProvider, the entrypoint to the OTEL Metrics SDK. + // It handles reading/export of aggregated metric data. + // It enables creation and usage of an OTEL Meter. 
meterProvider *otelsdk.MeterProvider - meter *otelmetric.Meter - gaugeStore *gaugeStore + // meter is an OTEL Meter, which enables the creation of OTEL instruments. + meter *otelmetric.Meter + // Instrument stores contain an OTEL Instrument per metric name () + // for each gauge, counter and histogram types. + // An instrument allows us to record a measurement for a particular metric, and continuously aggregates metrics. + // We lazy load the creation of these intruments until a metric is seen, and use them repeatedly to record measurements. gaugeInstruments map[string]metric.Float64ObservableGauge counterInstruments map[string]metric.Float64Counter histogramInstruments map[string]metric.Float64Histogram + // gaugeStore is required to hold last-seen values of gauges + // This is a workaround, as OTEL currently does not have synchronous gauge instruments. + // It only allows the registration of "callbacks", which obtain values when the callback is called. + // We must hold gauge values until the callback is called, when the measurement is exported, and can be removed. + gaugeStore *gaugeStore + mutex sync.Mutex } +// NewOTELReader returns a configured OTEL PeriodicReader to export metrics every X seconds. +// It configures the reader with a custom OTELExporter with a MetricsClient to transform and export +// metrics in OTLP format to an external url. func NewOTELReader(client client.MetricsClient, url url.URL, exportInterval time.Duration) otelsdk.Reader { exporter := NewOTELExporter(client, url) return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } +// NewOTELSink returns a sink which fits the Go Metrics MetricsSink interface. +// It sets up a MeterProvider and Meter, key pieces of the OTEL Metrics SDK which +// enable us to create OTEL Instruments to record measurements. 
func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { if opts.Reader == nil { return nil, fmt.Errorf("ferror: provide valid reader") @@ -55,7 +78,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { return nil, fmt.Errorf("ferror: provide valid context") } - // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. + // Setup OTEL Metrics SDK to aggregate, convert and export metrics. res := resource.NewSchemaless() meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") @@ -104,6 +127,9 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // If instrument does not exist, create it and register callback to emit last value in global Gauge store. if _, ok := o.gaugeInstruments[k]; !ok { + // The registration of a callback only needs to happen once, when the instrument is created. + // The callback will be triggered every export cycle for that metric. + // It must be explicitly de-registered to be removed (which we do not do), to ensure new gauge values are exported every cycle. 
inst, err := (*o.meter).Float64ObservableGauge(k, metric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) From b98481d860fd6ecb20befc75b8c61e3f7774a1f3 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 10:51:34 -0400 Subject: [PATCH 042/100] Fix gauge store comment and check ok --- agent/hcp/telemetry/gauge_store.go | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 972c2683263b..07faf9fb1c8b 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -8,26 +8,32 @@ import ( "go.opentelemetry.io/otel/metric" ) +// gaugeStore holds last seen Gauge values for a particular metric () in the store. +// OTEL does not currently have a synchronous Gauge instrument. Instead, it allows the registration of callbacks. +// The callbacks are called during export, where the Gauge value must be returned. +// This store is a workaround, which holds last seen Gauge values until the callback is called. type gaugeStore struct { store map[string]*gaugeValue mutex sync.Mutex } -// gaugeValues hold both the float64 value and the labels. +// gaugeValues are the last seen measurement for a Gauge metric, which contains a float64 value and labels. type gaugeValue struct { Value float64 Attributes []attribute.KeyValue } // LoadAndDelete will read a Gauge value and delete it. -// Within the OTEL Gauge callbacks we must delete the value once we have read it -// to ensure we only emit a Gauge value once, as the callbacks continue to execute every collection cycle. -// The store must be initialized before using this method. +// Once registered for a metric name, a Gauge callback will continue to execute every collection cycel. +// We must delete the value once we have read it, to avoid repeat values being sent. 
func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { g.mutex.Lock() defer g.mutex.Unlock() gauge, ok := g.store[key] + if !ok { + return nil, ok + } delete(g.store, key) @@ -35,7 +41,6 @@ func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { } // Store adds a gaugeValue to the global gauge store. -// The store must be initialized before using this method. func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { g.mutex.Lock() defer g.mutex.Unlock() @@ -49,7 +54,6 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu } // gaugeCallback returns a callback which gets called when metrics are collected for export. -// the callback obtains the gauge value from the global gauges. func (g *gaugeStore) gaugeCallback(key string) metric.Float64Callback { // Closures keep a reference to the key string, that get garbage collected when code completes. return func(_ context.Context, obs metric.Float64Observer) error { From 0162bb6139e5f5f5ed076f8bfb55d290cc121879 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 12:06:35 -0400 Subject: [PATCH 043/100] Add select and ctx.Done() check to gauge callback --- agent/hcp/telemetry/gauge_store.go | 13 +++++++++---- agent/hcp/telemetry/gauge_store_test.go | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 07faf9fb1c8b..61feedc3c4e4 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -56,10 +56,15 @@ func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValu // gaugeCallback returns a callback which gets called when metrics are collected for export. func (g *gaugeStore) gaugeCallback(key string) metric.Float64Callback { // Closures keep a reference to the key string, that get garbage collected when code completes. 
- return func(_ context.Context, obs metric.Float64Observer) error { - if gauge, ok := g.LoadAndDelete(key); ok { - obs.Observe(gauge.Value, metric.WithAttributes(gauge.Attributes...)) + return func(ctx context.Context, obs metric.Float64Observer) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + if gauge, ok := g.LoadAndDelete(key); ok { + obs.Observe(gauge.Value, metric.WithAttributes(gauge.Attributes...)) + } + return nil } - return nil } } diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index d4c4eb38d00e..b8c332c56c5f 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -1,6 +1,7 @@ package telemetry import ( + "context" "testing" "github.com/stretchr/testify/require" @@ -40,3 +41,18 @@ func TestGaugeStore(t *testing.T) { require.True(t, ok) require.Equal(t, val.Value, float64(6.7)) } + +func TestGaugeCallback_Failure(t *testing.T) { + k := "consul.raft.apply" + globalGauges := &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + } + globalGauges.Store(k, 1.23, nil) + + cb := globalGauges.gaugeCallback(k) + ctx, cancel := context.WithCancel(context.Background()) + + cancel() + err := cb(ctx, nil) + require.ErrorIs(t, err, context.Canceled) +} From 899dbbaf72744fccd9fefd85565bf32bc63b8e33 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 12:13:22 -0400 Subject: [PATCH 044/100] use require.Equal for attributes --- agent/hcp/telemetry/otel_sink_test.go | 50 +++++++++++++++++---------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 252a53d033a7..b9bf819251d5 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -2,6 +2,7 @@ package telemetry import ( "context" + "sort" "testing" gometrics "github.com/armon/go-metrics" @@ -174,16 +175,22 @@ func TestOTELSink(t *testing.T) { // 
Validate resource require.Equal(t, resource.NewSchemaless(), collected.Resource) - // Validate metrics - for _, actual := range collected.ScopeMetrics[0].Metrics { + // Validate Metrics + require.NotEmpty(t, collected.ScopeMetrics) + actualMetrics := collected.ScopeMetrics[0].Metrics + require.Equal(t, len(actualMetrics), len(expectedSinkMetrics)) + + for _, actual := range actualMetrics { name := actual.Name - expected, ok := expectedSinkMetrics[name] + expected, ok := expectedSinkMetrics[actual.Name] require.True(t, ok, "metric key %s should be in expectedMetrics map", name) isSameMetrics(t, expected, actual) } } // compareMetrics verifies if two metricdata.Metric objects are equal by ignoring the time component. +// test metrics should not contain duplicate sums for histograms nor duplicate values for counters/gauges +// to ensure predictable order of data. func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata.Metrics) { require.Equal(t, expected.Name, actual.Name, "different .Name field") require.Equal(t, expected.Description, actual.Description, "different .Description field") @@ -211,23 +218,35 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different datapoints length") - // Only verify the value and the attributes. + // Sort for predictable data in order of lowest value. + // Test cases should not contain duplicate values. + sort.Slice(expected, func(i, j int) bool { + return expected[i].Value < expected[j].Value + }) + sort.Slice(actual, func(i, j int) bool { + return expected[i].Value < expected[j].Value + }) + + // Only verify the value and attributes. 
for i, dp := range expected { currActual := actual[i] require.Equal(t, dp.Value, currActual.Value, "different datapoint value") - require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") - - iter := dp.Attributes.Iter() - for iter.Next() { - attr := iter.Attribute() - require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") - } + require.Equal(t, dp.Attributes, currActual.Attributes, "different attributes") } } func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") + // Sort for predictable data in order of lowest sum. + // Test cases should not contain duplicate sums. + sort.Slice(expected, func(i, j int) bool { + return expected[i].Sum < expected[j].Sum + }) + sort.Slice(actual, func(i, j int) bool { + return expected[i].Sum < expected[j].Sum + }) + // Only verify the value and the attributes. 
for i, dp := range expected { currActual := actual[i] @@ -235,13 +254,6 @@ func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[ require.Equal(t, dp.Max, currActual.Max, "different histogram datapoint .Max value") require.Equal(t, dp.Min, currActual.Min, "different histogram datapoint .Min value") require.Equal(t, dp.Count, currActual.Count, "different histogram datapoint .Count value") - - require.Equal(t, dp.Attributes.Len(), currActual.Attributes.Len(), "different attributes of datapoint length") - - iter := dp.Attributes.Iter() - for iter.Next() { - attr := iter.Attribute() - require.True(t, currActual.Attributes.HasValue(attr.Key), "missing attribute in expected") - } + require.Equal(t, dp.Attributes, currActual.Attributes, "different attributes") } } From 542d23a76e4ec6cac4a5deaed093ed661805fab9 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 13:16:39 -0400 Subject: [PATCH 045/100] Fixed import naming --- agent/hcp/telemetry/otel_sink.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 5bdeb1b96e30..4a9f40c3e4bc 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -12,7 +12,6 @@ import ( gometrics "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/metric" otelmetric "go.opentelemetry.io/otel/metric" otelsdk "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" @@ -45,9 +44,9 @@ type OTELSink struct { // for each gauge, counter and histogram types. // An instrument allows us to record a measurement for a particular metric, and continuously aggregates metrics. // We lazy load the creation of these intruments until a metric is seen, and use them repeatedly to record measurements. 
- gaugeInstruments map[string]metric.Float64ObservableGauge - counterInstruments map[string]metric.Float64Counter - histogramInstruments map[string]metric.Float64Histogram + gaugeInstruments map[string]otelmetric.Float64ObservableGauge + counterInstruments map[string]otelmetric.Float64Counter + histogramInstruments map[string]otelmetric.Float64Histogram // gaugeStore is required to hold last-seen values of gauges // This is a workaround, as OTEL currently does not have synchronous gauge instruments. @@ -93,9 +92,9 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider: meterProvider, meter: &meter, gaugeStore: gs, - gaugeInstruments: make(map[string]metric.Float64ObservableGauge, 0), - counterInstruments: make(map[string]metric.Float64Counter, 0), - histogramInstruments: make(map[string]metric.Float64Histogram, 0), + gaugeInstruments: make(map[string]otelmetric.Float64ObservableGauge, 0), + counterInstruments: make(map[string]otelmetric.Float64Counter, 0), + histogramInstruments: make(map[string]otelmetric.Float64Histogram, 0), }, nil } @@ -130,7 +129,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // The registration of a callback only needs to happen once, when the instrument is created. // The callback will be triggered every export cycle for that metric. // It must be explicitly de-registered to be removed (which we do not do), to ensure new gauge values are exported every cycle. 
- inst, err := (*o.meter).Float64ObservableGauge(k, metric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) + inst, err := (*o.meter).Float64ObservableGauge(k, otelmetric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { o.logger.Error("Failed to emit gauge: %w", err) return @@ -158,7 +157,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet } attrs := toAttributes(labels) - inst.Record(context.TODO(), float64(val), metric.WithAttributes(attrs...)) + inst.Record(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...)) } // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. @@ -181,7 +180,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom } attrs := toAttributes(labels) - inst.Add(context.TODO(), float64(val), metric.WithAttributes(attrs...)) + inst.Add(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...)) } // EmitKey unsupported. From 2d8a18a9a513e2d506e38dcd9e1575fc9873a79f Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 15:06:25 -0400 Subject: [PATCH 046/100] Remove float64 calls and add a NewGaugeStore method --- agent/hcp/telemetry/gauge_store.go | 7 +++++++ agent/hcp/telemetry/gauge_store_test.go | 22 +++++++++++----------- agent/hcp/telemetry/otel_sink.go | 6 +----- 3 files changed, 19 insertions(+), 16 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 61feedc3c4e4..ce905dc72ca5 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -23,6 +23,13 @@ type gaugeValue struct { Attributes []attribute.KeyValue } +// NewGaugeStore returns an initialized empty gaugeStore. +func NewGaugeStore() *gaugeStore { + return &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + } +} + // LoadAndDelete will read a Gauge value and delete it. 
// Once registered for a metric name, a Gauge callback will continue to execute every collection cycel. // We must delete the value once we have read it, to avoid repeat values being sent. diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index b8c332c56c5f..34449e98d4f8 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -9,9 +9,9 @@ import ( ) func TestGaugeStore(t *testing.T) { - globalGauges := &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - } + t.Parallel() + + globalGauges := NewGaugeStore() attributes := []attribute.KeyValue{ { @@ -20,12 +20,12 @@ func TestGaugeStore(t *testing.T) { }, } - globalGauges.Store("test", float64(1.23), attributes) + globalGauges.Store("test", 1.23, attributes) // Should store a new gauge. val, ok := globalGauges.LoadAndDelete("test") require.True(t, ok) - require.Equal(t, val.Value, float64(1.23)) + require.Equal(t, val.Value, 1.23) require.Equal(t, val.Attributes, attributes) // Gauge with key "test" have been deleted. @@ -33,20 +33,20 @@ func TestGaugeStore(t *testing.T) { require.False(t, ok) require.Nil(t, val) - globalGauges.Store("duplicate", float64(1.5), nil) - globalGauges.Store("duplicate", float64(6.7), nil) + globalGauges.Store("duplicate", 1.5, nil) + globalGauges.Store("duplicate", 6.7, nil) // Gauge with key "duplicate" should hold the latest (last seen) value. 
val, ok = globalGauges.LoadAndDelete("duplicate") require.True(t, ok) - require.Equal(t, val.Value, float64(6.7)) + require.Equal(t, val.Value, 6.7) } func TestGaugeCallback_Failure(t *testing.T) { + t.Parallel() + k := "consul.raft.apply" - globalGauges := &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - } + globalGauges := NewGaugeStore() globalGauges.Store(k, 1.23, nil) cb := globalGauges.gaugeCallback(k) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 4a9f40c3e4bc..6f2f94583ff4 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -82,16 +82,12 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") - gs := &gaugeStore{ - store: make(map[string]*gaugeValue, 0), - } - return &OTELSink{ spaceReplacer: strings.NewReplacer(" ", "_"), logger: hclog.FromContext(opts.Ctx).Named("otel_sink"), meterProvider: meterProvider, meter: &meter, - gaugeStore: gs, + gaugeStore: NewGaugeStore(), gaugeInstruments: make(map[string]otelmetric.Float64ObservableGauge, 0), counterInstruments: make(map[string]otelmetric.Float64Counter, 0), histogramInstruments: make(map[string]otelmetric.Float64Histogram, 0), From 5defe6a9ff93a48f11c1ec8bf3dbf5fc580f5f92 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 15:40:57 -0400 Subject: [PATCH 047/100] Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store --- agent/hcp/telemetry/gauge_store.go | 4 +-- agent/hcp/telemetry/gauge_store_test.go | 43 +++++++++++++++++++------ agent/hcp/telemetry/otel_sink.go | 2 +- agent/hcp/telemetry/otel_sink_test.go | 37 +++++++++++++++++---- 4 files changed, 66 insertions(+), 20 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go index 
ce905dc72ca5..76dfb7806668 100644 --- a/agent/hcp/telemetry/gauge_store.go +++ b/agent/hcp/telemetry/gauge_store.go @@ -47,8 +47,8 @@ func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { return gauge, ok } -// Store adds a gaugeValue to the global gauge store. -func (g *gaugeStore) Store(key string, value float64, labels []attribute.KeyValue) { +// Set adds a gaugeValue to the global gauge store. +func (g *gaugeStore) Set(key string, value float64, labels []attribute.KeyValue) { g.mutex.Lock() defer g.mutex.Unlock() diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 34449e98d4f8..167b2e06e1bf 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -2,6 +2,7 @@ package telemetry import ( "context" + "sync" "testing" "github.com/stretchr/testify/require" @@ -11,7 +12,7 @@ import ( func TestGaugeStore(t *testing.T) { t.Parallel() - globalGauges := NewGaugeStore() + gaugeStore := NewGaugeStore() attributes := []attribute.KeyValue{ { @@ -20,24 +21,24 @@ func TestGaugeStore(t *testing.T) { }, } - globalGauges.Store("test", 1.23, attributes) + gaugeStore.Set("test", 1.23, attributes) // Should store a new gauge. - val, ok := globalGauges.LoadAndDelete("test") + val, ok := gaugeStore.LoadAndDelete("test") require.True(t, ok) require.Equal(t, val.Value, 1.23) require.Equal(t, val.Attributes, attributes) // Gauge with key "test" have been deleted. - val, ok = globalGauges.LoadAndDelete("test") + val, ok = gaugeStore.LoadAndDelete("test") require.False(t, ok) require.Nil(t, val) - globalGauges.Store("duplicate", 1.5, nil) - globalGauges.Store("duplicate", 6.7, nil) + gaugeStore.Set("duplicate", 1.5, nil) + gaugeStore.Set("duplicate", 6.7, nil) // Gauge with key "duplicate" should hold the latest (last seen) value. 
- val, ok = globalGauges.LoadAndDelete("duplicate") + val, ok = gaugeStore.LoadAndDelete("duplicate") require.True(t, ok) require.Equal(t, val.Value, 6.7) } @@ -46,13 +47,35 @@ func TestGaugeCallback_Failure(t *testing.T) { t.Parallel() k := "consul.raft.apply" - globalGauges := NewGaugeStore() - globalGauges.Store(k, 1.23, nil) + gaugeStore := NewGaugeStore() + gaugeStore.Set(k, 1.23, nil) - cb := globalGauges.gaugeCallback(k) + cb := gaugeStore.gaugeCallback(k) ctx, cancel := context.WithCancel(context.Background()) cancel() err := cb(ctx, nil) require.ErrorIs(t, err, context.Canceled) } + +// TestGaugeStore_Race induces a race condition. When run with go test -race, +// this test should pass if implementation is concurrency safe. +func TestGaugeStore_Race(t *testing.T) { + t.Parallel() + + gaugeStore := NewGaugeStore() + wg := &sync.WaitGroup{} + for k, v := range map[string]float64{"consul.raft.apply": 23.23, "consul.raft.test": 14.3} { + wg.Add(1) + go storeAndRetrieve(t, k, v, gaugeStore, wg) + } + wg.Wait() +} + +func storeAndRetrieve(t *testing.T, k string, v float64, gaugeStore *gaugeStore, wg *sync.WaitGroup) { + gaugeStore.Set(k, v, nil) + gv, ok := gaugeStore.LoadAndDelete(k) + require.True(t, ok) + require.Equal(t, v, gv.Value) + wg.Done() +} diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 6f2f94583ff4..ec5e2d476b1a 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -115,7 +115,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr k := o.flattenKey(key) // Set value in global Gauge store. 
- o.gaugeStore.Store(k, float64(val), toAttributes(labels)) + o.gaugeStore.Set(k, float64(val), toAttributes(labels)) o.mutex.Lock() defer o.mutex.Unlock() diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index b9bf819251d5..6fd05514632b 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -3,6 +3,7 @@ package telemetry import ( "context" "sort" + "sync" "testing" gometrics "github.com/armon/go-metrics" @@ -108,6 +109,7 @@ var ( ) func TestNewOTELSink(t *testing.T) { + t.Parallel() for name, test := range map[string]struct { wantErr string opts *OTELSinkOpts @@ -126,7 +128,9 @@ func TestNewOTELSink(t *testing.T) { }, }, } { + test := test t.Run(name, func(t *testing.T) { + t.Parallel() sink, err := NewOTELSink(test.opts) if test.wantErr != "" { require.Error(t, err) @@ -139,14 +143,18 @@ func TestNewOTELSink(t *testing.T) { } } +// TestOTELSink performs concurrent metric instrument operations on the sink. +// When run with go test -race, this test should pass if implementation is concurrency safe. func TestOTELSink(t *testing.T) { + t.Parallel() + // Manual reader outputs the aggregated metrics when reader.Collect is called. 
reader := metric.NewManualReader() ctx := context.Background() opts := &OTELSinkOpts{ Reader: reader, - Ctx: context.Background(), + Ctx: ctx, } sink, err := NewOTELSink(opts) @@ -159,14 +167,29 @@ func TestOTELSink(t *testing.T) { }, } - sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) - sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + wg := &sync.WaitGroup{} + wg.Add(3) + + go func() { + sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + wg.Done() + + }() + + go func() { + sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) + wg.Done() + }() - sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) - sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) + go func() { + sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) + wg.Done() + }() - sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) - sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) + wg.Wait() var collected metricdata.ResourceMetrics err = reader.Collect(ctx, &collected) From a893c320dd4ecb50710017f7e963caf0f4e69c87 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 16:22:14 -0400 Subject: [PATCH 048/100] Generate 100 gauge operations --- agent/hcp/telemetry/gauge_store_test.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 167b2e06e1bf..b450b6209f21 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ 
b/agent/hcp/telemetry/gauge_store_test.go @@ -2,9 +2,12 @@ package telemetry import ( "context" + "fmt" + "math/rand" "sync" "testing" + "github.com/hashicorp/go-uuid" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute" ) @@ -65,9 +68,13 @@ func TestGaugeStore_Race(t *testing.T) { gaugeStore := NewGaugeStore() wg := &sync.WaitGroup{} - for k, v := range map[string]float64{"consul.raft.apply": 23.23, "consul.raft.test": 14.3} { + for i := 0; i < 100; i++ { wg.Add(1) - go storeAndRetrieve(t, k, v, gaugeStore, wg) + v := rand.Float64() + uuid, err := uuid.GenerateUUID() + require.NoError(t, err) + + go storeAndRetrieve(t, fmt.Sprintf("%s%f", uuid, v), v, gaugeStore, wg) } wg.Wait() } From 9d5f5efb5d08b5348476a2593186da02cf4a4e7e Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 16:47:14 -0400 Subject: [PATCH 049/100] Seperate the labels into goroutines in sink test --- agent/hcp/telemetry/otel_sink_test.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 6fd05514632b..6b88bbb7aa3c 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -168,23 +168,35 @@ func TestOTELSink(t *testing.T) { } wg := &sync.WaitGroup{} - wg.Add(3) + wg.Add(6) go func() { sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) - sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) wg.Done() }() + go func() { + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + wg.Done() + }() + go func() { sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + wg.Done() + }() + + go func() { sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) wg.Done() }() go func() { sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + 
wg.Done() + }() + + go func() { sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) wg.Done() }() From 80a534bfcc6cf3764e61ad717ca17bb40a68ffc2 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 11 May 2023 16:55:01 -0400 Subject: [PATCH 050/100] Generate kv store for the test case keys to avoid using uuid --- agent/hcp/telemetry/gauge_store_test.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index b450b6209f21..5ed5eadb8fd4 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -7,7 +7,6 @@ import ( "sync" "testing" - "github.com/hashicorp/go-uuid" "github.com/stretchr/testify/require" "go.opentelemetry.io/otel/attribute" ) @@ -68,14 +67,14 @@ func TestGaugeStore_Race(t *testing.T) { gaugeStore := NewGaugeStore() wg := &sync.WaitGroup{} - for i := 0; i < 100; i++ { - wg.Add(1) + samples := 100 + for i := 0; i < samples; i++ { + k := fmt.Sprintf("consul.test.%d", i) v := rand.Float64() - uuid, err := uuid.GenerateUUID() - require.NoError(t, err) - - go storeAndRetrieve(t, fmt.Sprintf("%s%f", uuid, v), v, gaugeStore, wg) + wg.Add(1) + go storeAndRetrieve(t, k, v, gaugeStore, wg) } + wg.Wait() } From aa2a97136c9b67403c200dc026a48005c2c6d0e6 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 12 May 2023 10:55:17 -0400 Subject: [PATCH 051/100] Added a race test with 300 samples for OTELSink --- agent/hcp/telemetry/otel_sink_test.go | 166 +++++++++++++++++++------- 1 file changed, 126 insertions(+), 40 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 6b88bbb7aa3c..1e60d4ad26e8 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -2,7 +2,10 @@ package telemetry import ( "context" + "fmt" + "math/rand" "sort" + "strings" "sync" "testing" @@ -143,8 
+146,6 @@ func TestNewOTELSink(t *testing.T) { } } -// TestOTELSink performs concurrent metric instrument operations on the sink. -// When run with go test -race, this test should pass if implementation is concurrency safe. func TestOTELSink(t *testing.T) { t.Parallel() @@ -167,65 +168,152 @@ func TestOTELSink(t *testing.T) { }, } - wg := &sync.WaitGroup{} - wg.Add(6) + sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) - go func() { - sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) - wg.Done() + sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) - }() + sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) - go func() { - sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) - wg.Done() - }() + var collected metricdata.ResourceMetrics + err = reader.Collect(ctx, &collected) + require.NoError(t, err) - go func() { - sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) - wg.Done() - }() + isSame(t, expectedSinkMetrics, collected) +} - go func() { - sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) - wg.Done() - }() +func TestOTELSink_Race(t *testing.T) { + reader := metric.NewManualReader() + ctx := context.Background() + opts := &OTELSinkOpts{ + Ctx: ctx, + Reader: reader, + } - go func() { - sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) - wg.Done() - }() + sink, err := NewOTELSink(opts) + require.NoError(t, err) - go func() { - sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) - wg.Done() - }() + expectedMetrics := 
generateSamples(100) + wg := &sync.WaitGroup{} + for k, v := range expectedMetrics { + wg.Add(1) + go performSinkOperation(t, sink, k, v, wg) + } wg.Wait() var collected metricdata.ResourceMetrics err = reader.Collect(ctx, &collected) require.NoError(t, err) + isSame(t, expectedMetrics, collected) +} + +// generateSamples generates n of each gauges, counter and histogram measurements to use for test purposes. +func generateSamples(n int) map[string]metricdata.Metrics { + generated := make(map[string]metricdata.Metrics, 3*n) + + for i := 0; i < n; i++ { + v := rand.Float64() + k := fmt.Sprintf("consul.test.gauges.%d", i) + generated[k] = metricdata.Metrics{ + Name: k, + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(v)), + }, + }, + }, + } + } + + for i := 0; i < n; i++ { + v := rand.Float64() + k := fmt.Sprintf("consul.test.sum.%d", i) + generated[k] = metricdata.Metrics{ + Name: k, + Data: metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(v)), + }, + }, + }, + } + + } + + for i := 0; i < n; i++ { + v := rand.Float64() + k := fmt.Sprintf("consul.test.hist.%d", i) + generated[k] = metricdata.Metrics{ + Name: k, + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Sum: float64(float32(v)), + Max: metricdata.NewExtrema(float64(float32(v))), + Min: metricdata.NewExtrema(float64(float32(v))), + Count: 1, + }, + }, + }, + } + } + + return generated +} + +// performSinkOperation emits a measurement using the OTELSink and calls wg.Done() when completed. 
+func performSinkOperation(t *testing.T, sink *OTELSink, k string, v metricdata.Metrics, wg *sync.WaitGroup) { + key := strings.Split(k, ".") + data := v.Data + switch data.(type) { + case metricdata.Gauge[float64]: + gauge, ok := data.(metricdata.Gauge[float64]) + require.True(t, ok) + + sink.SetGauge(key, float32(gauge.DataPoints[0].Value)) + case metricdata.Sum[float64]: + sum, ok := data.(metricdata.Sum[float64]) + require.True(t, ok) + + sink.IncrCounter(key, float32(sum.DataPoints[0].Value)) + case metricdata.Histogram[float64]: + hist, ok := data.(metricdata.Histogram[float64]) + require.True(t, ok) + + sink.AddSample(key, float32(hist.DataPoints[0].Sum)) + } + + wg.Done() +} + +func isSame(t *testing.T, expectedMap map[string]metricdata.Metrics, actual metricdata.ResourceMetrics) { // Validate resource - require.Equal(t, resource.NewSchemaless(), collected.Resource) + require.Equal(t, resource.NewSchemaless(), actual.Resource) // Validate Metrics - require.NotEmpty(t, collected.ScopeMetrics) - actualMetrics := collected.ScopeMetrics[0].Metrics - require.Equal(t, len(actualMetrics), len(expectedSinkMetrics)) + require.NotEmpty(t, actual.ScopeMetrics) + actualMetrics := actual.ScopeMetrics[0].Metrics + require.Equal(t, len(expectedMap), len(actualMetrics)) for _, actual := range actualMetrics { name := actual.Name - expected, ok := expectedSinkMetrics[actual.Name] + expected, ok := expectedMap[actual.Name] require.True(t, ok, "metric key %s should be in expectedMetrics map", name) isSameMetrics(t, expected, actual) } } // compareMetrics verifies if two metricdata.Metric objects are equal by ignoring the time component. -// test metrics should not contain duplicate sums for histograms nor duplicate values for counters/gauges -// to ensure predictable order of data. +// avoid duplicate datapoint values to ensure predictable order of sort. 
func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata.Metrics) { require.Equal(t, expected.Name, actual.Name, "different .Name field") require.Equal(t, expected.Description, actual.Description, "different .Description field") @@ -236,12 +324,12 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. actualData, ok := actual.Data.(metricdata.Gauge[float64]) require.True(t, ok, "different metric types: expected metricdata.Gauge[float64]") - isSameData(t, expectedData.DataPoints, actualData.DataPoints) + isSameDataPoint(t, expectedData.DataPoints, actualData.DataPoints) case metricdata.Sum[float64]: actualData, ok := actual.Data.(metricdata.Sum[float64]) require.True(t, ok, "different metric types: expected metricdata.Sum[float64]") - isSameData(t, expectedData.DataPoints, actualData.DataPoints) + isSameDataPoint(t, expectedData.DataPoints, actualData.DataPoints) case metricdata.Histogram[float64]: actualData, ok := actual.Data.(metricdata.Histogram[float64]) require.True(t, ok, "different metric types: expected metricdata.Histogram") @@ -250,11 +338,10 @@ func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata. } } -func isSameData(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { +func isSameDataPoint(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different datapoints length") // Sort for predictable data in order of lowest value. - // Test cases should not contain duplicate values. sort.Slice(expected, func(i, j int) bool { return expected[i].Value < expected[j].Value }) @@ -274,7 +361,6 @@ func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[ require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Sort for predictable data in order of lowest sum. 
- // Test cases should not contain duplicate sums. sort.Slice(expected, func(i, j int) bool { return expected[i].Sum < expected[j].Sum }) From 91f9a891add98d779229d6381110d96a624b24fe Mon Sep 17 00:00:00 2001 From: Ashvitha Date: Fri, 12 May 2023 13:08:38 -0400 Subject: [PATCH 052/100] [HCP Observability] OTELExporter (#17128) * Client configured with TLS using HCP config and retry/throttle * run go mod tidy * Remove one abstraction to use the config from deps * Address PR feedback * Client configured with TLS using HCP config and retry/throttle * run go mod tidy * Create new OTELExporter which uses the MetricsClient Add transform because the conversion is in an /internal package * Fix lint error * early return when there are no metrics * Add NewOTELExporter() function * Downgrade to metrics SDK version: v1.15.0-rc.1 * Fix imports * fix small nits with comments and url.URL * Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile * Cleanup error handling and clarify empty metrics case * Fix input/expected naming in otel_transform_test.go * add comment for metric tracking * Add a general isEmpty method * Add clear error types * update to latest version 1.15.0 of OTEL --- agent/hcp/telemetry/doc.go | 12 + agent/hcp/telemetry/otel_exporter.go | 72 ++++++ agent/hcp/telemetry/otel_exporter_test.go | 140 +++++++++++ agent/hcp/telemetry/otlp_transform.go | 185 ++++++++++++++ agent/hcp/telemetry/otlp_transform_test.go | 275 +++++++++++++++++++++ go.mod | 11 +- go.sum | 22 +- 7 files changed, 712 insertions(+), 5 deletions(-) create mode 100644 agent/hcp/telemetry/doc.go create mode 100644 agent/hcp/telemetry/otel_exporter.go create mode 100644 agent/hcp/telemetry/otel_exporter_test.go create mode 100644 agent/hcp/telemetry/otlp_transform.go create mode 100644 agent/hcp/telemetry/otlp_transform_test.go diff --git a/agent/hcp/telemetry/doc.go b/agent/hcp/telemetry/doc.go new file mode 100644 index 000000000000..4ef18f39bd30 
--- /dev/null +++ b/agent/hcp/telemetry/doc.go @@ -0,0 +1,12 @@ +// Package telemetry implements functionality to collect, aggregate, convert and export +// telemetry data in OpenTelemetry Protocol (OTLP) format. +// +// The entrypoint is the OpenTelemetry (OTEL) go-metrics sink which: +// - Receives metric data. +// - Aggregates metric data using the OTEL Go Metrics SDK. +// - Exports metric data using a configurable OTEL exporter. +// +// The package also provides an OTEL exporter implementation to be used within the sink, which: +// - Transforms metric data from the Metrics SDK OTEL representation to OTLP format. +// - Exports OTLP metric data to an external endpoint using a configurable client. +package telemetry diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go new file mode 100644 index 000000000000..02f6a07f8371 --- /dev/null +++ b/agent/hcp/telemetry/otel_exporter.go @@ -0,0 +1,72 @@ +package telemetry + +import ( + "context" + "net/url" + + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/aggregation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + + hcpclient "github.com/hashicorp/consul/agent/hcp/client" +) + +// OTELExporter is a custom implementation of an OTEL Metrics SDK metrics.Exporter. +// The exporter is used by an OTEL Metrics SDK PeriodicReader to export aggregated metrics. +// This allows us to use a custom client - HCP authenticated MetricsClient. +type OTELExporter struct { + client hcpclient.MetricsClient + url url.URL +} + +// NewOTELExporter returns a configured OTELExporter +func NewOTELExporter(client hcpclient.MetricsClient, url url.URL) *OTELExporter { + return &OTELExporter{ + client: client, + url: url, + } +} + +// Temporality returns the Cumulative temporality for metrics aggregation. +// Telemetry Gateway stores metrics in Prometheus format, so use Cumulative aggregation as default.
+func (e *OTELExporter) Temporality(_ metric.InstrumentKind) metricdata.Temporality { + return metricdata.CumulativeTemporality +} + +// Aggregation returns the Aggregation to use for an instrument kind. +// The default implementation provided by the OTEL Metrics SDK library DefaultAggregationSelector panics. +// This custom version replicates that logic, but removes the panic. +func (e *OTELExporter) Aggregation(kind metric.InstrumentKind) aggregation.Aggregation { + switch kind { + case metric.InstrumentKindObservableGauge: + return aggregation.LastValue{} + case metric.InstrumentKindHistogram: + return aggregation.ExplicitBucketHistogram{ + Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, + NoMinMax: false, + } + } + // for metric.InstrumentKindCounter and others, default to sum. + return aggregation.Sum{} +} + +// Export serializes and transmits metric data to a receiver. +func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceMetrics) error { + otlpMetrics := transformOTLP(metrics) + if isEmpty(otlpMetrics) { + return nil + } + return e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) +} + +// ForceFlush is a no-op, as the MetricsClient client holds no state. +func (e *OTELExporter) ForceFlush(ctx context.Context) error { + // TODO: Emit metric when this operation occurs. + return ctx.Err() +} + +// Shutdown is a no-op, as the MetricsClient is a HTTP client that requires no graceful shutdown. +func (e *OTELExporter) Shutdown(ctx context.Context) error { + // TODO: Emit metric when this operation occurs. 
+ return ctx.Err() +} diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go new file mode 100644 index 000000000000..0e3d3fcc1e4f --- /dev/null +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -0,0 +1,140 @@ +package telemetry + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/aggregation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" + metricpb "go.opentelemetry.io/proto/otlp/metrics/v1" + + "github.com/hashicorp/consul/agent/hcp/client" +) + +func TestTemporality(t *testing.T) { + t.Parallel() + exp := &OTELExporter{} + require.Equal(t, metricdata.CumulativeTemporality, exp.Temporality(metric.InstrumentKindCounter)) +} + +func TestAggregation(t *testing.T) { + t.Parallel() + for name, test := range map[string]struct { + kind metric.InstrumentKind + expAgg aggregation.Aggregation + }{ + "gauge": { + kind: metric.InstrumentKindObservableGauge, + expAgg: aggregation.LastValue{}, + }, + "counter": { + kind: metric.InstrumentKindCounter, + expAgg: aggregation.Sum{}, + }, + "histogram": { + kind: metric.InstrumentKindHistogram, + expAgg: aggregation.ExplicitBucketHistogram{Boundaries: []float64{0, 5, 10, 25, 50, 75, 100, 250, 500, 750, 1000, 2500, 5000, 7500, 10000}, NoMinMax: false}, + }, + } { + test := test + t.Run(name, func(t *testing.T) { + t.Parallel() + exp := &OTELExporter{} + require.Equal(t, test.expAgg, exp.Aggregation(test.kind)) + }) + } +} + +type mockMetricsClient struct { + exportErr error +} + +func (m *mockMetricsClient) ExportMetrics(ctx context.Context, protoMetrics *metricpb.ResourceMetrics, endpoint string) error { + return m.exportErr +} + +func TestExport(t *testing.T) { + t.Parallel() + for name, test := range map[string]struct { + wantErr string + metrics *metricdata.ResourceMetrics + client client.MetricsClient + }{ + 
"earlyReturnWithoutScopeMetrics": { + client: &mockMetricsClient{}, + metrics: mutateMetrics(nil), + }, + "earlyReturnWithoutMetrics": { + client: &mockMetricsClient{}, + metrics: mutateMetrics([]metricdata.ScopeMetrics{ + {Metrics: []metricdata.Metrics{}}, + }, + ), + }, + "errorWithExportFailure": { + client: &mockMetricsClient{ + exportErr: fmt.Errorf("failed to export metrics."), + }, + metrics: mutateMetrics([]metricdata.ScopeMetrics{ + { + Metrics: []metricdata.Metrics{ + { + Name: "consul.raft.commitTime", + Data: metricdata.Gauge[float64]{}, + }, + }, + }, + }, + ), + wantErr: "failed to export metrics", + }, + } { + test := test + t.Run(name, func(t *testing.T) { + t.Parallel() + exp := &OTELExporter{ + client: test.client, + } + + err := exp.Export(context.Background(), test.metrics) + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.NoError(t, err) + }) + } +} + +func TestForceFlush(t *testing.T) { + t.Parallel() + exp := &OTELExporter{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + err := exp.ForceFlush(ctx) + require.ErrorIs(t, err, context.Canceled) +} + +func TestShutdown(t *testing.T) { + t.Parallel() + exp := &OTELExporter{} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + err := exp.Shutdown(ctx) + require.ErrorIs(t, err, context.Canceled) +} + +func mutateMetrics(m []metricdata.ScopeMetrics) *metricdata.ResourceMetrics { + return &metricdata.ResourceMetrics{ + Resource: resource.Empty(), + ScopeMetrics: m, + } +} diff --git a/agent/hcp/telemetry/otlp_transform.go b/agent/hcp/telemetry/otlp_transform.go new file mode 100644 index 000000000000..7ba1650ffd05 --- /dev/null +++ b/agent/hcp/telemetry/otlp_transform.go @@ -0,0 +1,185 @@ +package telemetry + +import ( + "errors" + "fmt" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + cpb "go.opentelemetry.io/proto/otlp/common/v1" + mpb 
"go.opentelemetry.io/proto/otlp/metrics/v1" + rpb "go.opentelemetry.io/proto/otlp/resource/v1" +) + +var ( + aggregationErr = errors.New("unsupported aggregation") + temporalityErr = errors.New("unsupported temporality") +) + +// isEmpty verifies if the given OTLP protobuf metrics contains metric data. +// isEmpty returns true if no ScopeMetrics exist or all metrics within ScopeMetrics are empty. +func isEmpty(rm *mpb.ResourceMetrics) bool { + // No ScopeMetrics + if len(rm.ScopeMetrics) == 0 { + return true + } + + // If any inner metrics contain data, return false. + for _, v := range rm.ScopeMetrics { + if len(v.Metrics) != 0 { + return false + } + } + + // All inner metrics are empty. + return true +} + +// TransformOTLP returns an OTLP ResourceMetrics generated from OTEL metrics. If rm +// contains invalid ScopeMetrics, an error will be returned along with an OTLP +// ResourceMetrics that contains partial OTLP ScopeMetrics. +func transformOTLP(rm *metricdata.ResourceMetrics) *mpb.ResourceMetrics { + sms := scopeMetricsToPB(rm.ScopeMetrics) + return &mpb.ResourceMetrics{ + Resource: &rpb.Resource{ + Attributes: attributesToPB(rm.Resource.Iter()), + }, + ScopeMetrics: sms, + } +} + +// scopeMetrics returns a slice of OTLP ScopeMetrics. +func scopeMetricsToPB(scopeMetrics []metricdata.ScopeMetrics) []*mpb.ScopeMetrics { + out := make([]*mpb.ScopeMetrics, 0, len(scopeMetrics)) + for _, sm := range scopeMetrics { + ms := metricsToPB(sm.Metrics) + out = append(out, &mpb.ScopeMetrics{ + Scope: &cpb.InstrumentationScope{ + Name: sm.Scope.Name, + Version: sm.Scope.Version, + }, + Metrics: ms, + }) + } + return out +} + +// metrics returns a slice of OTLP Metric generated from OTEL metrics sdk ones. +func metricsToPB(metrics []metricdata.Metrics) []*mpb.Metric { + out := make([]*mpb.Metric, 0, len(metrics)) + for _, m := range metrics { + o, err := metricTypeToPB(m) + if err != nil { + // TODO: Emit metric when a transformation failure occurs. 
+ continue + } + out = append(out, o) + } + return out +} + +// metricType identifies the instrument type and converts it to OTLP format. +// only float64 values are accepted since the go metrics sink only receives float64 values. +func metricTypeToPB(m metricdata.Metrics) (*mpb.Metric, error) { + out := &mpb.Metric{ + Name: m.Name, + Description: m.Description, + Unit: m.Unit, + } + switch a := m.Data.(type) { + case metricdata.Gauge[float64]: + out.Data = &mpb.Metric_Gauge{ + Gauge: &mpb.Gauge{ + DataPoints: dataPointsToPB(a.DataPoints), + }, + } + case metricdata.Sum[float64]: + if a.Temporality != metricdata.CumulativeTemporality { + return out, fmt.Errorf("error: %w: %T", temporalityErr, a) + } + out.Data = &mpb.Metric_Sum{ + Sum: &mpb.Sum{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + IsMonotonic: a.IsMonotonic, + DataPoints: dataPointsToPB(a.DataPoints), + }, + } + case metricdata.Histogram[float64]: + if a.Temporality != metricdata.CumulativeTemporality { + return out, fmt.Errorf("error: %w: %T", temporalityErr, a) + } + out.Data = &mpb.Metric_Histogram{ + Histogram: &mpb.Histogram{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + DataPoints: histogramDataPointsToPB(a.DataPoints), + }, + } + default: + return out, fmt.Errorf("error: %w: %T", aggregationErr, a) + } + return out, nil +} + +// DataPoints returns a slice of OTLP NumberDataPoint generated from OTEL metrics sdk ones. 
+func dataPointsToPB(dataPoints []metricdata.DataPoint[float64]) []*mpb.NumberDataPoint { + out := make([]*mpb.NumberDataPoint, 0, len(dataPoints)) + for _, dp := range dataPoints { + ndp := &mpb.NumberDataPoint{ + Attributes: attributesToPB(dp.Attributes.Iter()), + StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), + TimeUnixNano: uint64(dp.Time.UnixNano()), + } + + ndp.Value = &mpb.NumberDataPoint_AsDouble{ + AsDouble: dp.Value, + } + out = append(out, ndp) + } + return out +} + +// HistogramDataPoints returns a slice of OTLP HistogramDataPoint from OTEL metrics sdk ones. +func histogramDataPointsToPB(dataPoints []metricdata.HistogramDataPoint[float64]) []*mpb.HistogramDataPoint { + out := make([]*mpb.HistogramDataPoint, 0, len(dataPoints)) + for _, dp := range dataPoints { + sum := dp.Sum + hdp := &mpb.HistogramDataPoint{ + Attributes: attributesToPB(dp.Attributes.Iter()), + StartTimeUnixNano: uint64(dp.StartTime.UnixNano()), + TimeUnixNano: uint64(dp.Time.UnixNano()), + Count: dp.Count, + Sum: &sum, + BucketCounts: dp.BucketCounts, + ExplicitBounds: dp.Bounds, + } + if v, ok := dp.Min.Value(); ok { + hdp.Min = &v + } + if v, ok := dp.Max.Value(); ok { + hdp.Max = &v + } + out = append(out, hdp) + } + return out +} + +// attributes transforms items of an attribute iterator into OTLP key-values. +// Currently, labels are only key-value pairs. 
+func attributesToPB(iter attribute.Iterator) []*cpb.KeyValue { + l := iter.Len() + if iter.Len() == 0 { + return nil + } + + out := make([]*cpb.KeyValue, 0, l) + for iter.Next() { + kv := iter.Attribute() + av := &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{ + StringValue: kv.Value.AsString(), + }, + } + out = append(out, &cpb.KeyValue{Key: string(kv.Key), Value: av}) + } + return out +} diff --git a/agent/hcp/telemetry/otlp_transform_test.go b/agent/hcp/telemetry/otlp_transform_test.go new file mode 100644 index 000000000000..1c22e9a5cd75 --- /dev/null +++ b/agent/hcp/telemetry/otlp_transform_test.go @@ -0,0 +1,275 @@ +package telemetry + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/instrumentation" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" + semconv "go.opentelemetry.io/otel/semconv/v1.17.0" + cpb "go.opentelemetry.io/proto/otlp/common/v1" + mpb "go.opentelemetry.io/proto/otlp/metrics/v1" + rpb "go.opentelemetry.io/proto/otlp/resource/v1" +) + +var ( + + // Common attributes for test cases. 
+ start = time.Date(2000, time.January, 01, 0, 0, 0, 0, time.FixedZone("GMT", 0)) + end = start.Add(30 * time.Second) + + alice = attribute.NewSet(attribute.String("user", "alice")) + bob = attribute.NewSet(attribute.String("user", "bob")) + + pbAlice = &cpb.KeyValue{Key: "user", Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "alice"}, + }} + pbBob = &cpb.KeyValue{Key: "user", Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "bob"}, + }} + + // DataPoint test case : Histogram Datapoints (Histogram) + minA, maxA, sumA = 2.0, 4.0, 90.0 + minB, maxB, sumB = 4.0, 150.0, 234.0 + inputHDP = []metricdata.HistogramDataPoint[float64]{{ + Attributes: alice, + StartTime: start, + Time: end, + Count: 30, + Bounds: []float64{1, 5}, + BucketCounts: []uint64{0, 30, 0}, + Min: metricdata.NewExtrema(minA), + Max: metricdata.NewExtrema(maxA), + Sum: sumA, + }, { + Attributes: bob, + StartTime: start, + Time: end, + Count: 3, + Bounds: []float64{1, 5}, + BucketCounts: []uint64{0, 1, 2}, + Min: metricdata.NewExtrema(minB), + Max: metricdata.NewExtrema(maxB), + Sum: sumB, + }} + + expectedHDP = []*mpb.HistogramDataPoint{{ + Attributes: []*cpb.KeyValue{pbAlice}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Count: 30, + Sum: &sumA, + ExplicitBounds: []float64{1, 5}, + BucketCounts: []uint64{0, 30, 0}, + Min: &minA, + Max: &maxA, + }, { + Attributes: []*cpb.KeyValue{pbBob}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Count: 3, + Sum: &sumB, + ExplicitBounds: []float64{1, 5}, + BucketCounts: []uint64{0, 1, 2}, + Min: &minB, + Max: &maxB, + }} + // DataPoint test case : Number Datapoints (Gauge / Counter) + inputDP = []metricdata.DataPoint[float64]{ + {Attributes: alice, StartTime: start, Time: end, Value: 1.0}, + {Attributes: bob, StartTime: start, Time: end, Value: 2.0}, + } + + expectedDP = []*mpb.NumberDataPoint{ + { + Attributes: []*cpb.KeyValue{pbAlice}, 
+ StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Value: &mpb.NumberDataPoint_AsDouble{AsDouble: 1.0}, + }, + { + Attributes: []*cpb.KeyValue{pbBob}, + StartTimeUnixNano: uint64(start.UnixNano()), + TimeUnixNano: uint64(end.UnixNano()), + Value: &mpb.NumberDataPoint_AsDouble{AsDouble: 2.0}, + }, + } + + invalidSumTemporality = metricdata.Metrics{ + Name: "invalid-sum", + Description: "Sum with invalid temporality", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.DeltaTemporality, + IsMonotonic: false, + DataPoints: inputDP, + }, + } + + invalidSumAgg = metricdata.Metrics{ + Name: "unknown", + Description: "Unknown aggregation", + Unit: "1", + Data: metricdata.Sum[int64]{}, + } + + invalidHistTemporality = metricdata.Metrics{ + Name: "invalid-histogram", + Description: "Invalid histogram", + Unit: "1", + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.DeltaTemporality, + DataPoints: inputHDP, + }, + } + + // Metrics Test Case + // - 3 invalid metrics and 3 Valid to test filtering + // - 1 invalid metric type + // - 2 invalid (non-cumulative) temporalities (only cumulative supported) + // - 3 types (Gauge, Counter, and Histogram) supported + inputMetrics = []metricdata.Metrics{ + { + Name: "float64-gauge", + Description: "Gauge with float64 values", + Unit: "1", + Data: metricdata.Gauge[float64]{DataPoints: inputDP}, + }, + { + Name: "float64-sum", + Description: "Sum with float64 values", + Unit: "1", + Data: metricdata.Sum[float64]{ + Temporality: metricdata.CumulativeTemporality, + IsMonotonic: false, + DataPoints: inputDP, + }, + }, + { + Name: "float64-histogram", + Description: "Histogram", + Unit: "1", + Data: metricdata.Histogram[float64]{ + Temporality: metricdata.CumulativeTemporality, + DataPoints: inputHDP, + }, + }, + invalidSumTemporality, + invalidHistTemporality, + invalidSumAgg, + } + + expectedMetrics = []*mpb.Metric{ + { + Name: "float64-gauge", + Description: "Gauge with
float64 values", + Unit: "1", + Data: &mpb.Metric_Gauge{Gauge: &mpb.Gauge{DataPoints: expectedDP}}, + }, + { + Name: "float64-sum", + Description: "Sum with float64 values", + Unit: "1", + Data: &mpb.Metric_Sum{Sum: &mpb.Sum{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + IsMonotonic: false, + DataPoints: expectedDP, + }}, + }, + { + Name: "float64-histogram", + Description: "Histogram", + Unit: "1", + Data: &mpb.Metric_Histogram{Histogram: &mpb.Histogram{ + AggregationTemporality: mpb.AggregationTemporality_AGGREGATION_TEMPORALITY_CUMULATIVE, + DataPoints: expectedHDP, + }}, + }, + } + + // ScopeMetrics Test Cases + inputScopeMetrics = []metricdata.ScopeMetrics{{ + Scope: instrumentation.Scope{ + Name: "test/code/path", + Version: "v0.1.0", + }, + Metrics: inputMetrics, + }} + + expectedScopeMetrics = []*mpb.ScopeMetrics{{ + Scope: &cpb.InstrumentationScope{ + Name: "test/code/path", + Version: "v0.1.0", + }, + Metrics: expectedMetrics, + }} + + // ResourceMetrics Test Cases + inputResourceMetrics = &metricdata.ResourceMetrics{ + Resource: resource.NewSchemaless( + semconv.ServiceName("test server"), + semconv.ServiceVersion("v0.1.0"), + ), + ScopeMetrics: inputScopeMetrics, + } + + expectedResourceMetrics = &mpb.ResourceMetrics{ + Resource: &rpb.Resource{ + Attributes: []*cpb.KeyValue{ + { + Key: "service.name", + Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "test server"}, + }, + }, + { + Key: "service.version", + Value: &cpb.AnyValue{ + Value: &cpb.AnyValue_StringValue{StringValue: "v0.1.0"}, + }, + }, + }, + }, + ScopeMetrics: expectedScopeMetrics, + } +) + +// TestTransformOTLP runs tests from the "bottom-up" of the metricdata data types. 
+func TestTransformOTLP(t *testing.T) { + t.Parallel() + // Histogram DataPoint Test Case (Histograms) + assert.Equal(t, expectedHDP, histogramDataPointsToPB(inputHDP)) + + // Number DataPoint Test Case (Counters / Gauges) + require.Equal(t, expectedDP, dataPointsToPB(inputDP)) + + // MetricType Error Test Cases + _, err := metricTypeToPB(invalidHistTemporality) + require.Error(t, err) + require.ErrorIs(t, err, temporalityErr) + + _, err = metricTypeToPB(invalidSumTemporality) + require.Error(t, err) + require.ErrorIs(t, err, temporalityErr) + + _, err = metricTypeToPB(invalidSumAgg) + require.Error(t, err) + require.ErrorIs(t, err, aggregationErr) + + // Metrics Test Case + m := metricsToPB(inputMetrics) + require.Equal(t, expectedMetrics, m) + require.Equal(t, len(expectedMetrics), 3) + + // Scope Metrics Test Case + sm := scopeMetricsToPB(inputScopeMetrics) + require.Equal(t, expectedScopeMetrics, sm) + + // // Resource Metrics Test Case + rm := transformOTLP(inputResourceMetrics) + require.Equal(t, expectedResourceMetrics, rm) +} diff --git a/go.mod b/go.mod index b3f1a2faa353..ffafa5157295 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( github.com/go-openapi/runtime v0.24.1 github.com/go-openapi/strfmt v0.21.3 github.com/golang/protobuf v1.5.2 - github.com/google/go-cmp v0.5.8 + github.com/google/go-cmp v0.5.9 github.com/google/gofuzz v1.2.0 github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 github.com/google/tcpproxy v0.0.0-20180808230851-dfa16c61dad2 @@ -96,13 +96,16 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 + go.opentelemetry.io/otel v1.15.1 + go.opentelemetry.io/otel/sdk v1.15.1 + go.opentelemetry.io/otel/sdk/metric v0.38.1 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync 
v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.5.0 + golang.org/x/sys v0.7.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -151,6 +154,8 @@ require ( github.com/dimchansky/utfbom v1.1.0 // indirect github.com/envoyproxy/protoc-gen-validate v0.1.0 // indirect github.com/form3tech-oss/jwt-go v3.2.2+incompatible // indirect + github.com/go-logr/logr v1.2.4 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect github.com/go-openapi/analysis v0.21.2 // indirect github.com/go-openapi/errors v0.20.2 // indirect @@ -227,6 +232,8 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect + go.opentelemetry.io/otel/metric v0.38.1 // indirect + go.opentelemetry.io/otel/trace v1.15.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 2d08b9447dfe..4412cb4947e8 100644 --- a/go.sum +++ b/go.sum @@ -313,6 +313,11 @@ github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= github.com/go-logfmt/logfmt v0.5.1/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= +github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 
h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= github.com/go-openapi/analysis v0.21.2 h1:hXFrOYFHUAMQdu6zwAiKKJHJQ8kqZs1ux/ru1P1wLJU= @@ -447,8 +452,9 @@ github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.6/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8 h1:e6P7q2lk1O+qJJb4BtCQXlK8vWEO8V1ZeuEdJNOqZyg= github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-querystring v0.0.0-20170111101155-53e6ce116135/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= @@ -1074,6 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/otel v1.15.1 h1:3Iwq3lfRByPaws0f6bU3naAqOR1n5IeDWd9390kWHa8= +go.opentelemetry.io/otel v1.15.1/go.mod h1:mHHGEHVDLal6YrKMmk9LqC4a3sF5g+fHfrttQIB1NTc= +go.opentelemetry.io/otel/metric v0.38.1 h1:2MM7m6wPw9B8Qv8iHygoAgkbejed59uUR6ezR5T3X2s= +go.opentelemetry.io/otel/metric v0.38.1/go.mod h1:FwqNHD3I/5iX9pfrRGZIlYICrJv0rHEUl2Ln5vdIVnQ= +go.opentelemetry.io/otel/sdk v1.15.1 
h1:5FKR+skgpzvhPQHIEfcwMYjCBr14LWzs3uSqKiQzETI= +go.opentelemetry.io/otel/sdk v1.15.1/go.mod h1:8rVtxQfrbmbHKfqzpQkT5EzZMcbMBwTzNAggbEAM0KA= +go.opentelemetry.io/otel/sdk/metric v0.38.1 h1:EkO5wI4NT/fUaoPMGc0fKV28JaWe7q4vfVpEVasGb+8= +go.opentelemetry.io/otel/sdk/metric v0.38.1/go.mod h1:Rn4kSXFF9ZQZ5lL1pxQjCbK4seiO+U7s0ncmIFJaj34= +go.opentelemetry.io/otel/trace v1.15.1 h1:uXLo6iHJEzDfrNC0L0mNjItIp06SyaBQxu5t3xMlngY= +go.opentelemetry.io/otel/trace v1.15.1/go.mod h1:IWdQG/5N1x7f6YUlmdLeJvH9yxtuJAfc4VW5Agv9r/8= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1338,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= -golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= +golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From d13849b96bc453b45889a7c78e2da4538b89fbff Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 15 May 2023 15:37:57 -0400 Subject: [PATCH 
053/100] Do not pass in waitgroup and use error channel instead. --- agent/hcp/telemetry/gauge_store_test.go | 24 +++++++++------- agent/hcp/telemetry/otel_sink_test.go | 38 ++++++++++++++----------- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go index 5ed5eadb8fd4..1171ee379c32 100644 --- a/agent/hcp/telemetry/gauge_store_test.go +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -3,7 +3,6 @@ package telemetry import ( "context" "fmt" - "math/rand" "sync" "testing" @@ -66,22 +65,25 @@ func TestGaugeStore_Race(t *testing.T) { t.Parallel() gaugeStore := NewGaugeStore() + wg := &sync.WaitGroup{} samples := 100 + errCh := make(chan error, samples) for i := 0; i < samples; i++ { - k := fmt.Sprintf("consul.test.%d", i) - v := rand.Float64() wg.Add(1) - go storeAndRetrieve(t, k, v, gaugeStore, wg) + key := fmt.Sprintf("consul.test.%d", i) + value := 12.34 + go func() { + defer wg.Done() + gaugeStore.Set(key, value, nil) + gv, _ := gaugeStore.LoadAndDelete(key) + if gv.Value != value { + errCh <- fmt.Errorf("expected value: '%f', but got: '%f' for key: '%s'", value, gv.Value, key) + } + }() } wg.Wait() -} -func storeAndRetrieve(t *testing.T, k string, v float64, gaugeStore *gaugeStore, wg *sync.WaitGroup) { - gaugeStore.Set(k, v, nil) - gv, ok := gaugeStore.LoadAndDelete(k) - require.True(t, ok) - require.Equal(t, v, gv.Value) - wg.Done() + require.Empty(t, errCh) } diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 1e60d4ad26e8..ebdfa432554a 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -3,7 +3,6 @@ package telemetry import ( "context" "fmt" - "math/rand" "sort" "strings" "sync" @@ -195,15 +194,21 @@ func TestOTELSink_Race(t *testing.T) { sink, err := NewOTELSink(opts) require.NoError(t, err) - expectedMetrics := generateSamples(100) - + samples := 100 + expectedMetrics := 
generateSamples(samples) wg := &sync.WaitGroup{} + errCh := make(chan error, samples) for k, v := range expectedMetrics { wg.Add(1) - go performSinkOperation(t, sink, k, v, wg) + go func(k string, v metricdata.Metrics) { + performSinkOperation(t, sink, k, v, errCh) + wg.Done() + }(k, v) } wg.Wait() + require.Empty(t, errCh) + var collected metricdata.ResourceMetrics err = reader.Collect(ctx, &collected) require.NoError(t, err) @@ -216,7 +221,7 @@ func generateSamples(n int) map[string]metricdata.Metrics { generated := make(map[string]metricdata.Metrics, 3*n) for i := 0; i < n; i++ { - v := rand.Float64() + v := 12.3 k := fmt.Sprintf("consul.test.gauges.%d", i) generated[k] = metricdata.Metrics{ Name: k, @@ -232,7 +237,7 @@ func generateSamples(n int) map[string]metricdata.Metrics { } for i := 0; i < n; i++ { - v := rand.Float64() + v := 22.23 k := fmt.Sprintf("consul.test.sum.%d", i) generated[k] = metricdata.Metrics{ Name: k, @@ -249,7 +254,7 @@ func generateSamples(n int) map[string]metricdata.Metrics { } for i := 0; i < n; i++ { - v := rand.Float64() + v := 13.24 k := fmt.Sprintf("consul.test.hist.%d", i) generated[k] = metricdata.Metrics{ Name: k, @@ -271,28 +276,29 @@ func generateSamples(n int) map[string]metricdata.Metrics { } // performSinkOperation emits a measurement using the OTELSink and calls wg.Done() when completed. 
-func performSinkOperation(t *testing.T, sink *OTELSink, k string, v metricdata.Metrics, wg *sync.WaitGroup) { +func performSinkOperation(t *testing.T, sink *OTELSink, k string, v metricdata.Metrics, errCh chan error) { key := strings.Split(k, ".") data := v.Data switch data.(type) { case metricdata.Gauge[float64]: gauge, ok := data.(metricdata.Gauge[float64]) - require.True(t, ok) - + if !ok { + errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key) + } sink.SetGauge(key, float32(gauge.DataPoints[0].Value)) case metricdata.Sum[float64]: sum, ok := data.(metricdata.Sum[float64]) - require.True(t, ok) - + if !ok { + errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key) + } sink.IncrCounter(key, float32(sum.DataPoints[0].Value)) case metricdata.Histogram[float64]: hist, ok := data.(metricdata.Histogram[float64]) - require.True(t, ok) - + if !ok { + errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key) + } sink.AddSample(key, float32(hist.DataPoints[0].Sum)) } - - wg.Done() } func isSame(t *testing.T, expectedMap map[string]metricdata.Metrics, actual metricdata.ResourceMetrics) { From 713c5fa4b4a992afb2886c02eff2bba715e881c2 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 15 May 2023 15:51:38 -0400 Subject: [PATCH 054/100] Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel --- go.mod | 12 ++++++------ go.sum | 24 ++++++++++++------------ 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/go.mod b/go.mod index a079e511b5d2..005db2fa7524 100644 --- a/go.mod +++ b/go.mod @@ -96,17 +96,17 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 - go.opentelemetry.io/otel v1.15.1 - go.opentelemetry.io/otel/metric v0.38.1 - go.opentelemetry.io/otel/sdk v1.15.1 - go.opentelemetry.io/otel/sdk/metric v0.38.1 + go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218 + go.opentelemetry.io/otel/metric 
v1.16.0-rc.1 + go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218 + go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.7.0 + golang.org/x/sys v0.8.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -233,7 +233,7 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/trace v1.15.1 // indirect + go.opentelemetry.io/otel/trace v1.16.0-rc.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 4412cb4947e8..05bf35417367 100644 --- a/go.sum +++ b/go.sum @@ -1080,16 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.15.1 h1:3Iwq3lfRByPaws0f6bU3naAqOR1n5IeDWd9390kWHa8= -go.opentelemetry.io/otel v1.15.1/go.mod h1:mHHGEHVDLal6YrKMmk9LqC4a3sF5g+fHfrttQIB1NTc= -go.opentelemetry.io/otel/metric v0.38.1 h1:2MM7m6wPw9B8Qv8iHygoAgkbejed59uUR6ezR5T3X2s= -go.opentelemetry.io/otel/metric v0.38.1/go.mod h1:FwqNHD3I/5iX9pfrRGZIlYICrJv0rHEUl2Ln5vdIVnQ= -go.opentelemetry.io/otel/sdk v1.15.1 h1:5FKR+skgpzvhPQHIEfcwMYjCBr14LWzs3uSqKiQzETI= -go.opentelemetry.io/otel/sdk v1.15.1/go.mod h1:8rVtxQfrbmbHKfqzpQkT5EzZMcbMBwTzNAggbEAM0KA= 
-go.opentelemetry.io/otel/sdk/metric v0.38.1 h1:EkO5wI4NT/fUaoPMGc0fKV28JaWe7q4vfVpEVasGb+8= -go.opentelemetry.io/otel/sdk/metric v0.38.1/go.mod h1:Rn4kSXFF9ZQZ5lL1pxQjCbK4seiO+U7s0ncmIFJaj34= -go.opentelemetry.io/otel/trace v1.15.1 h1:uXLo6iHJEzDfrNC0L0mNjItIp06SyaBQxu5t3xMlngY= -go.opentelemetry.io/otel/trace v1.15.1/go.mod h1:IWdQG/5N1x7f6YUlmdLeJvH9yxtuJAfc4VW5Agv9r/8= +go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218 h1:aKv7ueCXRlBdHGBNfot8BYwcvp4jwJ/rK/T/KQ3uXoA= +go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:dGSTwGyzvw5Dzn8nE8HrfOXnWIDrL0GIzQdOpTnJ2CM= +go.opentelemetry.io/otel/metric v1.16.0-rc.1 h1:R9MPFw2jA+z91ejfOVU7QRYSdb37E5Ak6jJUwNMQbR8= +go.opentelemetry.io/otel/metric v1.16.0-rc.1/go.mod h1:0I+4bYjKHaoXGw7uXAABYA5wyptQdXeXOhi3SBgD6GM= +go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218 h1:YC5ikDtSM7s+sJprqR7edyP9EBKMHGaAnWfte7EsQCI= +go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:tY+q2LQ4iuvdwcN0zrt/2NdF3ntVodUPbiHPMRZnXyo= +go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218 h1:5Ehgy+TyY7Jh3orDVIn7uVJ7UkFm3yP5lXXQN8ia+00= +go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:VKkJz/K+pb4rkqXlBH5DMJi1ebQLYhV82fTSK3WvOOQ= +go.opentelemetry.io/otel/trace v1.16.0-rc.1 h1:/dPBlZrzSSXglIEKgy/A3kyiACcmgNMFWKTIHHxxd/o= +go.opentelemetry.io/otel/trace v1.16.0-rc.1/go.mod h1:xqretMbHfSU24I2KKbSEG+aVHsNtBCr5L4BGaNqTx68= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1354,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod 
h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From c6f2f9c467779cd86e1e71f1a1511c87daba4e76 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:28:24 -0400 Subject: [PATCH 055/100] Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1 --- agent/hcp/telemetry/otel_sink_test.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index ebdfa432554a..b5e6c3557330 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -79,8 +79,8 @@ var ( Name: "consul.raft.leader.lastContact", Description: "", Unit: "", - Data: metricdata.Histogram[float64]{ - DataPoints: []metricdata.HistogramDataPoint[float64]{ + Data: metricdata.Histogram{ + DataPoints: []metricdata.HistogramDataPoint{ { Attributes: *attribute.EmptySet(), Count: 1, @@ -95,8 +95,8 @@ var ( Name: "consul.raft.commitTime", Description: "", Unit: "", - Data: metricdata.Histogram[float64]{ - DataPoints: []metricdata.HistogramDataPoint[float64]{ + Data: metricdata.Histogram{ + DataPoints: 
[]metricdata.HistogramDataPoint{ { Attributes: attrs, Count: 1, @@ -363,7 +363,7 @@ func isSameDataPoint(t *testing.T, expected []metricdata.DataPoint[float64], act } } -func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint, actual []metricdata.HistogramDataPoint) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Sort for predictable data in order of lowest sum. From f2c0d7ab3527ef7f5837e7019968c755731fbaf7 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 24 Apr 2023 10:57:15 -0400 Subject: [PATCH 056/100] Initialize OTELSink with sync.Map for all the instrument stores. --- agent/hcp/telemetry/otel_sink.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index ec5e2d476b1a..9268219334f5 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -112,7 +112,7 @@ func (o *OTELSink) IncrCounter(key []string, val float32) { // AddSampleWithLabels emits a Consul gauge metric that gets // registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key) + k := o.flattenKey(key, labels) // Set value in global Gauge store. o.gaugeStore.Set(k, float64(val), toAttributes(labels)) @@ -136,7 +136,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. 
func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key) + k := o.flattenKey(key, labels) o.mutex.Lock() defer o.mutex.Unlock() @@ -158,7 +158,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key) + k := o.flattenKey(key, labels) o.mutex.Lock() defer o.mutex.Unlock() @@ -183,7 +183,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom func (o *OTELSink) EmitKey(key []string, val float32) {} // flattenKey key along with its labels. -func (o *OTELSink) flattenKey(parts []string) string { +func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { buf := &bytes.Buffer{} joined := strings.Join(parts, ".") From d884daf42e3a649a6c285508445c68767ec40f30 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 09:32:52 -0400 Subject: [PATCH 057/100] Added telemetry agent to client and init sink in deps --- agent/hcp/client/client.go | 35 +++++++++++++++++++ agent/hcp/deps.go | 44 +++++++++++++++++++++++ go.mod | 28 +++++++-------- go.sum | 71 +++++++++++++++++++------------------- 4 files changed, 127 insertions(+), 51 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index f4e20746a07c..24528295495f 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -14,6 +14,7 @@ import ( "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/version" + hcptelemetry "github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service" hcpgnm 
"github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/client/global_network_manager_service" gnmmod "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/models" "github.com/hashicorp/hcp-sdk-go/httpclient" @@ -25,10 +26,21 @@ import ( //go:generate mockery --name Client --with-expecter --inpackage type Client interface { FetchBootstrap(ctx context.Context) (*BootstrapConfig, error) + FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig, error) PushServerStatus(ctx context.Context, status *ServerStatus) error DiscoverServers(ctx context.Context) ([]string, error) } +type MetricsConfig struct { + Filters []string + Endpoint string +} +type TelemetryConfig struct { + Endpoint string + Labels map[string]string + MetricsOverride *MetricsConfig +} + type BootstrapConfig struct { Name string BootstrapExpect int @@ -43,6 +55,7 @@ type hcpClient struct { hc *httptransport.Runtime cfg config.CloudConfig gnm hcpgnm.ClientService + tgw hcptelemetry.ClientService resource resource.Resource } @@ -63,6 +76,8 @@ func NewClient(cfg config.CloudConfig) (Client, error) { } client.gnm = hcpgnm.New(client.hc, nil) + client.tgw = hcptelemetry.New(client.hc, nil) + return client, nil } @@ -77,6 +92,26 @@ func httpClient(c config.CloudConfig) (*httptransport.Runtime, error) { SourceChannel: "consul " + version.GetHumanVersion(), }) } +func (c *hcpClient) FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig, error) { + params := hcptelemetry.NewAgentTelemetryConfigParamsWithContext(ctx). + WithLocationOrganizationID(c.resource.Organization). 
+ WithLocationProjectID(c.resource.Project) + + resp, err := c.tgw.AgentTelemetryConfig(params, nil) + if err != nil { + return nil, err + } + + payloadConfig := resp.Payload.TelemetryConfig + return &TelemetryConfig{ + Endpoint: payloadConfig.Endpoint, + Labels: payloadConfig.Labels, + MetricsOverride: &MetricsConfig{ + Filters: payloadConfig.Metrics.IncludeList, + Endpoint: payloadConfig.Metrics.Endpoint, + }, + }, nil +} func (c *hcpClient) FetchBootstrap(ctx context.Context) (*BootstrapConfig, error) { params := hcpgnm.NewAgentBootstrapConfigParamsWithContext(ctx). diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index b4d67154fb45..6959ff3a295f 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -4,9 +4,15 @@ package hcp import ( + "context" + "net/url" + "time" + + gometrics "github.com/armon/go-metrics" hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" + "github.com/hashicorp/consul/agent/hcp/telemetry" "github.com/hashicorp/go-hclog" ) @@ -14,6 +20,7 @@ import ( type Deps struct { Client hcpclient.Client Provider scada.Provider + Sink gometrics.MetricSink } func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { @@ -23,5 +30,42 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { } d.Provider, err = scada.New(cfg, logger.Named("hcp.scada")) + + ctx := context.Background() + // Make telemetry config request here to verify registration with CCM. 
+ reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + config, err := d.Client.FetchTelemetryConfig(reqCtx) + if err != nil { + return + } + + endpoint := config.Endpoint + if config.MetricsOverride.Endpoint != "" { + endpoint = config.MetricsOverride.Endpoint + } + url, err := url.Parse(endpoint) + if err != nil { + return + } + url.Scheme = "https" + + // If the above succeeds, Init metrics sink + metricsClient, err := hcpclient.NewMetricsClient(&hcpclient.TelemetryClientCfg{ + Logger: logger, + CloudCfg: &cfg, + }) + if err != nil { + return + } + + opts := &telemetry.OTELSinkOpts{ + Reader: telemetry.NewOTELReader(metricsClient, endpoint), + Logger: logger, + ExportInterval: 10 * time.Second, + Ctx: ctx, + } + d.Sink, err = telemetry.NewOTELSink(opts) + return } diff --git a/go.mod b/go.mod index 005db2fa7524..c7e583b2e5fd 100644 --- a/go.mod +++ b/go.mod @@ -28,7 +28,7 @@ require ( github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1 github.com/fatih/color v1.13.0 github.com/fsnotify/fsnotify v1.5.1 - github.com/go-openapi/runtime v0.24.1 + github.com/go-openapi/runtime v0.25.0 github.com/go-openapi/strfmt v0.21.3 github.com/golang/protobuf v1.5.2 github.com/google/go-cmp v0.5.9 @@ -62,7 +62,7 @@ require ( github.com/hashicorp/golang-lru v0.5.4 github.com/hashicorp/hcl v1.0.0 github.com/hashicorp/hcp-scada-provider v0.2.0 - github.com/hashicorp/hcp-sdk-go v0.23.1-0.20220921131124-49168300a7dc + github.com/hashicorp/hcp-sdk-go v0.43.0 github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038 github.com/hashicorp/memberlist v0.5.0 github.com/hashicorp/raft v1.5.0 @@ -133,8 +133,6 @@ require ( github.com/Azure/go-autorest/tracing v0.6.0 // indirect github.com/DataDog/datadog-go v3.2.0+incompatible // indirect github.com/Microsoft/go-winio v0.4.3 // indirect - github.com/PuerkitoBio/purell v1.1.1 // indirect - github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect 
github.com/asaskevich/govalidator v0.0.0-20210307081110-f21760c49a8d // indirect github.com/benbjohnson/immutable v0.4.0 // indirect github.com/beorn7/perks v1.0.1 // indirect @@ -158,14 +156,14 @@ require ( github.com/go-logr/logr v1.2.4 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.2.6 // indirect - github.com/go-openapi/analysis v0.21.2 // indirect - github.com/go-openapi/errors v0.20.2 // indirect + github.com/go-openapi/analysis v0.21.4 // indirect + github.com/go-openapi/errors v0.20.3 // indirect github.com/go-openapi/jsonpointer v0.19.5 // indirect - github.com/go-openapi/jsonreference v0.19.6 // indirect - github.com/go-openapi/loads v0.21.1 // indirect - github.com/go-openapi/spec v0.20.4 // indirect - github.com/go-openapi/swag v0.21.1 // indirect - github.com/go-openapi/validate v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.0 // indirect + github.com/go-openapi/loads v0.21.2 // indirect + github.com/go-openapi/spec v0.20.8 // indirect + github.com/go-openapi/swag v0.22.3 // indirect + github.com/go-openapi/validate v0.22.1 // indirect github.com/go-ozzo/ozzo-validation v3.6.0+incompatible // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect @@ -231,15 +229,15 @@ require ( github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926 // indirect github.com/vmware/govmomi v0.18.0 // indirect github.com/yusufpapurcu/wmi v1.2.2 // indirect - go.mongodb.org/mongo-driver v1.10.0 // indirect + go.mongodb.org/mongo-driver v1.11.0 // indirect go.opencensus.io v0.23.0 // indirect go.opentelemetry.io/otel/trace v1.16.0-rc.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect - golang.org/x/term v0.5.0 // indirect - golang.org/x/text v0.7.0 // indirect - golang.org/x/tools v0.2.0 // indirect + golang.org/x/term 
v0.6.0 // indirect + golang.org/x/text v0.8.0 // indirect + golang.org/x/tools v0.6.0 // indirect google.golang.org/api v0.57.0 // indirect google.golang.org/appengine v1.6.7 // indirect gopkg.in/inf.v0 v0.9.1 // indirect diff --git a/go.sum b/go.sum index 05bf35417367..3a4d9ed55498 100644 --- a/go.sum +++ b/go.sum @@ -114,10 +114,8 @@ github.com/NYTimes/gziphandler v1.0.1/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64 github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= github.com/OpenDNS/vegadns2client v0.0.0-20180418235048-a3fa4a771d87/go.mod h1:iGLljf5n9GjT6kc0HBvyI1nOKnGQbNB66VzSNbK5iks= github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= -github.com/PuerkitoBio/purell v1.1.1 h1:WEQqlqaGbrPkxLJWfBwQmfEAE1Z7ONdDLqrN38tNFfI= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= -github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 h1:d+Bc7a5rLufV/sSk/8dngufqelfh6jnri85riMAaF/M= github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE= github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo= github.com/Shopify/sarama v1.21.0/go.mod h1:yuqtN/pe8cXRWG5zPaO7hCfNJp5MwmkoJEoLjkm5tCQ= @@ -249,7 +247,6 @@ github.com/dnsimple/dnsimple-go v0.30.0/go.mod h1:O5TJ0/U6r7AfT8niYNlmohpLbCSG+c github.com/dnstap/golang-dnstap v0.0.0-20170829151710-2cf77a2b5e11/go.mod h1:s1PfVYYVmTMgCSPtho4LKBDecEHJWtiVDPNv78Z985U= github.com/docker/go-connections v0.3.0 h1:3lOnM9cSzgGwx8VfK/NGOW5fLQ0GjIlCkaktF+n1M6o= github.com/docker/go-connections v0.3.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= -github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/spdystream 
v0.0.0-20160310174837-449fdfce4d96/go.mod h1:Qh8CwZgvJUkLughtfhJv5dyTYa91l1fOUCrgjqmcifM= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= @@ -320,43 +317,48 @@ github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-ole/go-ole v1.2.6 h1:/Fpf6oFPoeFik9ty7siob0G6Ke8QvQEuVcuChpwXzpY= github.com/go-ole/go-ole v1.2.6/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= -github.com/go-openapi/analysis v0.21.2 h1:hXFrOYFHUAMQdu6zwAiKKJHJQ8kqZs1ux/ru1P1wLJU= github.com/go-openapi/analysis v0.21.2/go.mod h1:HZwRk4RRisyG8vx2Oe6aqeSQcoxRp47Xkp3+K6q+LdY= +github.com/go-openapi/analysis v0.21.4 h1:ZDFLvSNxpDaomuCueM0BlSXxpANBlFYiBvr+GXrvIHc= +github.com/go-openapi/analysis v0.21.4/go.mod h1:4zQ35W4neeZTqh3ol0rv/O8JBbka9QyAgQRPp9y3pfo= github.com/go-openapi/errors v0.19.8/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= github.com/go-openapi/errors v0.19.9/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= -github.com/go-openapi/errors v0.20.2 h1:dxy7PGTqEh94zj2E3h1cUmQQWiM1+aeCROfAr02EmK8= github.com/go-openapi/errors v0.20.2/go.mod h1:cM//ZKUKyO06HSwqAelJ5NsEMMcpa6VpXe8DOa1Mi1M= +github.com/go-openapi/errors v0.20.3 h1:rz6kiC84sqNQoqrtulzaL/VERgkoCyB6WdEkc2ujzUc= +github.com/go-openapi/errors v0.20.3/go.mod h1:Z3FlZ4I8jEGxjUK+bugx3on2mIAk4txuAOhlsB1FSgk= github.com/go-openapi/jsonpointer v0.0.0-20160704185906-46af16f9f7b1/go.mod h1:+35s3my2LFTysnkMfxsJBAMHj/DoqoB9knIWoYG/Vk0= github.com/go-openapi/jsonpointer v0.19.3/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= github.com/go-openapi/jsonpointer v0.19.5 h1:gZr+CIYByUqjcgeLXnQu2gHYQC9o73G2XUeOFYEICuY= github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= 
github.com/go-openapi/jsonreference v0.0.0-20160704190145-13c6e3589ad9/go.mod h1:W3Z9FmVs9qj+KR4zFKmDPGiLdk1D9Rlm7cyMvf57TTg= -github.com/go-openapi/jsonreference v0.19.6 h1:UBIxjkht+AWIgYzCDSv2GN+E/togfwXUJFRTWhl2Jjs= github.com/go-openapi/jsonreference v0.19.6/go.mod h1:diGHMEHg2IqXZGKxqyvWdfWU/aim5Dprw5bqpKkTvns= -github.com/go-openapi/loads v0.21.1 h1:Wb3nVZpdEzDTcly8S4HMkey6fjARRzb7iEaySimlDW0= +github.com/go-openapi/jsonreference v0.20.0 h1:MYlu0sBgChmCfJxxUKZ8g1cPWFOB37YSZqewK7OKeyA= +github.com/go-openapi/jsonreference v0.20.0/go.mod h1:Ag74Ico3lPc+zR+qjn4XBUmXymS4zJbYVCZmcgkasdo= github.com/go-openapi/loads v0.21.1/go.mod h1:/DtAMXXneXFjbQMGEtbamCZb+4x7eGwkvZCvBmwUG+g= -github.com/go-openapi/runtime v0.24.1 h1:Sml5cgQKGYQHF+M7yYSHaH1eOjvTykrddTE/KtQVjqo= -github.com/go-openapi/runtime v0.24.1/go.mod h1:AKurw9fNre+h3ELZfk6ILsfvPN+bvvlaU/M9q/r9hpk= +github.com/go-openapi/loads v0.21.2 h1:r2a/xFIYeZ4Qd2TnGpWDIQNcP80dIaZgf704za8enro= +github.com/go-openapi/loads v0.21.2/go.mod h1:Jq58Os6SSGz0rzh62ptiu8Z31I+OTHqmULx5e/gJbNw= +github.com/go-openapi/runtime v0.25.0 h1:7yQTCdRbWhX8vnIjdzU8S00tBYf7Sg71EBeorlPHvhc= +github.com/go-openapi/runtime v0.25.0/go.mod h1:Ux6fikcHXyyob6LNWxtE96hWwjBPYF0DXgVFuMTneOs= github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nAiCcj+friV/PDoE1/3eeccG9LYBs0tYvLOWc= -github.com/go-openapi/spec v0.20.4 h1:O8hJrt0UMnhHcluhIdUgCLRWyM2x7QkBXRvOs7m+O1M= github.com/go-openapi/spec v0.20.4/go.mod h1:faYFR1CvsJZ0mNsmsphTMSoRrNV3TEDoAM7FOEWeq8I= +github.com/go-openapi/spec v0.20.6/go.mod h1:2OpW+JddWPrpXSCIX8eOx7lZ5iyuWj3RYR6VaaBKcWA= +github.com/go-openapi/spec v0.20.8 h1:ubHmXNY3FCIOinT8RNrrPfGc9t7I1qhPtdOGoG2AxRU= +github.com/go-openapi/spec v0.20.8/go.mod h1:2OpW+JddWPrpXSCIX8eOx7lZ5iyuWj3RYR6VaaBKcWA= github.com/go-openapi/strfmt v0.21.0/go.mod h1:ZRQ409bWMj+SOgXofQAGTIo2Ebu72Gs+WaRADcS5iNg= github.com/go-openapi/strfmt v0.21.1/go.mod h1:I/XVKeLc5+MM5oPNN7P6urMOpuLXEcNrCX/rPGuWb0k= -github.com/go-openapi/strfmt 
v0.21.2/go.mod h1:I/XVKeLc5+MM5oPNN7P6urMOpuLXEcNrCX/rPGuWb0k= github.com/go-openapi/strfmt v0.21.3 h1:xwhj5X6CjXEZZHMWy1zKJxvW9AfHC9pkyUjLvHtKG7o= github.com/go-openapi/strfmt v0.21.3/go.mod h1:k+RzNO0Da+k3FrrynSNN8F7n/peCmQQqbbXjtDfvmGg= github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.15/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-openapi/swag v0.21.1 h1:wm0rhTb5z7qpJRHBdPOMuY4QjVUMbF6/kwoYeRAOrKU= github.com/go-openapi/swag v0.21.1/go.mod h1:QYRuS/SOXUCsnplDa677K7+DxSOj6IPNl/eQntq43wQ= -github.com/go-openapi/validate v0.21.0 h1:+Wqk39yKOhfpLqNLEC0/eViCkzM5FVXVqrvt526+wcI= -github.com/go-openapi/validate v0.21.0/go.mod h1:rjnrwK57VJ7A8xqfpAOEKRH8yQSGUriMu5/zuPSQ1hg= +github.com/go-openapi/swag v0.22.3 h1:yMBqmnQ0gyZvEb/+KzuWZOXgllrXT4SADYbvDaXHv/g= +github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/validate v0.22.1 h1:G+c2ub6q47kfX1sOBLwIQwzBVt8qmOAARyo/9Fqs9NU= +github.com/go-openapi/validate v0.22.1/go.mod h1:rjnrwK57VJ7A8xqfpAOEKRH8yQSGUriMu5/zuPSQ1hg= github.com/go-ozzo/ozzo-validation v3.6.0+incompatible h1:msy24VGS42fKO9K1vLz82/GeYW1cILu7Nuuj1N3BBkE= github.com/go-ozzo/ozzo-validation v3.6.0+incompatible/go.mod h1:gsEKFIVnabGBt6mXmxK0MoFy+cZoTJY6mu5Ll3LVLBU= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/go-stack/stack v1.8.1/go.mod h1:dcoOX6HbPZSZptuspn9bctJ+N/CnF5gGygcUP3XYfe4= github.com/go-test/deep v1.0.2/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= github.com/go-test/deep v1.1.0 h1:WOcxcdHcvdgThNXjw0t76K42FXTU7HpNQWHpA2HHNlg= github.com/go-test/deep v1.1.0/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= @@ -608,8 
+610,8 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/hcp-scada-provider v0.2.0 h1:iD3Y+c7LTdjeaWKHq/ym6ahEdSL1R+9GHvKWBb4t+aM= github.com/hashicorp/hcp-scada-provider v0.2.0/go.mod h1:Q0WpS2RyhBKOPD4X/8oW7AJe7jA2HXB09EwDzwRTao0= -github.com/hashicorp/hcp-sdk-go v0.23.1-0.20220921131124-49168300a7dc h1:on26TCKYnX7JzZCtwkR/LWHSqMu40PoZ6h/0e6Pq8ug= -github.com/hashicorp/hcp-sdk-go v0.23.1-0.20220921131124-49168300a7dc/go.mod h1:/9UoDY2FYYA8lFaKBb2HmM/jKYZGANmf65q9QRc/cVw= +github.com/hashicorp/hcp-sdk-go v0.43.0 h1:rigzyx7ST2/UZ3SzcsKZhUDaSiNo2OUv6DSknE0NjgE= +github.com/hashicorp/hcp-sdk-go v0.43.0/go.mod h1:hZqky4HEzsKwvLOt4QJlZUrjeQmb4UCZUhDP2HyQFfc= github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038 h1:n9J0rwVWXDpNd5iZnwY7w4WZyq53/rROeI7OVvLW8Ok= github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038/go.mod h1:n2TSygSNwsLJ76m8qFXTSc7beTb+auJxYdqrnoqwZWE= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= @@ -798,7 +800,6 @@ github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:F github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/mitchellh/mapstructure v1.3.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/mapstructure v1.4.1/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/mitchellh/pointerstructure v1.2.1 h1:ZhBBeX8tSlRpu/FFhXH4RC4OJzFlqsQhoHZAz4x7TIw= @@ -824,7 +825,6 @@ github.com/naoina/toml v0.1.1/go.mod h1:NBIhNtsFMo3G2szEBne+bO4gS192HuIYRqfvOWb4 
github.com/nbio/st v0.0.0-20140626010706-e9e8d9816f32/go.mod h1:9wM+0iRr9ahx58uYLpLIr5fm8diHn0JbqRycJi6w0Ms= github.com/nicolai86/scaleway-sdk v1.10.2-0.20180628010248-798f60e20bb2 h1:BQ1HW7hr4IVovMwWg0E0PYcyW8CzqDcVmaew9cujU4s= github.com/nicolai86/scaleway-sdk v1.10.2-0.20180628010248-798f60e20bb2/go.mod h1:TLb2Sg7HQcgGdloNxkrmtgDNR9uVYF3lfdFIN4Ro6Sk= -github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= github.com/nrdcg/auroradns v1.0.0/go.mod h1:6JPXKzIRzZzMqtTDgueIhTi6rFf1QvYE/HzqidhOhjw= github.com/nrdcg/goinwx v0.6.1/go.mod h1:XPiut7enlbEdntAqalBIqcYcTEVhpv/dKWgDCX2SwKQ= @@ -850,7 +850,6 @@ github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1Cpa github.com/onsi/gomega v1.7.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/opentracing-contrib/go-observer v0.0.0-20170622124052-a52f23424492/go.mod h1:Ngi6UdF0k5OKD5t5wlmGhe/EDKPoUM3BXZSSfIuJbis= github.com/opentracing/opentracing-go v1.1.0/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= -github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b h1:FfH+VrHHk6Lxt9HdVS0PXzSXFyS2NbZKXv33FYPol0A= github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b/go.mod h1:AC62GU6hc0BrNm+9RK9VSiwa/EUe1bkIeFORAMcHvJU= github.com/openzipkin-contrib/zipkin-go-opentracing v0.3.5/go.mod h1:uVHyebswE1cCXr2A73cRM2frx5ld1RJUCJkFNZ90ZiI= @@ -1067,9 +1066,9 @@ go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= go.etcd.io/etcd v0.5.0-alpha.5.0.20190917205325-a14579fbfb1a/go.mod h1:dnLIgRNXwCJa5e+c6mIZCrds/GIG4ncV9HhK5PX7jPg= go.mongodb.org/mongo-driver v1.7.3/go.mod h1:NqaYOwnXWr5Pm7AOpO5QFxKJ503nbMse/R79oO62zWg= go.mongodb.org/mongo-driver v1.7.5/go.mod 
h1:VXEWRZ6URJIkUq2SCAyapmhH0ZLRBP+FT4xhp5Zvxng= -go.mongodb.org/mongo-driver v1.8.3/go.mod h1:0sQWfOeY63QTntERDJJ/0SuKK0T1uVSgKCuAROlKEPY= -go.mongodb.org/mongo-driver v1.10.0 h1:UtV6N5k14upNp4LTduX0QCufG124fSu25Wz9tu94GLg= go.mongodb.org/mongo-driver v1.10.0/go.mod h1:wsihk0Kdgv8Kqu1Anit4sfK+22vSFbUrAVEYRhCXrA8= +go.mongodb.org/mongo-driver v1.11.0 h1:FZKhBSTydeuffHj9CBjXlR8vQLee1cQyTWYPA6/tqiE= +go.mongodb.org/mongo-driver v1.11.0/go.mod h1:s7p5vEtfbeR1gYi6pnj3c3/urpbLv2T5Sfd6Rp2HBB8= go.opencensus.io v0.20.1/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= @@ -1124,7 +1123,6 @@ golang.org/x/crypto v0.0.0-20200220183623-bac4c82f6975/go.mod h1:LzIPMQfyMNhhGPh golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.0.0-20201216223049-8b5274cf687f/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d h1:sK3txAijHtOK88l68nt020reeT1ZdKLIYetKl95FzVY= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= @@ -1166,6 +1164,7 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.8.0 
h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180530234432-1e491301e022/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180611182652-db08ff08e862/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -1225,8 +1224,8 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g= -golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.8.0 h1:Zrh2ngAOFYneWTAIAPethzeaQLuHwhuBkuV6ZiRnUaQ= +golang.org/x/net v0.8.0/go.mod h1:QVkue5JL9kW//ek3r6jTKnTFis1tRmNAW2P1shuFdJc= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -1245,8 +1244,8 @@ golang.org/x/oauth2 v0.0.0-20210805134026-6f1e6394065a/go.mod h1:KelEdhl1UZF7XfJ golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b/go.mod h1:DAh4E804XQdzx2j+YRIaUnCqCV2RuMz24cGBJ5QYIrc= -golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 h1:lxqLZaMad/dJHMFZH0NiNpiEZI/nhgWhe4wgzpE+MuA= -golang.org/x/oauth2 
v0.0.0-20220909003341-f21342109be1/go.mod h1:h4gKUeWbJ4rQPri7E0u6Gs4e9Ri2zaLxzw5DI5XGrYg= +golang.org/x/oauth2 v0.6.0 h1:Lh8GPgSKBfWSwFvtuWOfeI3aAAnbXTSutYxJiOJFgIw= +golang.org/x/oauth2 v0.6.0/go.mod h1:ycmewcwgD4Rpr3eZJLSB4Kyyljb3qDh40vJ8STE5HKw= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -1259,8 +1258,8 @@ golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20170830134202-bb24a47a89ea/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180622082034-63fc586f45fe/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1359,8 +1358,8 @@ golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.5.0 h1:n2a8QNdAb0sZNpU9R1ALUXBbY+w51fCQDN+7EdxNBsY= -golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.6.0 h1:clScbb1cHjoCkyRbWwBEUZ5H/tIFu5TAXIqaZD0Gcjw= +golang.org/x/term v0.6.0/go.mod h1:m6U89DPEgQRMq3DNkDClhWw02AUbt2daBVO4cn4Hv9U= golang.org/x/text v0.0.0-20160726164857-2910a502d2bf/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1372,8 +1371,8 @@ golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= -golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68= +golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= golang.org/x/time v0.0.0-20161028155119-f51c12702a4d/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -1450,8 +1449,8 @@ golang.org/x/tools v0.1.2/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.3/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.4/go.mod 
h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= golang.org/x/tools v0.1.5/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.2.0 h1:G6AHpWxTMGY1KyEYoAQ5WTtIekUUvDNjan3ugu60JvE= -golang.org/x/tools v0.2.0/go.mod h1:y4OqIKeOV/fWJetJ8bXPU1sEVniLMIyDAZWeHdV+NTA= +golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -1611,8 +1610,8 @@ gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLks gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f h1:BLraFXnmrev5lT+xlilqcH8XK9/i0At2xKjWk4p6zsU= gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/cheggaaa/pb.v1 v1.0.25/go.mod h1:V/YB90LKu/1FcN3WVnfiiE5oMCibMjukxqG/qStrOgw= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= From bf496f3219c7629b5905ec2e13f99464a7d84d7b Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 10:05:31 -0400 Subject: [PATCH 058/100] Fixed client --- agent/hcp/client/client.go | 2 +- 
agent/hcp/client/mock_Client.go | 81 +++++++++++++++++++++++++++++++-- agent/hcp/deps.go | 39 ++++++++++------ 3 files changed, 103 insertions(+), 19 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index 24528295495f..9475f82cf0a1 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -150,7 +150,7 @@ func (c *hcpClient) PushServerStatus(ctx context.Context, s *ServerStatus) error WithLocationOrganizationID(c.resource.Organization). WithLocationProjectID(c.resource.Project) - params.SetBody(&gnmmod.HashicorpCloudGlobalNetworkManager20220215AgentPushServerStateRequest{ + params.SetBody(hcpgnm.AgentPushServerStateBody{ ServerState: serverStatusToHCP(s), }) diff --git a/agent/hcp/client/mock_Client.go b/agent/hcp/client/mock_Client.go index 27eb35a747cb..06853ceb86f7 100644 --- a/agent/hcp/client/mock_Client.go +++ b/agent/hcp/client/mock_Client.go @@ -1,4 +1,4 @@ -// Code generated by mockery v2.15.0. DO NOT EDIT. +// Code generated by mockery v2.22.1. DO NOT EDIT. 
package client @@ -26,6 +26,10 @@ func (_m *MockClient) DiscoverServers(ctx context.Context) ([]string, error) { ret := _m.Called(ctx) var r0 []string + var r1 error + if rf, ok := ret.Get(0).(func(context.Context) ([]string, error)); ok { + return rf(ctx) + } if rf, ok := ret.Get(0).(func(context.Context) []string); ok { r0 = rf(ctx) } else { @@ -34,7 +38,6 @@ func (_m *MockClient) DiscoverServers(ctx context.Context) ([]string, error) { } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context) error); ok { r1 = rf(ctx) } else { @@ -67,11 +70,20 @@ func (_c *MockClient_DiscoverServers_Call) Return(_a0 []string, _a1 error) *Mock return _c } +func (_c *MockClient_DiscoverServers_Call) RunAndReturn(run func(context.Context) ([]string, error)) *MockClient_DiscoverServers_Call { + _c.Call.Return(run) + return _c +} + // FetchBootstrap provides a mock function with given fields: ctx func (_m *MockClient) FetchBootstrap(ctx context.Context) (*BootstrapConfig, error) { ret := _m.Called(ctx) var r0 *BootstrapConfig + var r1 error + if rf, ok := ret.Get(0).(func(context.Context) (*BootstrapConfig, error)); ok { + return rf(ctx) + } if rf, ok := ret.Get(0).(func(context.Context) *BootstrapConfig); ok { r0 = rf(ctx) } else { @@ -80,7 +92,6 @@ func (_m *MockClient) FetchBootstrap(ctx context.Context) (*BootstrapConfig, err } } - var r1 error if rf, ok := ret.Get(1).(func(context.Context) error); ok { r1 = rf(ctx) } else { @@ -113,6 +124,65 @@ func (_c *MockClient_FetchBootstrap_Call) Return(_a0 *BootstrapConfig, _a1 error return _c } +func (_c *MockClient_FetchBootstrap_Call) RunAndReturn(run func(context.Context) (*BootstrapConfig, error)) *MockClient_FetchBootstrap_Call { + _c.Call.Return(run) + return _c +} + +// FetchTelemetryConfig provides a mock function with given fields: ctx +func (_m *MockClient) FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig, error) { + ret := _m.Called(ctx) + + var r0 *TelemetryConfig + var r1 error + if rf, ok := 
ret.Get(0).(func(context.Context) (*TelemetryConfig, error)); ok { + return rf(ctx) + } + if rf, ok := ret.Get(0).(func(context.Context) *TelemetryConfig); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*TelemetryConfig) + } + } + + if rf, ok := ret.Get(1).(func(context.Context) error); ok { + r1 = rf(ctx) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockClient_FetchTelemetryConfig_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'FetchTelemetryConfig' +type MockClient_FetchTelemetryConfig_Call struct { + *mock.Call +} + +// FetchTelemetryConfig is a helper method to define mock.On call +// - ctx context.Context +func (_e *MockClient_Expecter) FetchTelemetryConfig(ctx interface{}) *MockClient_FetchTelemetryConfig_Call { + return &MockClient_FetchTelemetryConfig_Call{Call: _e.mock.On("FetchTelemetryConfig", ctx)} +} + +func (_c *MockClient_FetchTelemetryConfig_Call) Run(run func(ctx context.Context)) *MockClient_FetchTelemetryConfig_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *MockClient_FetchTelemetryConfig_Call) Return(_a0 *TelemetryConfig, _a1 error) *MockClient_FetchTelemetryConfig_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockClient_FetchTelemetryConfig_Call) RunAndReturn(run func(context.Context) (*TelemetryConfig, error)) *MockClient_FetchTelemetryConfig_Call { + _c.Call.Return(run) + return _c +} + // PushServerStatus provides a mock function with given fields: ctx, status func (_m *MockClient) PushServerStatus(ctx context.Context, status *ServerStatus) error { ret := _m.Called(ctx, status) @@ -151,6 +221,11 @@ func (_c *MockClient_PushServerStatus_Call) Return(_a0 error) *MockClient_PushSe return _c } +func (_c *MockClient_PushServerStatus_Call) RunAndReturn(run func(context.Context, *ServerStatus) error) *MockClient_PushServerStatus_Call { + _c.Call.Return(run) + return _c +} + type 
mockConstructorTestingTNewMockClient interface { mock.TestingT Cleanup(func()) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 6959ff3a295f..3b739bfc428e 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -30,42 +30,51 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { } d.Provider, err = scada.New(cfg, logger.Named("hcp.scada")) + if err != nil { + return + } + + d.Sink, err = initTelemetry(d.Client, logger, cfg) + + return +} +func initTelemetry(hcpClient hcpclient.Client, logger hclog.Logger, cfg config.CloudConfig) (gometrics.MetricSink, error) { + // Make telemetry config request here to HCP. ctx := context.Background() - // Make telemetry config request here to verify registration with CCM. reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() - config, err := d.Client.FetchTelemetryConfig(reqCtx) + + telemetryCfg, err := hcpClient.FetchTelemetryConfig(reqCtx) if err != nil { - return + return nil, err } - endpoint := config.Endpoint - if config.MetricsOverride.Endpoint != "" { - endpoint = config.MetricsOverride.Endpoint + endpoint := telemetryCfg.Endpoint + if override := telemetryCfg.MetricsOverride.Endpoint; override != "" { + endpoint = override } + url, err := url.Parse(endpoint) if err != nil { - return + return nil, err } url.Scheme = "https" - // If the above succeeds, Init metrics sink + // If the above succeeds, the server is registered with CCM, init metrics sink. 
metricsClient, err := hcpclient.NewMetricsClient(&hcpclient.TelemetryClientCfg{ Logger: logger, CloudCfg: &cfg, }) if err != nil { - return + return nil, err } opts := &telemetry.OTELSinkOpts{ - Reader: telemetry.NewOTELReader(metricsClient, endpoint), - Logger: logger, - ExportInterval: 10 * time.Second, - Ctx: ctx, + Reader: telemetry.NewOTELReader(metricsClient, endpoint, 10*time.Second), + Logger: logger, + Ctx: ctx, } - d.Sink, err = telemetry.NewOTELSink(opts) - return + return telemetry.NewOTELSink(opts) } From 4a047b08e887bae33ba8ae674c361e8e43c07a1d Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 10:59:59 -0400 Subject: [PATCH 059/100] Initalize sink in deps --- agent/hcp/deps.go | 7 ++++--- agent/hcp/deps_test.go | 18 ++++++++++++++++++ agent/hcp/testing.go | 3 ++- 3 files changed, 24 insertions(+), 4 deletions(-) create mode 100644 agent/hcp/deps_test.go diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 3b739bfc428e..78c432bd82f0 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -5,6 +5,7 @@ package hcp import ( "context" + "fmt" "net/url" "time" @@ -55,11 +56,11 @@ func initTelemetry(hcpClient hcpclient.Client, logger hclog.Logger, cfg config.C endpoint = override } - url, err := url.Parse(endpoint) + // The endpoint from the HCP gateway is a domain without scheme, so it must be added. + url, err := url.Parse(fmt.Sprintf("https://%s", endpoint)) if err != nil { return nil, err } - url.Scheme = "https" // If the above succeeds, the server is registered with CCM, init metrics sink. 
metricsClient, err := hcpclient.NewMetricsClient(&hcpclient.TelemetryClientCfg{ @@ -71,7 +72,7 @@ func initTelemetry(hcpClient hcpclient.Client, logger hclog.Logger, cfg config.C } opts := &telemetry.OTELSinkOpts{ - Reader: telemetry.NewOTELReader(metricsClient, endpoint, 10*time.Second), + Reader: telemetry.NewOTELReader(metricsClient, url.String(), 10*time.Second), Logger: logger, Ctx: ctx, } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go new file mode 100644 index 000000000000..5a927f1b2931 --- /dev/null +++ b/agent/hcp/deps_test.go @@ -0,0 +1,18 @@ +package hcp + +import ( + "testing" + + "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/consul/agent/hcp/config" + "github.com/hashicorp/go-hclog" +) + +func TestInitTelemetry(t *testing.T) { + cfg := config.CloudConfig{} + logger := hclog.NewNullLogger() + mClient := client.NewMockClient() + + initTelemetry(mClient, logger, cfg) + +} diff --git a/agent/hcp/testing.go b/agent/hcp/testing.go index dbcd7f479b0a..8a35918a6ccc 100644 --- a/agent/hcp/testing.go +++ b/agent/hcp/testing.go @@ -13,6 +13,7 @@ import ( "sync" "time" + hcpgnm "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/client/global_network_manager_service" gnmmod "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/models" "github.com/hashicorp/hcp-sdk-go/resource" ) @@ -141,7 +142,7 @@ func mockTokenResponse(w http.ResponseWriter) { } func (s *MockHCPServer) handleStatus(r *http.Request, cluster resource.Resource) (interface{}, error) { - var req gnmmod.HashicorpCloudGlobalNetworkManager20220215AgentPushServerStateRequest + var req hcpgnm.AgentPushServerStateBody if err := json.NewDecoder(r.Body).Decode(&req); err != nil { return nil, err } From 49dfd64f11cc260c80d6a53cc0e762039f431b7f Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 12:11:50 -0400 Subject: [PATCH 060/100] init sink in telemetry library 
--- agent/hcp/client/mock_CloudConfig.go | 16 ++- agent/hcp/deps.go | 52 ++++++---- agent/hcp/deps_test.go | 144 +++++++++++++++++++++++++-- agent/setup.go | 2 + lib/telemetry.go | 11 ++ lib/telemetry_test.go | 32 ++++-- 6 files changed, 218 insertions(+), 39 deletions(-) diff --git a/agent/hcp/client/mock_CloudConfig.go b/agent/hcp/client/mock_CloudConfig.go index ed6e3358f7db..fc5125135636 100644 --- a/agent/hcp/client/mock_CloudConfig.go +++ b/agent/hcp/client/mock_CloudConfig.go @@ -6,6 +6,7 @@ import ( "net/url" hcpcfg "github.com/hashicorp/hcp-sdk-go/config" + "github.com/hashicorp/hcp-sdk-go/profile" "golang.org/x/oauth2" ) @@ -17,15 +18,12 @@ func (m *mockHCPCfg) Token() (*oauth2.Token, error) { }, nil } -func (m *mockHCPCfg) APITLSConfig() *tls.Config { return nil } - -func (m *mockHCPCfg) SCADAAddress() string { return "" } - -func (m *mockHCPCfg) SCADATLSConfig() *tls.Config { return &tls.Config{} } - -func (m *mockHCPCfg) APIAddress() string { return "" } - -func (m *mockHCPCfg) PortalURL() *url.URL { return &url.URL{} } +func (m *mockHCPCfg) APITLSConfig() *tls.Config { return nil } +func (m *mockHCPCfg) SCADAAddress() string { return "" } +func (m *mockHCPCfg) SCADATLSConfig() *tls.Config { return &tls.Config{} } +func (m *mockHCPCfg) APIAddress() string { return "" } +func (m *mockHCPCfg) PortalURL() *url.URL { return &url.URL{} } +func (m *mockHCPCfg) Profile() *profile.UserProfile { return &profile.UserProfile{} } type MockCloudCfg struct{} diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 78c432bd82f0..080fc01c357d 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -35,20 +35,35 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { return } - d.Sink, err = initTelemetry(d.Client, logger, cfg) + // Make telemetry config request here to HCP. 
+ ctx := context.Background() + url, err := verifyCCMRegistration(ctx, d.Client) + if err != nil { + return + } + + metricsClientOpts := &hcpclient.TelemetryClientCfg{ + Logger: logger, + CloudCfg: &cfg, + } + + sinkOpts := &telemetry.OTELSinkOpts{ + Ctx: ctx, + Logger: logger, + } + + d.Sink, err = initHCPSink(sinkOpts, metricsClientOpts, url) return } -func initTelemetry(hcpClient hcpclient.Client, logger hclog.Logger, cfg config.CloudConfig) (gometrics.MetricSink, error) { - // Make telemetry config request here to HCP. - ctx := context.Background() +func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string, error) { reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() - telemetryCfg, err := hcpClient.FetchTelemetryConfig(reqCtx) + telemetryCfg, err := client.FetchTelemetryConfig(reqCtx) if err != nil { - return nil, err + return "", fmt.Errorf("failed to fetch telemetry config %w", err) } endpoint := telemetryCfg.Endpoint @@ -56,26 +71,27 @@ func initTelemetry(hcpClient hcpclient.Client, logger hclog.Logger, cfg config.C endpoint = override } + if endpoint == "" { + return "", fmt.Errorf("server not registed with management plane") + } + // The endpoint from the HCP gateway is a domain without scheme, so it must be added. url, err := url.Parse(fmt.Sprintf("https://%s", endpoint)) if err != nil { - return nil, err + return "", fmt.Errorf("failed to parse url: %w", err) } + return url.String(), nil +} + +func initHCPSink(sinkOpts *telemetry.OTELSinkOpts, clientCfg *hcpclient.TelemetryClientCfg, url string) (gometrics.MetricSink, error) { // If the above succeeds, the server is registered with CCM, init metrics sink. 
- metricsClient, err := hcpclient.NewMetricsClient(&hcpclient.TelemetryClientCfg{ - Logger: logger, - CloudCfg: &cfg, - }) + metricsClient, err := hcpclient.NewMetricsClient(clientCfg) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to init metrics client: %w", err) } - opts := &telemetry.OTELSinkOpts{ - Reader: telemetry.NewOTELReader(metricsClient, url.String(), 10*time.Second), - Logger: logger, - Ctx: ctx, - } + sinkOpts.Reader = telemetry.NewOTELReader(metricsClient, url, 10*time.Second) - return telemetry.NewOTELSink(opts) + return telemetry.NewOTELSink(sinkOpts) } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 5a927f1b2931..918853bba56b 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -1,18 +1,150 @@ package hcp import ( + "context" + "fmt" "testing" "github.com/hashicorp/consul/agent/hcp/client" - "github.com/hashicorp/consul/agent/hcp/config" + "github.com/hashicorp/consul/agent/hcp/telemetry" "github.com/hashicorp/go-hclog" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" ) -func TestInitTelemetry(t *testing.T) { - cfg := config.CloudConfig{} - logger := hclog.NewNullLogger() - mClient := client.NewMockClient() +func TestVerifyCCMRegistration(t *testing.T) { + for name, test := range map[string]struct { + expect func(*client.MockClient) + wantErr string + mockCloudCfg client.CloudConfig + expectedURL string + }{ + "failsWithFetchTelemetryFailure": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("FetchTelemetryConfig error")) + }, + mockCloudCfg: &client.MockCloudCfg{}, + wantErr: "failed to fetch telemetry config", + }, + "noSinkWithEmptyEndpoint": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + Endpoint: "", + MetricsOverride: &client.MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + 
mockCloudCfg: &client.MockCloudCfg{}, + wantErr: "server not registed with management plane", + }, + "failsWithURLParseErr": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + // Minimum 2 chars for a domain to be valid. + Endpoint: "s", + MetricsOverride: &client.MetricsConfig{ + // Invalid domain chars + Endpoint: " ", + }, + }, nil) + }, + mockCloudCfg: &client.MockCloudCfg{}, + wantErr: "failed to parse url:", + }, + "success": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + Endpoint: "test.com", + MetricsOverride: &client.MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + mockCloudCfg: &client.MockCloudCfg{}, + expectedURL: "https://test.com", + }, + "successMetricsEndpointOverride": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + Endpoint: "test.com", + MetricsOverride: &client.MetricsConfig{ + Endpoint: "override.com", + }, + }, nil) + }, + mockCloudCfg: &client.MockCloudCfg{}, + expectedURL: "https://override.com", + }, + } { + t.Run(name, func(t *testing.T) { + ctx := context.Background() + mClient := client.NewMockClient(t) + test.expect(mClient) - initTelemetry(mClient, logger, cfg) + url, err := verifyCCMRegistration(ctx, mClient) + if test.wantErr != "" { + require.Empty(t, url) + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + require.NoError(t, err) + require.Equal(t, url, test.expectedURL) + }) + } +} + +func TestInitSink(t *testing.T) { + for name, test := range map[string]struct { + sinkOpts *telemetry.OTELSinkOpts + clientOpts *client.TelemetryClientCfg + wantErr string + }{ + "failsWithMetricsClientError": { + clientOpts: &client.TelemetryClientCfg{ + Logger: hclog.NewNullLogger(), + CloudCfg: &client.MockErrCloudCfg{}, + }, + 
sinkOpts: &telemetry.OTELSinkOpts{ + Logger: hclog.NewNullLogger(), + Ctx: context.Background(), + }, + wantErr: "failed to init metrics client", + }, + "failsWithInvalidSinkOpts": { + clientOpts: &client.TelemetryClientCfg{ + Logger: hclog.NewNullLogger(), + CloudCfg: &client.MockCloudCfg{}, + }, + sinkOpts: &telemetry.OTELSinkOpts{ + Logger: nil, + Ctx: context.Background(), + }, + wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", + }, + "success": { + clientOpts: &client.TelemetryClientCfg{ + Logger: hclog.NewNullLogger(), + CloudCfg: &client.MockCloudCfg{}, + }, + sinkOpts: &telemetry.OTELSinkOpts{ + Logger: hclog.NewNullLogger(), + Ctx: context.Background(), + }, + }, + } { + t.Run(name, func(t *testing.T) { + sink, err := initHCPSink(test.sinkOpts, test.clientOpts, "https://test.com") + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.NotNil(t, sink) + require.NoError(t, err) + }) + } } diff --git a/agent/setup.go b/agent/setup.go index 07ab3852de8c..9710d022a395 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -101,6 +101,8 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl cfg.Telemetry.PrometheusOpts.CounterDefinitions = counters cfg.Telemetry.PrometheusOpts.SummaryDefinitions = summaries + cfg.Telemetry.HCPSink = d.HCP.Sink + d.MetricsConfig, err = lib.InitTelemetry(cfg.Telemetry, d.Logger) if err != nil { return d, fmt.Errorf("failed to initialize telemetry: %w", err) diff --git a/lib/telemetry.go b/lib/telemetry.go index f6af9739243c..1095d381fc91 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -210,6 +210,10 @@ type TelemetryConfig struct { // // hcl: telemetry { prometheus_retention_time = "duration" } PrometheusOpts prometheus.PrometheusOpts + + // HCPSink provides an intialized OpenTelemetry sink that aggregates Consul metrics. + // The aggregated metrics are periodically exported to HCP. 
+ HCPSink metrics.MetricSink } // MetricsHandler provides an http.Handler for displaying metrics. @@ -233,6 +237,12 @@ func (cfg *MetricsConfig) Cancel() { } } +func hcpSink(cfg TelemetryConfig, _ string) (metrics.MetricSink, error) { + // The sink is already initialized when HCP deps setup occurs to ensure the + // server is registered with the management plane. + return cfg.HCPSink, nil +} + func statsiteSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) { addr := cfg.StatsiteAddr if addr == "" { @@ -349,6 +359,7 @@ func configureSinks(cfg TelemetryConfig, memSink metrics.MetricSink) (metrics.Fa addSink(dogstatdSink) addSink(circonusSink) addSink(prometheusSink) + addSink(hcpSink) if len(sinks) > 0 { sinks = append(sinks, memSink) diff --git a/lib/telemetry_test.go b/lib/telemetry_test.go index c8649f0fd764..c3de2e49c4f7 100644 --- a/lib/telemetry_test.go +++ b/lib/telemetry_test.go @@ -4,30 +4,47 @@ package lib import ( + "context" "errors" + "io" "net" "os" "testing" + hcptelemetry "github.com/hashicorp/consul/agent/hcp/telemetry" "github.com/hashicorp/consul/logging" + "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-multierror" "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/sdk/metric" ) -func newCfg() TelemetryConfig { +func newCfg() (TelemetryConfig, error) { + // Manual reader outputs the aggregated metrics when reader.Collect is called. 
+ reader := metric.NewManualReader() + opts := &hcptelemetry.OTELSinkOpts{ + Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), + Reader: reader, + Ctx: context.Background(), + } + + hcpSink, err := hcptelemetry.NewOTELSink(opts) + return TelemetryConfig{ StatsdAddr: "statsd.host:1234", StatsiteAddr: "statsite.host:1234", DogstatsdAddr: "mydog.host:8125", - } + HCPSink: hcpSink, + }, err } func TestConfigureSinks(t *testing.T) { - cfg := newCfg() + cfg, err := newCfg() + require.NoError(t, err) sinks, err := configureSinks(cfg, nil) require.Error(t, err) - // 3 sinks: statsd, statsite, inmem - require.Equal(t, 3, len(sinks)) + // 4 sinks: statsd, statsite, inmem, hcp + require.Equal(t, 4, len(sinks)) cfg = TelemetryConfig{ DogstatsdAddr: "", @@ -55,7 +72,10 @@ func TestInitTelemetryRetrySuccess(t *testing.T) { LogLevel: "INFO", }, os.Stdout) require.NoError(t, err) - cfg := newCfg() + + cfg, err := newCfg() + require.NoError(t, err) + _, err = InitTelemetry(cfg, logger) require.Error(t, err) From e58ba1e0c5996570d216cd720db214fb1a4d76b8 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 17:35:09 -0400 Subject: [PATCH 061/100] Init deps before telemetry --- agent/setup.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/agent/setup.go b/agent/setup.go index 9710d022a395..0c261e2efd63 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -101,6 +101,12 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl cfg.Telemetry.PrometheusOpts.CounterDefinitions = counters cfg.Telemetry.PrometheusOpts.SummaryDefinitions = summaries + if cfg.IsCloudEnabled() { + d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger) + if err != nil { + return d, err + } + } cfg.Telemetry.HCPSink = d.HCP.Sink d.MetricsConfig, err = lib.InitTelemetry(cfg.Telemetry, d.Logger) @@ -187,12 +193,6 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl d.EventPublisher = 
stream.NewEventPublisher(10 * time.Second) d.XDSStreamLimiter = limiter.NewSessionLimiter() - if cfg.IsCloudEnabled() { - d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger) - if err != nil { - return d, err - } - } return d, nil } From 5cd9e2eb4fe4d5e5db1bcef6706923205fc307cc Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 18:02:48 -0400 Subject: [PATCH 062/100] Use concrete telemetry.OtelSink type --- agent/hcp/deps.go | 5 ++--- lib/telemetry.go | 3 ++- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 080fc01c357d..94659ec25ed5 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -9,7 +9,6 @@ import ( "net/url" "time" - gometrics "github.com/armon/go-metrics" hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" @@ -21,7 +20,7 @@ import ( type Deps struct { Client hcpclient.Client Provider scada.Provider - Sink gometrics.MetricSink + Sink *telemetry.OTELSink } func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { @@ -84,7 +83,7 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string return url.String(), nil } -func initHCPSink(sinkOpts *telemetry.OTELSinkOpts, clientCfg *hcpclient.TelemetryClientCfg, url string) (gometrics.MetricSink, error) { +func initHCPSink(sinkOpts *telemetry.OTELSinkOpts, clientCfg *hcpclient.TelemetryClientCfg, url string) (*telemetry.OTELSink, error) { // If the above succeeds, the server is registered with CCM, init metrics sink. 
metricsClient, err := hcpclient.NewMetricsClient(clientCfg) if err != nil { diff --git a/lib/telemetry.go b/lib/telemetry.go index 1095d381fc91..6b0b2ad35d03 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -19,6 +19,7 @@ import ( "github.com/hashicorp/go-multierror" prometheuscore "github.com/prometheus/client_golang/prometheus" + "github.com/hashicorp/consul/agent/hcp/telemetry" "github.com/hashicorp/consul/lib/retry" ) @@ -213,7 +214,7 @@ type TelemetryConfig struct { // HCPSink provides an intialized OpenTelemetry sink that aggregates Consul metrics. // The aggregated metrics are periodically exported to HCP. - HCPSink metrics.MetricSink + HCPSink *telemetry.OTELSink } // MetricsHandler provides an http.Handler for displaying metrics. From 26bfd941f2343f6a94fd475590833193d06ed72f Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 23:32:51 -0400 Subject: [PATCH 063/100] add /v1/metrics --- agent/hcp/deps.go | 2 +- agent/hcp/deps_test.go | 4 ++-- agent/hcp/telemetry/otel_sink.go | 8 ++++---- lib/telemetry_test.go | 4 +--- 4 files changed, 8 insertions(+), 10 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 94659ec25ed5..1f2247746e00 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -75,7 +75,7 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string } // The endpoint from the HCP gateway is a domain without scheme, so it must be added. 
- url, err := url.Parse(fmt.Sprintf("https://%s", endpoint)) + url, err := url.Parse(fmt.Sprintf("https://%s/v1/metrics", endpoint)) if err != nil { return "", fmt.Errorf("failed to parse url: %w", err) } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 918853bba56b..8230717c8f14 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -62,7 +62,7 @@ func TestVerifyCCMRegistration(t *testing.T) { }, nil) }, mockCloudCfg: &client.MockCloudCfg{}, - expectedURL: "https://test.com", + expectedURL: "https://test.com/v1/metrics", }, "successMetricsEndpointOverride": { expect: func(mockClient *client.MockClient) { @@ -74,7 +74,7 @@ func TestVerifyCCMRegistration(t *testing.T) { }, nil) }, mockCloudCfg: &client.MockCloudCfg{}, - expectedURL: "https://override.com", + expectedURL: "https://override.com/v1/metrics", }, } { t.Run(name, func(t *testing.T) { diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 9268219334f5..ec5e2d476b1a 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -112,7 +112,7 @@ func (o *OTELSink) IncrCounter(key []string, val float32) { // AddSampleWithLabels emits a Consul gauge metric that gets // registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) // Set value in global Gauge store. o.gaugeStore.Set(k, float64(val), toAttributes(labels)) @@ -136,7 +136,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. 
func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) o.mutex.Lock() defer o.mutex.Unlock() @@ -158,7 +158,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet // IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { - k := o.flattenKey(key, labels) + k := o.flattenKey(key) o.mutex.Lock() defer o.mutex.Unlock() @@ -183,7 +183,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom func (o *OTELSink) EmitKey(key []string, val float32) {} // flattenKey key along with its labels. -func (o *OTELSink) flattenKey(parts []string, labels []gometrics.Label) string { +func (o *OTELSink) flattenKey(parts []string) string { buf := &bytes.Buffer{} joined := strings.Join(parts, ".") diff --git a/lib/telemetry_test.go b/lib/telemetry_test.go index c3de2e49c4f7..153b83639574 100644 --- a/lib/telemetry_test.go +++ b/lib/telemetry_test.go @@ -20,11 +20,9 @@ import ( ) func newCfg() (TelemetryConfig, error) { - // Manual reader outputs the aggregated metrics when reader.Collect is called. 
- reader := metric.NewManualReader() opts := &hcptelemetry.OTELSinkOpts{ Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: reader, + Reader: metric.NewManualReader(), Ctx: context.Background(), } From 09b498cfffda04d183787c27a3aaf546ad586316 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 27 Apr 2023 09:56:00 -0400 Subject: [PATCH 064/100] Avoid returning err for telemetry init --- agent/hcp/deps.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 1f2247746e00..c2e1b6e29189 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -35,9 +35,10 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { } // Make telemetry config request here to HCP. + // CCM errors should be ignored and not block HCP init. ctx := context.Background() - url, err := verifyCCMRegistration(ctx, d.Client) - if err != nil { + url, telemetryErr := verifyCCMRegistration(ctx, d.Client) + if telemetryErr != nil { return } @@ -51,7 +52,10 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { Logger: logger, } - d.Sink, err = initHCPSink(sinkOpts, metricsClientOpts, url) + d.Sink, telemetryErr = initHCPSink(sinkOpts, metricsClientOpts, url) + if telemetryErr != nil { + logger.Error("Failed to init telemetry.") + } return } From 4604fd9c726cfcc606fb26680df88751cf9a4fbb Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 27 Apr 2023 10:11:15 -0400 Subject: [PATCH 065/100] move sink init within the IsCloudEnabled() --- agent/hcp/deps.go | 27 +++++++++++++++++---------- agent/setup.go | 2 +- 2 files changed, 18 insertions(+), 11 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index c2e1b6e29189..24d360a442c8 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -34,12 +34,19 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { return } - // Make telemetry config request here to HCP. 
- // CCM errors should be ignored and not block HCP init. + d.Sink = setupSink(cfg, d.Client, logger) + + return +} + +// setupSink will initialize an OTELSink which sends Consul metrics to HCP +// only if the server is registered with the management plane (CCM). +// This step should not block server initialization, so errors are logged, but not returned. +func setupSink(cfg config.CloudConfig, client hcpclient.Client, logger hclog.Logger) *telemetry.OTELSink { ctx := context.Background() - url, telemetryErr := verifyCCMRegistration(ctx, d.Client) - if telemetryErr != nil { - return + url, err := verifyCCMRegistration(ctx, client) + if err != nil { + return nil } metricsClientOpts := &hcpclient.TelemetryClientCfg{ @@ -52,12 +59,13 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { Logger: logger, } - d.Sink, telemetryErr = initHCPSink(sinkOpts, metricsClientOpts, url) - if telemetryErr != nil { - logger.Error("Failed to init telemetry.") + sink, err := initHCPSink(sinkOpts, metricsClientOpts, url) + if err != nil { + logger.Error("Failed to init telemetry: %w", err) + return nil } - return + return sink } func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string, error) { @@ -88,7 +96,6 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string } func initHCPSink(sinkOpts *telemetry.OTELSinkOpts, clientCfg *hcpclient.TelemetryClientCfg, url string) (*telemetry.OTELSink, error) { - // If the above succeeds, the server is registered with CCM, init metrics sink. 
metricsClient, err := hcpclient.NewMetricsClient(clientCfg) if err != nil { return nil, fmt.Errorf("failed to init metrics client: %w", err) diff --git a/agent/setup.go b/agent/setup.go index 0c261e2efd63..350776ea5ff2 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -106,8 +106,8 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl if err != nil { return d, err } + cfg.Telemetry.HCPSink = d.HCP.Sink } - cfg.Telemetry.HCPSink = d.HCP.Sink d.MetricsConfig, err = lib.InitTelemetry(cfg.Telemetry, d.Logger) if err != nil { From 5f8c33ed05e9f7dc9fe50a7e723f51c22c379987 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 27 Apr 2023 12:41:40 -0400 Subject: [PATCH 066/100] Use HCPSinkOpts in deps instead --- agent/hcp/deps.go | 39 +++++------- agent/hcp/deps_test.go | 131 ++++++++++++++++++++--------------------- agent/setup.go | 3 +- lib/telemetry.go | 14 +++-- lib/telemetry_test.go | 14 ++--- 5 files changed, 93 insertions(+), 108 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 24d360a442c8..d989a9b76f9b 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -20,7 +20,7 @@ import ( type Deps struct { Client hcpclient.Client Provider scada.Provider - Sink *telemetry.OTELSink + SinkOpts *telemetry.OTELSinkOpts } func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { @@ -34,15 +34,15 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { return } - d.Sink = setupSink(cfg, d.Client, logger) + d.SinkOpts = sinkOpts(&cfg, d.Client, logger) return } -// setupSink will initialize an OTELSink which sends Consul metrics to HCP +// setupSink provides OTELSink configuration to initialize a Go Metrics sink, // only if the server is registered with the management plane (CCM). // This step should not block server initialization, so errors are logged, but not returned. 
-func setupSink(cfg config.CloudConfig, client hcpclient.Client, logger hclog.Logger) *telemetry.OTELSink { +func sinkOpts(cfg hcpclient.CloudConfig, client hcpclient.Client, logger hclog.Logger) *telemetry.OTELSinkOpts { ctx := context.Background() url, err := verifyCCMRegistration(ctx, client) if err != nil { @@ -51,23 +51,27 @@ func setupSink(cfg config.CloudConfig, client hcpclient.Client, logger hclog.Log metricsClientOpts := &hcpclient.TelemetryClientCfg{ Logger: logger, - CloudCfg: &cfg, + CloudCfg: cfg, + } + + metricsClient, err := hcpclient.NewMetricsClient(metricsClientOpts) + if err != nil { + logger.Error("failed to init metrics client: %w", err) + return nil } sinkOpts := &telemetry.OTELSinkOpts{ Ctx: ctx, Logger: logger, + Reader: telemetry.NewOTELReader(metricsClient, url, 10*time.Second), } - sink, err := initHCPSink(sinkOpts, metricsClientOpts, url) - if err != nil { - logger.Error("Failed to init telemetry: %w", err) - return nil - } - - return sink + return sinkOpts } +// verifyCCMRegistration checks that a server is registered with the HCP management plane +// by making a HTTP request to the HCP TelemetryConfig endpoint. +// If registered, it returns the full URL for the HCP Telemetry Gateway endpoint where metrics should be forwarded. 
func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string, error) { reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() @@ -94,14 +98,3 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string return url.String(), nil } - -func initHCPSink(sinkOpts *telemetry.OTELSinkOpts, clientCfg *hcpclient.TelemetryClientCfg, url string) (*telemetry.OTELSink, error) { - metricsClient, err := hcpclient.NewMetricsClient(clientCfg) - if err != nil { - return nil, fmt.Errorf("failed to init metrics client: %w", err) - } - - sinkOpts.Reader = telemetry.NewOTELReader(metricsClient, url, 10*time.Second) - - return telemetry.NewOTELSink(sinkOpts) -} diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 8230717c8f14..6abab8f85127 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -6,27 +6,79 @@ import ( "testing" "github.com/hashicorp/consul/agent/hcp/client" - "github.com/hashicorp/consul/agent/hcp/telemetry" "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) -func TestVerifyCCMRegistration(t *testing.T) { +func TestSinkOpts(t *testing.T) { for name, test := range map[string]struct { expect func(*client.MockClient) - wantErr string mockCloudCfg client.CloudConfig - expectedURL string + wantErr bool + }{ + "success": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + Endpoint: "test.com", + MetricsOverride: &client.MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + mockCloudCfg: client.MockCloudCfg{}, + }, + "emptyOptsWhenServerNotRegisteredWithCCM": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + Endpoint: "", + MetricsOverride: &client.MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + mockCloudCfg: client.MockCloudCfg{}, + wantErr: true, + }, + 
"emptyOptsWhenMetricsClientInitFails": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + Endpoint: "test.com", + MetricsOverride: &client.MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + mockCloudCfg: client.MockErrCloudCfg{}, + wantErr: true, + }, + } { + t.Run(name, func(t *testing.T) { + c := client.NewMockClient(t) + l := hclog.NewNullLogger() + test.expect(c) + sinkOpts := sinkOpts(test.mockCloudCfg, c, l) + if test.wantErr { + require.Nil(t, sinkOpts) + return + } + require.NotNil(t, sinkOpts) + }) + } +} +func TestVerifyCCMRegistration(t *testing.T) { + for name, test := range map[string]struct { + expect func(*client.MockClient) + wantErr string + expectedURL string }{ "failsWithFetchTelemetryFailure": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("FetchTelemetryConfig error")) }, - mockCloudCfg: &client.MockCloudCfg{}, - wantErr: "failed to fetch telemetry config", + wantErr: "failed to fetch telemetry config", }, - "noSinkWithEmptyEndpoint": { + "failsWithEmptyEndpoint": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "", @@ -35,8 +87,7 @@ func TestVerifyCCMRegistration(t *testing.T) { }, }, nil) }, - mockCloudCfg: &client.MockCloudCfg{}, - wantErr: "server not registed with management plane", + wantErr: "server not registed with management plane", }, "failsWithURLParseErr": { expect: func(mockClient *client.MockClient) { @@ -49,8 +100,7 @@ func TestVerifyCCMRegistration(t *testing.T) { }, }, nil) }, - mockCloudCfg: &client.MockCloudCfg{}, - wantErr: "failed to parse url:", + wantErr: "failed to parse url:", }, "success": { expect: func(mockClient *client.MockClient) { @@ -61,8 +111,7 @@ func TestVerifyCCMRegistration(t *testing.T) { }, }, nil) }, - mockCloudCfg: 
&client.MockCloudCfg{}, - expectedURL: "https://test.com/v1/metrics", + expectedURL: "https://test.com/v1/metrics", }, "successMetricsEndpointOverride": { expect: func(mockClient *client.MockClient) { @@ -73,8 +122,7 @@ func TestVerifyCCMRegistration(t *testing.T) { }, }, nil) }, - mockCloudCfg: &client.MockCloudCfg{}, - expectedURL: "https://override.com/v1/metrics", + expectedURL: "https://override.com/v1/metrics", }, } { t.Run(name, func(t *testing.T) { @@ -95,56 +143,3 @@ func TestVerifyCCMRegistration(t *testing.T) { }) } } - -func TestInitSink(t *testing.T) { - for name, test := range map[string]struct { - sinkOpts *telemetry.OTELSinkOpts - clientOpts *client.TelemetryClientCfg - wantErr string - }{ - "failsWithMetricsClientError": { - clientOpts: &client.TelemetryClientCfg{ - Logger: hclog.NewNullLogger(), - CloudCfg: &client.MockErrCloudCfg{}, - }, - sinkOpts: &telemetry.OTELSinkOpts{ - Logger: hclog.NewNullLogger(), - Ctx: context.Background(), - }, - wantErr: "failed to init metrics client", - }, - "failsWithInvalidSinkOpts": { - clientOpts: &client.TelemetryClientCfg{ - Logger: hclog.NewNullLogger(), - CloudCfg: &client.MockCloudCfg{}, - }, - sinkOpts: &telemetry.OTELSinkOpts{ - Logger: nil, - Ctx: context.Background(), - }, - wantErr: "failed to init OTEL sink: provide valid OTELSinkOpts", - }, - "success": { - clientOpts: &client.TelemetryClientCfg{ - Logger: hclog.NewNullLogger(), - CloudCfg: &client.MockCloudCfg{}, - }, - sinkOpts: &telemetry.OTELSinkOpts{ - Logger: hclog.NewNullLogger(), - Ctx: context.Background(), - }, - }, - } { - t.Run(name, func(t *testing.T) { - sink, err := initHCPSink(test.sinkOpts, test.clientOpts, "https://test.com") - if test.wantErr != "" { - require.Error(t, err) - require.Contains(t, err.Error(), test.wantErr) - return - } - - require.NotNil(t, sink) - require.NoError(t, err) - }) - } -} diff --git a/agent/setup.go b/agent/setup.go index 350776ea5ff2..56e35a88cbef 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ 
-100,13 +100,12 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl cfg.Telemetry.PrometheusOpts.GaugeDefinitions = gauges cfg.Telemetry.PrometheusOpts.CounterDefinitions = counters cfg.Telemetry.PrometheusOpts.SummaryDefinitions = summaries - if cfg.IsCloudEnabled() { d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger) if err != nil { return d, err } - cfg.Telemetry.HCPSink = d.HCP.Sink + cfg.Telemetry.HCPSinkOpts = d.HCP.SinkOpts } d.MetricsConfig, err = lib.InitTelemetry(cfg.Telemetry, d.Logger) diff --git a/lib/telemetry.go b/lib/telemetry.go index 6b0b2ad35d03..a591a4e86fe8 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -212,9 +212,10 @@ type TelemetryConfig struct { // hcl: telemetry { prometheus_retention_time = "duration" } PrometheusOpts prometheus.PrometheusOpts - // HCPSink provides an intialized OpenTelemetry sink that aggregates Consul metrics. - // The aggregated metrics are periodically exported to HCP. - HCPSink *telemetry.OTELSink + // HCPSinkOpts provides configuration for an OpenTelemetry HCP Metrics sink. + // The aggregated OpenTelemetry metrics are periodically exported to HCP. + // The HCPSinkOpts are created when the HCP Deps are initialized. + HCPSinkOpts *telemetry.OTELSinkOpts } // MetricsHandler provides an http.Handler for displaying metrics. @@ -239,9 +240,10 @@ func (cfg *MetricsConfig) Cancel() { } func hcpSink(cfg TelemetryConfig, _ string) (metrics.MetricSink, error) { - // The sink is already initialized when HCP deps setup occurs to ensure the - // server is registered with the management plane. 
- return cfg.HCPSink, nil + if cfg.HCPSinkOpts == nil || cfg.HCPSinkOpts.Reader == nil { + return nil, nil + } + return telemetry.NewOTELSink(cfg.HCPSinkOpts) } func statsiteSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) { diff --git a/lib/telemetry_test.go b/lib/telemetry_test.go index 153b83639574..60aa15e96faf 100644 --- a/lib/telemetry_test.go +++ b/lib/telemetry_test.go @@ -19,26 +19,23 @@ import ( "go.opentelemetry.io/otel/sdk/metric" ) -func newCfg() (TelemetryConfig, error) { +func newCfg() TelemetryConfig { opts := &hcptelemetry.OTELSinkOpts{ Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), Reader: metric.NewManualReader(), Ctx: context.Background(), } - hcpSink, err := hcptelemetry.NewOTELSink(opts) - return TelemetryConfig{ StatsdAddr: "statsd.host:1234", StatsiteAddr: "statsite.host:1234", DogstatsdAddr: "mydog.host:8125", - HCPSink: hcpSink, - }, err + HCPSinkOpts: opts, + } } func TestConfigureSinks(t *testing.T) { - cfg, err := newCfg() - require.NoError(t, err) + cfg := newCfg() sinks, err := configureSinks(cfg, nil) require.Error(t, err) // 4 sinks: statsd, statsite, inmem, hcp @@ -71,8 +68,7 @@ func TestInitTelemetryRetrySuccess(t *testing.T) { }, os.Stdout) require.NoError(t, err) - cfg, err := newCfg() - require.NoError(t, err) + cfg := newCfg() _, err = InitTelemetry(cfg, logger) require.Error(t, err) From 675a494cd693c0fdb4e866e5649bc7df578de43b Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 27 Apr 2023 13:22:52 -0400 Subject: [PATCH 067/100] update golden test for configuration file --- agent/config/testdata/TestRuntimeConfig_Sanitize.golden | 1 + 1 file changed, 1 insertion(+) diff --git a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden index 054a284aeb85..f66794c06ef7 100644 --- a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden +++ b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden @@ -463,6 +463,7 @@ 
"DogstatsdAddr": "", "DogstatsdTags": [], "FilterDefault": false, + "HCPSinkOpts": null, "MetricsPrefix": "", "PrometheusOpts": { "CounterDefinitions": [], From f9b8e596dcf25068cf611af1a0dcea602b34a470 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 10:34:51 -0400 Subject: [PATCH 068/100] Switch to using extra sinks in the telemetry library --- .../TestRuntimeConfig_Sanitize.golden | 2 +- agent/hcp/deps.go | 34 +++++++++----- agent/hcp/deps_test.go | 44 +++++++++++-------- agent/setup.go | 4 +- lib/telemetry.go | 20 +++------ lib/telemetry_test.go | 17 +++---- 6 files changed, 64 insertions(+), 57 deletions(-) diff --git a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden index f66794c06ef7..86311549c64e 100644 --- a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden +++ b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden @@ -462,8 +462,8 @@ "DisableHostname": false, "DogstatsdAddr": "", "DogstatsdTags": [], + "ExtraSinks": [], "FilterDefault": false, - "HCPSinkOpts": null, "MetricsPrefix": "", "PrometheusOpts": { "CounterDefinitions": [], diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index d989a9b76f9b..5ed51d0dc9a4 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -9,6 +9,7 @@ import ( "net/url" "time" + "github.com/coredns/coredns/plugin/pkg/log" hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" @@ -20,7 +21,7 @@ import ( type Deps struct { Client hcpclient.Client Provider scada.Provider - SinkOpts *telemetry.OTELSinkOpts + Sink *telemetry.OTELSink } func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { @@ -34,18 +35,21 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { return } - d.SinkOpts = sinkOpts(&cfg, d.Client, logger) + d.Sink = sink(d.Client, &cfg, logger) return } -// setupSink provides 
OTELSink configuration to initialize a Go Metrics sink, -// only if the server is registered with the management plane (CCM). +// sink provides initializes an OTELSink which forwards Consul metrics to HCP. +// The sink is only initialized if the server is registered with the management plane (CCM). // This step should not block server initialization, so errors are logged, but not returned. -func sinkOpts(cfg hcpclient.CloudConfig, client hcpclient.Client, logger hclog.Logger) *telemetry.OTELSinkOpts { +func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) *telemetry.OTELSink { ctx := context.Background() - url, err := verifyCCMRegistration(ctx, client) - if err != nil { + url, err := verifyCCMRegistration(ctx, hcpClient) + + // if endpoint is empty, no metrics endpoint configuration for this Consul server + // (e.g. not registered with CCM or feature flag to control rollout) so do not enable the HCP metrics sink. + if url == "" { return nil } @@ -66,12 +70,18 @@ func sinkOpts(cfg hcpclient.CloudConfig, client hcpclient.Client, logger hclog.L Reader: telemetry.NewOTELReader(metricsClient, url, 10*time.Second), } - return sinkOpts + sink, err := telemetry.NewOTELSink(sinkOpts) + if err != nil { + logger.Error("failed to init OTEL sink: %w", err) + return nil + } + + return sink } // verifyCCMRegistration checks that a server is registered with the HCP management plane // by making a HTTP request to the HCP TelemetryConfig endpoint. -// If registered, it returns the full URL for the HCP Telemetry Gateway endpoint where metrics should be forwarded. +// If registered, it returns the endpoint for the HCP Telemetry Gateway endpoint where metrics should be forwarded. 
func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string, error) { reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() @@ -86,13 +96,15 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string endpoint = override } + // no error, the server simply isn't configured for metrics forwarding. if endpoint == "" { - return "", fmt.Errorf("server not registed with management plane") + return "", nil } - // The endpoint from the HCP gateway is a domain without scheme, so it must be added. + // The endpoint from the HCP gateway is a domain without scheme, and without the metrics path, so they must be added. url, err := url.Parse(fmt.Sprintf("https://%s/v1/metrics", endpoint)) if err != nil { + log.Error("failed to parse url: %w", err) return "", fmt.Errorf("failed to parse url: %w", err) } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 6abab8f85127..2d8d977a6e29 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -11,24 +11,25 @@ import ( "github.com/stretchr/testify/require" ) -func TestSinkOpts(t *testing.T) { +func TestSink(t *testing.T) { for name, test := range map[string]struct { expect func(*client.MockClient) mockCloudCfg client.CloudConfig - wantErr bool + expectedSink bool }{ "success": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "test.com", MetricsOverride: &client.MetricsConfig{ - Endpoint: "", + Endpoint: "test.com", }, }, nil) }, mockCloudCfg: client.MockCloudCfg{}, + expectedSink: true, }, - "emptyOptsWhenServerNotRegisteredWithCCM": { + "noSinkWhenServerNotRegisteredWithCCM": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "", @@ -38,9 +39,14 @@ func TestSinkOpts(t *testing.T) { }, nil) }, mockCloudCfg: client.MockCloudCfg{}, - wantErr: true, }, - 
"emptyOptsWhenMetricsClientInitFails": { + "noSinkWhenCCMVerificationFails": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("fetch failed")) + }, + mockCloudCfg: client.MockCloudCfg{}, + }, + "noSinkWhenMetricsClientInitFails": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "test.com", @@ -50,15 +56,14 @@ func TestSinkOpts(t *testing.T) { }, nil) }, mockCloudCfg: client.MockErrCloudCfg{}, - wantErr: true, }, } { t.Run(name, func(t *testing.T) { c := client.NewMockClient(t) l := hclog.NewNullLogger() test.expect(c) - sinkOpts := sinkOpts(test.mockCloudCfg, c, l) - if test.wantErr { + sinkOpts := sink(c, test.mockCloudCfg, l) + if !test.expectedSink { require.Nil(t, sinkOpts) return } @@ -66,6 +71,7 @@ func TestSinkOpts(t *testing.T) { }) } } + func TestVerifyCCMRegistration(t *testing.T) { for name, test := range map[string]struct { expect func(*client.MockClient) @@ -78,29 +84,29 @@ func TestVerifyCCMRegistration(t *testing.T) { }, wantErr: "failed to fetch telemetry config", }, - "failsWithEmptyEndpoint": { + "failsWithURLParseErr": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - Endpoint: "", + // Minimum 2 chars for a domain to be valid. + Endpoint: "s", MetricsOverride: &client.MetricsConfig{ - Endpoint: "", + // Invalid domain chars + Endpoint: " ", }, }, nil) }, - wantErr: "server not registed with management plane", + wantErr: "failed to parse url:", }, - "failsWithURLParseErr": { + "noErrWithEmptyEndpoint": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - // Minimum 2 chars for a domain to be valid. 
- Endpoint: "s", + Endpoint: "", MetricsOverride: &client.MetricsConfig{ - // Invalid domain chars - Endpoint: " ", + Endpoint: "", }, }, nil) }, - wantErr: "failed to parse url:", + expectedURL: "", }, "success": { expect: func(mockClient *client.MockClient) { diff --git a/agent/setup.go b/agent/setup.go index 56e35a88cbef..abb72b118ea1 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -105,7 +105,9 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl if err != nil { return d, err } - cfg.Telemetry.HCPSinkOpts = d.HCP.SinkOpts + if d.HCP.Sink != nil { + cfg.Telemetry.ExtraSinks = append(cfg.Telemetry.ExtraSinks, d.HCP.Sink) + } } d.MetricsConfig, err = lib.InitTelemetry(cfg.Telemetry, d.Logger) diff --git a/lib/telemetry.go b/lib/telemetry.go index a591a4e86fe8..1327f39ab900 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -19,7 +19,6 @@ import ( "github.com/hashicorp/go-multierror" prometheuscore "github.com/prometheus/client_golang/prometheus" - "github.com/hashicorp/consul/agent/hcp/telemetry" "github.com/hashicorp/consul/lib/retry" ) @@ -212,10 +211,8 @@ type TelemetryConfig struct { // hcl: telemetry { prometheus_retention_time = "duration" } PrometheusOpts prometheus.PrometheusOpts - // HCPSinkOpts provides configuration for an OpenTelemetry HCP Metrics sink. - // The aggregated OpenTelemetry metrics are periodically exported to HCP. - // The HCPSinkOpts are created when the HCP Deps are initialized. - HCPSinkOpts *telemetry.OTELSinkOpts + // ExtraSinks are additional metrics.MetricSink implementations that are to always be added. + ExtraSinks []metrics.MetricSink } // MetricsHandler provides an http.Handler for displaying metrics. 
@@ -239,13 +236,6 @@ func (cfg *MetricsConfig) Cancel() { } } -func hcpSink(cfg TelemetryConfig, _ string) (metrics.MetricSink, error) { - if cfg.HCPSinkOpts == nil || cfg.HCPSinkOpts.Reader == nil { - return nil, nil - } - return telemetry.NewOTELSink(cfg.HCPSinkOpts) -} - func statsiteSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, error) { addr := cfg.StatsiteAddr if addr == "" { @@ -362,7 +352,11 @@ func configureSinks(cfg TelemetryConfig, memSink metrics.MetricSink) (metrics.Fa addSink(dogstatdSink) addSink(circonusSink) addSink(prometheusSink) - addSink(hcpSink) + for _, sink := range cfg.ExtraSinks { + if sink != nil { + sinks = append(sinks, sink) + } + } if len(sinks) > 0 { sinks = append(sinks, memSink) diff --git a/lib/telemetry_test.go b/lib/telemetry_test.go index 60aa15e96faf..63dda97e78f4 100644 --- a/lib/telemetry_test.go +++ b/lib/telemetry_test.go @@ -4,33 +4,26 @@ package lib import ( - "context" "errors" - "io" "net" "os" "testing" - hcptelemetry "github.com/hashicorp/consul/agent/hcp/telemetry" + "github.com/armon/go-metrics" "github.com/hashicorp/consul/logging" - "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-multierror" "github.com/stretchr/testify/require" - "go.opentelemetry.io/otel/sdk/metric" ) func newCfg() TelemetryConfig { - opts := &hcptelemetry.OTELSinkOpts{ - Logger: hclog.New(&hclog.LoggerOptions{Output: io.Discard}), - Reader: metric.NewManualReader(), - Ctx: context.Background(), - } return TelemetryConfig{ StatsdAddr: "statsd.host:1234", StatsiteAddr: "statsite.host:1234", DogstatsdAddr: "mydog.host:8125", - HCPSinkOpts: opts, + ExtraSinks: []metrics.MetricSink{ + &metrics.BlackholeSink{}, + }, } } @@ -38,7 +31,7 @@ func TestConfigureSinks(t *testing.T) { cfg := newCfg() sinks, err := configureSinks(cfg, nil) require.Error(t, err) - // 4 sinks: statsd, statsite, inmem, hcp + // 4 sinks: statsd, statsite, inmem, extra sink (blackhole) require.Equal(t, 4, len(sinks)) cfg = TelemetryConfig{ From 
b07c73190f8cd61f129d6766550026f7fa46f698 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 14:55:26 -0400 Subject: [PATCH 069/100] keep name MetricsConfig --- agent/hcp/client/client.go | 8 ++++---- agent/hcp/deps.go | 10 ++-------- agent/hcp/deps_test.go | 14 +++++++------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index 9475f82cf0a1..a8187142dd7f 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -36,9 +36,9 @@ type MetricsConfig struct { Endpoint string } type TelemetryConfig struct { - Endpoint string - Labels map[string]string - MetricsOverride *MetricsConfig + Endpoint string + Labels map[string]string + MetricsConfig *MetricsConfig } type BootstrapConfig struct { @@ -106,7 +106,7 @@ func (c *hcpClient) FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig, return &TelemetryConfig{ Endpoint: payloadConfig.Endpoint, Labels: payloadConfig.Labels, - MetricsOverride: &MetricsConfig{ + MetricsConfig: &MetricsConfig{ Filters: payloadConfig.Metrics.IncludeList, Endpoint: payloadConfig.Metrics.Endpoint, }, diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 5ed51d0dc9a4..76a2fe32419c 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -53,19 +53,13 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return nil } - metricsClientOpts := &hcpclient.TelemetryClientCfg{ - Logger: logger, - CloudCfg: cfg, - } - - metricsClient, err := hcpclient.NewMetricsClient(metricsClientOpts) + metricsClient, err := hcpclient.NewMetricsClient(cfg, logger) if err != nil { logger.Error("failed to init metrics client: %w", err) return nil } sinkOpts := &telemetry.OTELSinkOpts{ - Ctx: ctx, Logger: logger, Reader: telemetry.NewOTELReader(metricsClient, url, 10*time.Second), } @@ -92,7 +86,7 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string } endpoint := telemetryCfg.Endpoint - if 
override := telemetryCfg.MetricsOverride.Endpoint; override != "" { + if override := telemetryCfg.MetricsConfig.Endpoint; override != "" { endpoint = override } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 2d8d977a6e29..d4fad278db2a 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -21,7 +21,7 @@ func TestSink(t *testing.T) { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "test.com", - MetricsOverride: &client.MetricsConfig{ + MetricsConfig: &client.MetricsConfig{ Endpoint: "test.com", }, }, nil) @@ -33,7 +33,7 @@ func TestSink(t *testing.T) { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "", - MetricsOverride: &client.MetricsConfig{ + MetricsConfig: &client.MetricsConfig{ Endpoint: "", }, }, nil) @@ -50,7 +50,7 @@ func TestSink(t *testing.T) { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "test.com", - MetricsOverride: &client.MetricsConfig{ + MetricsConfig: &client.MetricsConfig{ Endpoint: "", }, }, nil) @@ -89,7 +89,7 @@ func TestVerifyCCMRegistration(t *testing.T) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ // Minimum 2 chars for a domain to be valid. 
Endpoint: "s", - MetricsOverride: &client.MetricsConfig{ + MetricsConfig: &client.MetricsConfig{ // Invalid domain chars Endpoint: " ", }, @@ -101,7 +101,7 @@ func TestVerifyCCMRegistration(t *testing.T) { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "", - MetricsOverride: &client.MetricsConfig{ + MetricsConfig: &client.MetricsConfig{ Endpoint: "", }, }, nil) @@ -112,7 +112,7 @@ func TestVerifyCCMRegistration(t *testing.T) { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "test.com", - MetricsOverride: &client.MetricsConfig{ + MetricsConfig: &client.MetricsConfig{ Endpoint: "", }, }, nil) @@ -123,7 +123,7 @@ func TestVerifyCCMRegistration(t *testing.T) { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "test.com", - MetricsOverride: &client.MetricsConfig{ + MetricsConfig: &client.MetricsConfig{ Endpoint: "override.com", }, }, nil) From e6f367f262e6e6c9ba8bdef9e998c869aeb15475 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 28 Apr 2023 15:26:57 -0400 Subject: [PATCH 070/100] fix log in verifyCCMRegistration --- agent/hcp/deps.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 76a2fe32419c..b0f86f1dd200 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -9,7 +9,6 @@ import ( "net/url" "time" - "github.com/coredns/coredns/plugin/pkg/log" hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" @@ -46,6 +45,10 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) *telemetry.OTELSink { ctx := 
context.Background() url, err := verifyCCMRegistration(ctx, hcpClient) + if err != nil { + logger.Error("failed to verify CCM registration: %w", err) + return nil + } // if endpoint is empty, no metrics endpoint configuration for this Consul server // (e.g. not registered with CCM or feature flag to control rollout) so do not enable the HCP metrics sink. @@ -98,7 +101,6 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string // The endpoint from the HCP gateway is a domain without scheme, and without the metrics path, so they must be added. url, err := url.Parse(fmt.Sprintf("https://%s/v1/metrics", endpoint)) if err != nil { - log.Error("failed to parse url: %w", err) return "", fmt.Errorf("failed to parse url: %w", err) } From e91854419aea78f967d622358077ed529b2b28a2 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 10:49:51 -0400 Subject: [PATCH 071/100] Set logger in context --- agent/hcp/deps.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index b0f86f1dd200..398a77da5f8d 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -44,6 +44,7 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { // This step should not block server initialization, so errors are logged, but not returned. 
func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) *telemetry.OTELSink { ctx := context.Background() + ctx = hclog.WithContext(ctx, logger) url, err := verifyCCMRegistration(ctx, hcpClient) if err != nil { logger.Error("failed to verify CCM registration: %w", err) @@ -56,7 +57,7 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return nil } - metricsClient, err := hcpclient.NewMetricsClient(cfg, logger) + metricsClient, err := hcpclient.NewMetricsClient(cfg, ctx) if err != nil { logger.Error("failed to init metrics client: %w", err) return nil From d357cb528b056adb12ae75c1adf4d9cf83aa642f Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:37:22 -0400 Subject: [PATCH 072/100] pass around MetricSink in deps --- agent/hcp/deps.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 398a77da5f8d..74ef694e6247 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -13,6 +13,8 @@ import ( "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" "github.com/hashicorp/consul/agent/hcp/telemetry" + + "github.com/armon/go-metrics" "github.com/hashicorp/go-hclog" ) @@ -20,7 +22,7 @@ import ( type Deps struct { Client hcpclient.Client Provider scada.Provider - Sink *telemetry.OTELSink + Sink metrics.MetricSink } func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { From 3e79438c65a850252d482687a2c61600dc48f306 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 1 May 2023 12:43:33 -0400 Subject: [PATCH 073/100] Fix imports --- agent/hcp/client/client.go | 5 +++-- agent/hcp/deps_test.go | 1 + lib/telemetry_test.go | 6 ++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index a8187142dd7f..642beb8d75d0 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -9,11 
+9,12 @@ import ( "strconv" "time" + "github.com/hashicorp/consul/agent/hcp/config" + "github.com/hashicorp/consul/version" + httptransport "github.com/go-openapi/runtime/client" "github.com/go-openapi/strfmt" - "github.com/hashicorp/consul/agent/hcp/config" - "github.com/hashicorp/consul/version" hcptelemetry "github.com/hashicorp/hcp-sdk-go/clients/cloud-consul-telemetry-gateway/preview/2023-04-14/client/consul_telemetry_service" hcpgnm "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/client/global_network_manager_service" gnmmod "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/models" diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index d4fad278db2a..8198d1f390a7 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" diff --git a/lib/telemetry_test.go b/lib/telemetry_test.go index 63dda97e78f4..84ec13ed733d 100644 --- a/lib/telemetry_test.go +++ b/lib/telemetry_test.go @@ -9,14 +9,14 @@ import ( "os" "testing" - "github.com/armon/go-metrics" "github.com/hashicorp/consul/logging" + + "github.com/armon/go-metrics" "github.com/hashicorp/go-multierror" "github.com/stretchr/testify/require" ) func newCfg() TelemetryConfig { - return TelemetryConfig{ StatsdAddr: "statsd.host:1234", StatsiteAddr: "statsite.host:1234", @@ -60,9 +60,7 @@ func TestInitTelemetryRetrySuccess(t *testing.T) { LogLevel: "INFO", }, os.Stdout) require.NoError(t, err) - cfg := newCfg() - _, err = InitTelemetry(cfg, logger) require.Error(t, err) From 3d57ff572545bc1f19e8d49aaef72670d061dd2c Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 12 May 2023 13:30:27 -0400 Subject: [PATCH 074/100] Rebased onto otel sink pr --- agent/hcp/client/client.go | 6 +++--- agent/hcp/deps.go | 26 
+++++++++++++------------- agent/hcp/deps_test.go | 9 ++++++++- agent/hcp/telemetry/otel_exporter.go | 12 ++++++------ agent/hcp/telemetry/otel_sink.go | 2 +- agent/hcp/telemetry/otel_sink_test.go | 10 +++++----- go.mod | 6 +++--- go.sum | 1 - 8 files changed, 39 insertions(+), 33 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index 642beb8d75d0..bca5343ba68f 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -9,9 +9,6 @@ import ( "strconv" "time" - "github.com/hashicorp/consul/agent/hcp/config" - "github.com/hashicorp/consul/version" - httptransport "github.com/go-openapi/runtime/client" "github.com/go-openapi/strfmt" @@ -20,6 +17,9 @@ import ( gnmmod "github.com/hashicorp/hcp-sdk-go/clients/cloud-global-network-manager-service/preview/2022-02-15/models" "github.com/hashicorp/hcp-sdk-go/httpclient" "github.com/hashicorp/hcp-sdk-go/resource" + + "github.com/hashicorp/consul/agent/hcp/config" + "github.com/hashicorp/consul/version" ) // Client interface exposes HCP operations that can be invoked by Consul diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 74ef694e6247..bb4c28ec9478 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -9,13 +9,13 @@ import ( "net/url" "time" + "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" + hcpclient "github.com/hashicorp/consul/agent/hcp/client" "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" "github.com/hashicorp/consul/agent/hcp/telemetry" - - "github.com/armon/go-metrics" - "github.com/hashicorp/go-hclog" ) // Deps contains the interfaces that the rest of Consul core depends on for HCP integration. 
@@ -47,7 +47,7 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) *telemetry.OTELSink { ctx := context.Background() ctx = hclog.WithContext(ctx, logger) - url, err := verifyCCMRegistration(ctx, hcpClient) + u, err := verifyCCMRegistration(ctx, hcpClient) if err != nil { logger.Error("failed to verify CCM registration: %w", err) return nil @@ -55,7 +55,7 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo // if endpoint is empty, no metrics endpoint configuration for this Consul server // (e.g. not registered with CCM or feature flag to control rollout) so do not enable the HCP metrics sink. - if url == "" { + if u == nil { return nil } @@ -66,8 +66,8 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo } sinkOpts := &telemetry.OTELSinkOpts{ - Logger: logger, - Reader: telemetry.NewOTELReader(metricsClient, url, 10*time.Second), + Ctx: ctx, + Reader: telemetry.NewOTELReader(metricsClient, u, 10*time.Second), } sink, err := telemetry.NewOTELSink(sinkOpts) @@ -82,13 +82,13 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo // verifyCCMRegistration checks that a server is registered with the HCP management plane // by making a HTTP request to the HCP TelemetryConfig endpoint. // If registered, it returns the endpoint for the HCP Telemetry Gateway endpoint where metrics should be forwarded. 
-func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string, error) { +func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (*url.URL, error) { reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() telemetryCfg, err := client.FetchTelemetryConfig(reqCtx) if err != nil { - return "", fmt.Errorf("failed to fetch telemetry config %w", err) + return nil, fmt.Errorf("failed to fetch telemetry config %w", err) } endpoint := telemetryCfg.Endpoint @@ -98,14 +98,14 @@ func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (string // no error, the server simply isn't configured for metrics forwarding. if endpoint == "" { - return "", nil + return nil, nil } // The endpoint from the HCP gateway is a domain without scheme, and without the metrics path, so they must be added. - url, err := url.Parse(fmt.Sprintf("https://%s/v1/metrics", endpoint)) + u, err := url.Parse(fmt.Sprintf("https://%s/v1/metrics", endpoint)) if err != nil { - return "", fmt.Errorf("failed to parse url: %w", err) + return nil, fmt.Errorf("failed to parse url: %w", err) } - return url.String(), nil + return u, nil } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 8198d1f390a7..eda19244f26f 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -146,7 +146,14 @@ func TestVerifyCCMRegistration(t *testing.T) { } require.NoError(t, err) - require.Equal(t, url, test.expectedURL) + if test.expectedURL == "" { + require.Nil(t, url) + return + } + + expURL, err := url.Parse(test.expectedURL) + require.NoError(t, err) + require.Equal(t, url, expURL) }) } } diff --git a/agent/hcp/telemetry/otel_exporter.go b/agent/hcp/telemetry/otel_exporter.go index 02f6a07f8371..2512706f5353 100644 --- a/agent/hcp/telemetry/otel_exporter.go +++ b/agent/hcp/telemetry/otel_exporter.go @@ -15,15 +15,15 @@ import ( // The exporter is used by a OTEL Metrics SDK PeriodicReader to export aggregated metrics. 
// This allows us to use a custom client - HCP authenticated MetricsClient. type OTELExporter struct { - client hcpclient.MetricsClient - url url.URL + client hcpclient.MetricsClient + endpoint *url.URL } // NewOTELExporter returns a configured OTELExporter -func NewOTELExporter(client hcpclient.MetricsClient, url url.URL) *OTELExporter { +func NewOTELExporter(client hcpclient.MetricsClient, endpoint *url.URL) *OTELExporter { return &OTELExporter{ - client: client, - url: url, + client: client, + endpoint: endpoint, } } @@ -56,7 +56,7 @@ func (e *OTELExporter) Export(ctx context.Context, metrics *metricdata.ResourceM if isEmpty(otlpMetrics) { return nil } - return e.client.ExportMetrics(ctx, otlpMetrics, e.url.String()) + return e.client.ExportMetrics(ctx, otlpMetrics, e.endpoint.String()) } // ForceFlush is a no-op, as the MetricsClient client holds no state. diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index ec5e2d476b1a..ee2776db2b5c 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -60,7 +60,7 @@ type OTELSink struct { // NewOTELReader returns a configured OTEL PeriodicReader to export metrics every X seconds. // It configures the reader with a custom OTELExporter with a MetricsClient to transform and export // metrics in OTLP format to an external url. 
-func NewOTELReader(client client.MetricsClient, url url.URL, exportInterval time.Duration) otelsdk.Reader { +func NewOTELReader(client client.MetricsClient, url *url.URL, exportInterval time.Duration) otelsdk.Reader { exporter := NewOTELExporter(client, url) return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) } diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index b5e6c3557330..ebdfa432554a 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -79,8 +79,8 @@ var ( Name: "consul.raft.leader.lastContact", Description: "", Unit: "", - Data: metricdata.Histogram{ - DataPoints: []metricdata.HistogramDataPoint{ + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ { Attributes: *attribute.EmptySet(), Count: 1, @@ -95,8 +95,8 @@ var ( Name: "consul.raft.commitTime", Description: "", Unit: "", - Data: metricdata.Histogram{ - DataPoints: []metricdata.HistogramDataPoint{ + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ { Attributes: attrs, Count: 1, @@ -363,7 +363,7 @@ func isSameDataPoint(t *testing.T, expected []metricdata.DataPoint[float64], act } } -func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint, actual []metricdata.HistogramDataPoint) { +func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) { require.Equal(t, len(expected), len(actual), "different histogram datapoint length") // Sort for predictable data in order of lowest sum. 
diff --git a/go.mod b/go.mod index c7e583b2e5fd..951f0b11dd00 100644 --- a/go.mod +++ b/go.mod @@ -103,9 +103,9 @@ require ( go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d - golang.org/x/net v0.7.0 - golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 - golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 + golang.org/x/net v0.8.0 + golang.org/x/oauth2 v0.6.0 + golang.org/x/sync v0.1.0 golang.org/x/sys v0.8.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 diff --git a/go.sum b/go.sum index 3a4d9ed55498..32ef071c418d 100644 --- a/go.sum +++ b/go.sum @@ -1355,7 +1355,6 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.6.0 h1:clScbb1cHjoCkyRbWwBEUZ5H/tIFu5TAXIqaZD0Gcjw= From dc48f65d76b1059900be4c4144c0c5225f646694 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 15 May 2023 16:18:38 -0400 Subject: [PATCH 075/100] Fix URL in test --- agent/hcp/telemetry/otel_exporter_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/agent/hcp/telemetry/otel_exporter_test.go b/agent/hcp/telemetry/otel_exporter_test.go index 0e3d3fcc1e4f..72e6b84d242c 100644 --- a/agent/hcp/telemetry/otel_exporter_test.go +++ b/agent/hcp/telemetry/otel_exporter_test.go @@ -3,6 +3,7 @@ package telemetry import ( "context" 
"fmt" + "net/url" "testing" "github.com/stretchr/testify/require" @@ -96,9 +97,7 @@ func TestExport(t *testing.T) { test := test t.Run(name, func(t *testing.T) { t.Parallel() - exp := &OTELExporter{ - client: test.client, - } + exp := NewOTELExporter(test.client, &url.URL{}) err := exp.Export(context.Background(), test.metrics) if test.wantErr != "" { From 218a4f66805ae298b622bb97daf1acb3c168ffb7 Mon Sep 17 00:00:00 2001 From: Ashvitha Date: Tue, 16 May 2023 13:58:37 -0400 Subject: [PATCH 076/100] [HCP Observability] OTELSink (#17159) * Client configured with TLS using HCP config and retry/throttle * run go mod tidy * Remove one abstraction to use the config from deps * Address PR feedback * Client configured with TLS using HCP config and retry/throttle * run go mod tidy * Create new OTELExporter which uses the MetricsClient Add transform because the conversion is in an /internal package * Fix lint error * early return when there are no metrics * Add NewOTELExporter() function * Downgrade to metrics SDK version: v1.15.0-rc.1 * Fix imports * fix small nits with comments and url.URL * Fix tests by asserting actual error for context cancellation, fix parallel, and make mock more versatile * Cleanup error handling and clarify empty metrics case * Fix input/expected naming in otel_transform_test.go * add comment for metric tracking * Add a general isEmpty method * Add clear error types * update to latest version 1.15.0 of OTEL * Client configured with TLS using HCP config and retry/throttle * run go mod tidy * Remove one abstraction to use the config from deps * Address PR feedback * Initialize OTELSink with sync.Map for all the instrument stores. * Moved PeriodicReader init to NewOtelReader function. This allows us to use a ManualReader for tests. 
* Switch to mutex instead of sync.Map to avoid type assertion * Add gauge store * Clarify comments * return concrete sink type * Fix lint errors * Move gauge store to be within sink * Use context.TODO,rebase and clenaup opts handling * Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1 * Fix imports * Update to latest stable version by rebasing on cc-4933, fix import, remove mutex init, fix opts error messages and use logger from ctx * Add lots of documentation to the OTELSink * Fix gauge store comment and check ok * Add select and ctx.Done() check to gauge callback * use require.Equal for attributes * Fixed import naming * Remove float64 calls and add a NewGaugeStore method * Change name Store to Set in gaugeStore, add concurrency tests in both OTELSink and gauge store * Generate 100 gauge operations * Seperate the labels into goroutines in sink test * Generate kv store for the test case keys to avoid using uuid * Added a race test with 300 samples for OTELSink * Do not pass in waitgroup and use error channel instead. 
* Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build failures in otel * Fix nits --- agent/hcp/telemetry/gauge_store.go | 77 +++++ agent/hcp/telemetry/gauge_store_test.go | 89 ++++++ agent/hcp/telemetry/otel_sink.go | 209 +++++++++++++ agent/hcp/telemetry/otel_sink_test.go | 386 ++++++++++++++++++++++++ go.mod | 12 +- go.sum | 24 +- 6 files changed, 779 insertions(+), 18 deletions(-) create mode 100644 agent/hcp/telemetry/gauge_store.go create mode 100644 agent/hcp/telemetry/gauge_store_test.go create mode 100644 agent/hcp/telemetry/otel_sink.go create mode 100644 agent/hcp/telemetry/otel_sink_test.go diff --git a/agent/hcp/telemetry/gauge_store.go b/agent/hcp/telemetry/gauge_store.go new file mode 100644 index 000000000000..76dfb7806668 --- /dev/null +++ b/agent/hcp/telemetry/gauge_store.go @@ -0,0 +1,77 @@ +package telemetry + +import ( + "context" + "sync" + + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/metric" +) + +// gaugeStore holds last seen Gauge values for a particular metric () in the store. +// OTEL does not currently have a synchronous Gauge instrument. Instead, it allows the registration of callbacks. +// The callbacks are called during export, where the Gauge value must be returned. +// This store is a workaround, which holds last seen Gauge values until the callback is called. +type gaugeStore struct { + store map[string]*gaugeValue + mutex sync.Mutex +} + +// gaugeValues are the last seen measurement for a Gauge metric, which contains a float64 value and labels. +type gaugeValue struct { + Value float64 + Attributes []attribute.KeyValue +} + +// NewGaugeStore returns an initialized empty gaugeStore. +func NewGaugeStore() *gaugeStore { + return &gaugeStore{ + store: make(map[string]*gaugeValue, 0), + } +} + +// LoadAndDelete will read a Gauge value and delete it. +// Once registered for a metric name, a Gauge callback will continue to execute every collection cycel. 
+// We must delete the value once we have read it, to avoid repeat values being sent. +func (g *gaugeStore) LoadAndDelete(key string) (*gaugeValue, bool) { + g.mutex.Lock() + defer g.mutex.Unlock() + + gauge, ok := g.store[key] + if !ok { + return nil, ok + } + + delete(g.store, key) + + return gauge, ok +} + +// Set adds a gaugeValue to the global gauge store. +func (g *gaugeStore) Set(key string, value float64, labels []attribute.KeyValue) { + g.mutex.Lock() + defer g.mutex.Unlock() + + gv := &gaugeValue{ + Value: value, + Attributes: labels, + } + + g.store[key] = gv +} + +// gaugeCallback returns a callback which gets called when metrics are collected for export. +func (g *gaugeStore) gaugeCallback(key string) metric.Float64Callback { + // Closures keep a reference to the key string, that get garbage collected when code completes. + return func(ctx context.Context, obs metric.Float64Observer) error { + select { + case <-ctx.Done(): + return ctx.Err() + default: + if gauge, ok := g.LoadAndDelete(key); ok { + obs.Observe(gauge.Value, metric.WithAttributes(gauge.Attributes...)) + } + return nil + } + } +} diff --git a/agent/hcp/telemetry/gauge_store_test.go b/agent/hcp/telemetry/gauge_store_test.go new file mode 100644 index 000000000000..1171ee379c32 --- /dev/null +++ b/agent/hcp/telemetry/gauge_store_test.go @@ -0,0 +1,89 @@ +package telemetry + +import ( + "context" + "fmt" + "sync" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" +) + +func TestGaugeStore(t *testing.T) { + t.Parallel() + + gaugeStore := NewGaugeStore() + + attributes := []attribute.KeyValue{ + { + Key: attribute.Key("test_key"), + Value: attribute.StringValue("test_value"), + }, + } + + gaugeStore.Set("test", 1.23, attributes) + + // Should store a new gauge. 
+ val, ok := gaugeStore.LoadAndDelete("test") + require.True(t, ok) + require.Equal(t, val.Value, 1.23) + require.Equal(t, val.Attributes, attributes) + + // Gauge with key "test" have been deleted. + val, ok = gaugeStore.LoadAndDelete("test") + require.False(t, ok) + require.Nil(t, val) + + gaugeStore.Set("duplicate", 1.5, nil) + gaugeStore.Set("duplicate", 6.7, nil) + + // Gauge with key "duplicate" should hold the latest (last seen) value. + val, ok = gaugeStore.LoadAndDelete("duplicate") + require.True(t, ok) + require.Equal(t, val.Value, 6.7) +} + +func TestGaugeCallback_Failure(t *testing.T) { + t.Parallel() + + k := "consul.raft.apply" + gaugeStore := NewGaugeStore() + gaugeStore.Set(k, 1.23, nil) + + cb := gaugeStore.gaugeCallback(k) + ctx, cancel := context.WithCancel(context.Background()) + + cancel() + err := cb(ctx, nil) + require.ErrorIs(t, err, context.Canceled) +} + +// TestGaugeStore_Race induces a race condition. When run with go test -race, +// this test should pass if implementation is concurrency safe. 
+func TestGaugeStore_Race(t *testing.T) { + t.Parallel() + + gaugeStore := NewGaugeStore() + + wg := &sync.WaitGroup{} + samples := 100 + errCh := make(chan error, samples) + for i := 0; i < samples; i++ { + wg.Add(1) + key := fmt.Sprintf("consul.test.%d", i) + value := 12.34 + go func() { + defer wg.Done() + gaugeStore.Set(key, value, nil) + gv, _ := gaugeStore.LoadAndDelete(key) + if gv.Value != value { + errCh <- fmt.Errorf("expected value: '%f', but got: '%f' for key: '%s'", value, gv.Value, key) + } + }() + } + + wg.Wait() + + require.Empty(t, errCh) +} diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go new file mode 100644 index 000000000000..ec5e2d476b1a --- /dev/null +++ b/agent/hcp/telemetry/otel_sink.go @@ -0,0 +1,209 @@ +package telemetry + +import ( + "bytes" + "context" + "fmt" + "net/url" + "strings" + "sync" + "time" + + gometrics "github.com/armon/go-metrics" + "github.com/hashicorp/go-hclog" + "go.opentelemetry.io/otel/attribute" + otelmetric "go.opentelemetry.io/otel/metric" + otelsdk "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + + "github.com/hashicorp/consul/agent/hcp/client" +) + +type OTELSinkOpts struct { + Reader otelsdk.Reader + Ctx context.Context +} + +// OTELSink captures and aggregates telemetry data as per the OpenTelemetry (OTEL) specification. +// Metric data is exported in OpenTelemetry Protocol (OTLP) wire format. +// This should be used as a Go Metrics backend, as it implements the MetricsSink interface. +type OTELSink struct { + // spaceReplacer cleans the flattened key by removing any spaces. + spaceReplacer *strings.Replacer + logger hclog.Logger + + // meterProvider is an OTEL MeterProvider, the entrypoint to the OTEL Metrics SDK. + // It handles reading/export of aggregated metric data. + // It enables creation and usage of an OTEL Meter. + meterProvider *otelsdk.MeterProvider + + // meter is an OTEL Meter, which enables the creation of OTEL instruments. 
+ meter *otelmetric.Meter + + // Instrument stores contain an OTEL Instrument per metric name () + // for each gauge, counter and histogram types. + // An instrument allows us to record a measurement for a particular metric, and continuously aggregates metrics. + // We lazy load the creation of these intruments until a metric is seen, and use them repeatedly to record measurements. + gaugeInstruments map[string]otelmetric.Float64ObservableGauge + counterInstruments map[string]otelmetric.Float64Counter + histogramInstruments map[string]otelmetric.Float64Histogram + + // gaugeStore is required to hold last-seen values of gauges + // This is a workaround, as OTEL currently does not have synchronous gauge instruments. + // It only allows the registration of "callbacks", which obtain values when the callback is called. + // We must hold gauge values until the callback is called, when the measurement is exported, and can be removed. + gaugeStore *gaugeStore + + mutex sync.Mutex +} + +// NewOTELReader returns a configured OTEL PeriodicReader to export metrics every X seconds. +// It configures the reader with a custom OTELExporter with a MetricsClient to transform and export +// metrics in OTLP format to an external url. +func NewOTELReader(client client.MetricsClient, url url.URL, exportInterval time.Duration) otelsdk.Reader { + exporter := NewOTELExporter(client, url) + return otelsdk.NewPeriodicReader(exporter, otelsdk.WithInterval(exportInterval)) +} + +// NewOTELSink returns a sink which fits the Go Metrics MetricsSink interface. +// It sets up a MeterProvider and Meter, key pieces of the OTEL Metrics SDK which +// enable us to create OTEL Instruments to record measurements. 
+func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { + if opts.Reader == nil { + return nil, fmt.Errorf("ferror: provide valid reader") + } + + if opts.Ctx == nil { + return nil, fmt.Errorf("ferror: provide valid context") + } + + // Setup OTEL Metrics SDK to aggregate, convert and export metrics. + res := resource.NewSchemaless() + meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) + meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") + + return &OTELSink{ + spaceReplacer: strings.NewReplacer(" ", "_"), + logger: hclog.FromContext(opts.Ctx).Named("otel_sink"), + meterProvider: meterProvider, + meter: &meter, + gaugeStore: NewGaugeStore(), + gaugeInstruments: make(map[string]otelmetric.Float64ObservableGauge, 0), + counterInstruments: make(map[string]otelmetric.Float64Counter, 0), + histogramInstruments: make(map[string]otelmetric.Float64Histogram, 0), + }, nil +} + +// SetGauge emits a Consul gauge metric. +func (o *OTELSink) SetGauge(key []string, val float32) { + o.SetGaugeWithLabels(key, val, nil) +} + +// AddSample emits a Consul histogram metric. +func (o *OTELSink) AddSample(key []string, val float32) { + o.AddSampleWithLabels(key, val, nil) +} + +// IncrCounter emits a Consul counter metric. +func (o *OTELSink) IncrCounter(key []string, val float32) { + o.IncrCounterWithLabels(key, val, nil) +} + +// AddSampleWithLabels emits a Consul gauge metric that gets +// registed by an OpenTelemetry Histogram instrument. +func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { + k := o.flattenKey(key) + + // Set value in global Gauge store. + o.gaugeStore.Set(k, float64(val), toAttributes(labels)) + + o.mutex.Lock() + defer o.mutex.Unlock() + + // If instrument does not exist, create it and register callback to emit last value in global Gauge store. 
+ if _, ok := o.gaugeInstruments[k]; !ok { + // The registration of a callback only needs to happen once, when the instrument is created. + // The callback will be triggered every export cycle for that metric. + // It must be explicitly de-registered to be removed (which we do not do), to ensure new gauge values are exported every cycle. + inst, err := (*o.meter).Float64ObservableGauge(k, otelmetric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) + if err != nil { + o.logger.Error("Failed to emit gauge: %w", err) + return + } + o.gaugeInstruments[k] = inst + } +} + +// AddSampleWithLabels emits a Consul sample metric that gets registed by an OpenTelemetry Histogram instrument. +func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { + k := o.flattenKey(key) + + o.mutex.Lock() + defer o.mutex.Unlock() + + inst, ok := o.histogramInstruments[k] + if !ok { + histogram, err := (*o.meter).Float64Histogram(k) + if err != nil { + o.logger.Error("Failed to emit gauge: %w", err) + return + } + inst = histogram + o.histogramInstruments[k] = inst + } + + attrs := toAttributes(labels) + inst.Record(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...)) +} + +// IncrCounterWithLabels emits a Consul counter metric that gets registed by an OpenTelemetry Histogram instrument. +func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { + k := o.flattenKey(key) + + o.mutex.Lock() + defer o.mutex.Unlock() + + inst, ok := o.counterInstruments[k] + if !ok { + counter, err := (*o.meter).Float64Counter(k) + if err != nil { + o.logger.Error("Failed to emit gauge: %w", err) + return + } + + inst = counter + o.counterInstruments[k] = inst + } + + attrs := toAttributes(labels) + inst.Add(context.TODO(), float64(val), otelmetric.WithAttributes(attrs...)) +} + +// EmitKey unsupported. +func (o *OTELSink) EmitKey(key []string, val float32) {} + +// flattenKey key along with its labels. 
+func (o *OTELSink) flattenKey(parts []string) string { + buf := &bytes.Buffer{} + joined := strings.Join(parts, ".") + + o.spaceReplacer.WriteString(buf, joined) + + return buf.String() +} + +// toAttributes converts go metrics Labels into OTEL format []attributes.KeyValue +func toAttributes(labels []gometrics.Label) []attribute.KeyValue { + if len(labels) == 0 { + return nil + } + attrs := make([]attribute.KeyValue, len(labels)) + for i, label := range labels { + attrs[i] = attribute.KeyValue{ + Key: attribute.Key(label.Name), + Value: attribute.StringValue(label.Value), + } + } + + return attrs +} diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go new file mode 100644 index 000000000000..2b4dc26abda7 --- /dev/null +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -0,0 +1,386 @@ +package telemetry + +import ( + "context" + "fmt" + "sort" + "strings" + "sync" + "testing" + + gometrics "github.com/armon/go-metrics" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/metric/metricdata" + "go.opentelemetry.io/otel/sdk/resource" +) + +var ( + attrs = attribute.NewSet(attribute.KeyValue{ + Key: attribute.Key("server.id"), + Value: attribute.StringValue("test"), + }) + + expectedSinkMetrics = map[string]metricdata.Metrics{ + "consul.raft.leader": { + Name: "consul.raft.leader", + Description: "", + Unit: "", + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(0)), + }, + }, + }, + }, + "consul.autopilot.healthy": { + Name: "consul.autopilot.healthy", + Description: "", + Unit: "", + Data: metricdata.Gauge[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: attrs, + Value: float64(float32(1.23)), + }, + }, + }, + }, + "consul.raft.state.leader": { + Name: "consul.raft.state.leader", + Description: "", + Unit: "", + Data: 
metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Value: float64(float32(23.23)), + }, + }, + }, + }, + "consul.raft.apply": { + Name: "consul.raft.apply", + Description: "", + Unit: "", + Data: metricdata.Sum[float64]{ + DataPoints: []metricdata.DataPoint[float64]{ + { + Attributes: attrs, + Value: float64(float32(1.44)), + }, + }, + }, + }, + "consul.raft.leader.lastContact": { + Name: "consul.raft.leader.lastContact", + Description: "", + Unit: "", + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ + { + Attributes: *attribute.EmptySet(), + Count: 1, + Sum: float64(float32(45.32)), + Min: metricdata.NewExtrema(float64(float32(45.32))), + Max: metricdata.NewExtrema(float64(float32(45.32))), + }, + }, + }, + }, + "consul.raft.commitTime": { + Name: "consul.raft.commitTime", + Description: "", + Unit: "", + Data: metricdata.Histogram[float64]{ + DataPoints: []metricdata.HistogramDataPoint[float64]{ + { + Attributes: attrs, + Count: 1, + Sum: float64(float32(26.34)), + Min: metricdata.NewExtrema(float64(float32(26.34))), + Max: metricdata.NewExtrema(float64(float32(26.34))), + }, + }, + }, + }, + } +) + +func TestNewOTELSink(t *testing.T) { + t.Parallel() + for name, test := range map[string]struct { + wantErr string + opts *OTELSinkOpts + }{ + "failsWithEmptyLogger": { + wantErr: "ferror: provide valid context", + opts: &OTELSinkOpts{ + Reader: metric.NewManualReader(), + }, + }, + "failsWithEmptyReader": { + wantErr: "ferror: provide valid reader", + opts: &OTELSinkOpts{ + Reader: nil, + Ctx: context.Background(), + }, + }, + } { + test := test + t.Run(name, func(t *testing.T) { + t.Parallel() + sink, err := NewOTELSink(test.opts) + if test.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), test.wantErr) + return + } + + require.NotNil(t, sink) + }) + } +} + +func TestOTELSink(t *testing.T) { + t.Parallel() + + // Manual reader 
outputs the aggregated metrics when reader.Collect is called. + reader := metric.NewManualReader() + + ctx := context.Background() + opts := &OTELSinkOpts{ + Reader: reader, + Ctx: ctx, + } + + sink, err := NewOTELSink(opts) + require.NoError(t, err) + + labels := []gometrics.Label{ + { + Name: "server.id", + Value: "test", + }, + } + + sink.SetGauge([]string{"consul", "raft", "leader"}, float32(0)) + sink.SetGaugeWithLabels([]string{"consul", "autopilot", "healthy"}, float32(1.23), labels) + + sink.IncrCounter([]string{"consul", "raft", "state", "leader"}, float32(23.23)) + sink.IncrCounterWithLabels([]string{"consul", "raft", "apply"}, float32(1.44), labels) + + sink.AddSample([]string{"consul", "raft", "leader", "lastContact"}, float32(45.32)) + sink.AddSampleWithLabels([]string{"consul", "raft", "commitTime"}, float32(26.34), labels) + + var collected metricdata.ResourceMetrics + err = reader.Collect(ctx, &collected) + require.NoError(t, err) + + isSame(t, expectedSinkMetrics, collected) +} + +func TestOTELSink_Race(t *testing.T) { + reader := metric.NewManualReader() + ctx := context.Background() + opts := &OTELSinkOpts{ + Ctx: ctx, + Reader: reader, + } + + sink, err := NewOTELSink(opts) + require.NoError(t, err) + + samples := 100 + expectedMetrics := generateSamples(samples) + wg := &sync.WaitGroup{} + errCh := make(chan error, samples) + for k, v := range expectedMetrics { + wg.Add(1) + go func(k string, v metricdata.Metrics) { + defer wg.Done() + performSinkOperation(sink, k, v, errCh) + }(k, v) + } + wg.Wait() + + require.Empty(t, errCh) + + var collected metricdata.ResourceMetrics + err = reader.Collect(ctx, &collected) + require.NoError(t, err) + + isSame(t, expectedMetrics, collected) +} + +// generateSamples generates n of each gauges, counter and histogram measurements to use for test purposes. 
+func generateSamples(n int) map[string]metricdata.Metrics {
+	generated := make(map[string]metricdata.Metrics, 3*n) // room for n gauges + n sums + n histograms
+
+	for i := 0; i < n; i++ {
+		v := 12.3 // every generated gauge carries this same value
+		k := fmt.Sprintf("consul.test.gauges.%d", i)
+		generated[k] = metricdata.Metrics{
+			Name: k,
+			Data: metricdata.Gauge[float64]{
+				DataPoints: []metricdata.DataPoint[float64]{
+					{
+						Attributes: *attribute.EmptySet(),
+						Value:      float64(float32(v)), // round-tripped through float32, the type handed to the sink methods
+					},
+				},
+			},
+		}
+	}
+
+	for i := 0; i < n; i++ {
+		v := 22.23 // every generated counter carries this same value
+		k := fmt.Sprintf("consul.test.sum.%d", i)
+		generated[k] = metricdata.Metrics{
+			Name: k,
+			Data: metricdata.Sum[float64]{
+				DataPoints: []metricdata.DataPoint[float64]{
+					{
+						Attributes: *attribute.EmptySet(),
+						Value:      float64(float32(v)),
+					},
+				},
+			},
+		}
+
+	}
+
+	for i := 0; i < n; i++ {
+		v := 13.24 // a single sample per histogram, so Sum, Min and Max are all v and Count is 1
+		k := fmt.Sprintf("consul.test.hist.%d", i)
+		generated[k] = metricdata.Metrics{
+			Name: k,
+			Data: metricdata.Histogram[float64]{
+				DataPoints: []metricdata.HistogramDataPoint[float64]{
+					{
+						Attributes: *attribute.EmptySet(),
+						Sum:        float64(float32(v)),
+						Max:        metricdata.NewExtrema(float64(float32(v))),
+						Min:        metricdata.NewExtrema(float64(float32(v))),
+						Count:      1,
+					},
+				},
+			},
+		}
+	}
+
+	return generated
+}
+
+// performSinkOperation emits the measurement described by v through the matching OTELSink method and reports failures on errCh.
+func performSinkOperation(sink *OTELSink, k string, v metricdata.Metrics, errCh chan error) {
+	key := strings.Split(k, ".") // the sink methods take the metric key as its dot-separated parts
+	data := v.Data
+	switch data.(type) { // dispatch on the concrete metric kind; any other kind is silently skipped
+	case metricdata.Gauge[float64]:
+		gauge, ok := data.(metricdata.Gauge[float64])
+		if !ok { // defensive only: the enclosing case has already matched this type
+			errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key)
+		}
+		sink.SetGauge(key, float32(gauge.DataPoints[0].Value))
+	case metricdata.Sum[float64]:
+		sum, ok := data.(metricdata.Sum[float64])
+		if !ok { // defensive only: the enclosing case has already matched this type
+			errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key)
+		}
+		sink.IncrCounter(key, float32(sum.DataPoints[0].Value))
+	case metricdata.Histogram[float64]:
+		hist, ok := data.(metricdata.Histogram[float64])
+		if !ok { // defensive only: the enclosing case has already matched this type
+			errCh <- fmt.Errorf("unexpected type assertion error for key: %s", key)
+		}
+		sink.AddSample(key, float32(hist.DataPoints[0].Sum)) // the expected histograms hold one sample, so Sum is that sample
+	}
+}
+
+func isSame(t *testing.T, expectedMap map[string]metricdata.Metrics, actual metricdata.ResourceMetrics) {
+	// Validate resource
+	require.Equal(t, resource.NewSchemaless(), actual.Resource)
+
+	// Validate Metrics
+	require.NotEmpty(t, actual.ScopeMetrics)
+	actualMetrics := actual.ScopeMetrics[0].Metrics // only the first scope's metrics are inspected
+	require.Equal(t, len(expectedMap), len(actualMetrics))
+
+	for _, actual := range actualMetrics { // NOTE: shadows the ResourceMetrics parameter inside the loop
+		name := actual.Name
+		expected, ok := expectedMap[actual.Name]
+		require.True(t, ok, "metric key %s should be in expectedMetrics map", name)
+		isSameMetrics(t, expected, actual)
+	}
+}
+
+// isSameMetrics verifies that two metricdata.Metrics objects are equal while ignoring the time component.
+// Avoid duplicate datapoint values in the test data so that the sort order stays predictable.
+func isSameMetrics(t *testing.T, expected metricdata.Metrics, actual metricdata.Metrics) {
+	require.Equal(t, expected.Name, actual.Name, "different .Name field")
+	require.Equal(t, expected.Description, actual.Description, "different .Description field")
+	require.Equal(t, expected.Unit, actual.Unit, "different .Unit field")
+
+	switch expectedData := expected.Data.(type) { // the expected type drives the check; other data types are not compared
+	case metricdata.Gauge[float64]:
+		actualData, ok := actual.Data.(metricdata.Gauge[float64])
+		require.True(t, ok, "different metric types: expected metricdata.Gauge[float64]")
+
+		isSameDataPoint(t, expectedData.DataPoints, actualData.DataPoints)
+	case metricdata.Sum[float64]:
+		actualData, ok := actual.Data.(metricdata.Sum[float64])
+		require.True(t, ok, "different metric types: expected metricdata.Sum[float64]")
+
+		isSameDataPoint(t, expectedData.DataPoints, actualData.DataPoints)
+	case metricdata.Histogram[float64]:
+		actualData, ok := actual.Data.(metricdata.Histogram[float64])
+		require.True(t, ok, "different metric types: expected metricdata.Histogram")
+
+		isSameHistogramData(t, expectedData.DataPoints, actualData.DataPoints)
+	}
+}
+
+func isSameDataPoint(t *testing.T, expected []metricdata.DataPoint[float64], actual []metricdata.DataPoint[float64]) {
+	require.Equal(t, len(expected), len(actual), "different datapoints length")
+
+	// Sort each slice in place by its own ascending value so the datapoints can be compared index by index.
+	sort.Slice(expected, func(i, j int) bool {
+		return expected[i].Value < expected[j].Value
+	})
+	sort.Slice(actual, func(i, j int) bool {
+		return actual[i].Value < actual[j].Value // must order actual by actual's values, not expected's
+	})
+
+	// Only verify the value and attributes; the time fields are deliberately not compared.
+	for i, dp := range expected {
+		currActual := actual[i]
+		require.Equal(t, dp.Value, currActual.Value, "different datapoint value")
+		require.Equal(t, dp.Attributes, currActual.Attributes, "different attributes")
+	}
+}
+
+func isSameHistogramData(t *testing.T, expected []metricdata.HistogramDataPoint[float64], actual []metricdata.HistogramDataPoint[float64]) {
+	require.Equal(t, len(expected), len(actual), "different histogram datapoint length")
+
+	// Sort each slice in place by its own ascending sum so the datapoints can be compared index by index.
+	sort.Slice(expected, func(i, j int) bool {
+		return expected[i].Sum < expected[j].Sum
+	})
+	sort.Slice(actual, func(i, j int) bool {
+		return actual[i].Sum < actual[j].Sum // must order actual by actual's sums, not expected's
+	})
+
+	// Only verify the aggregate values and the attributes; the time fields are deliberately not compared.
+	for i, dp := range expected {
+		currActual := actual[i]
+		require.Equal(t, dp.Sum, currActual.Sum, "different histogram datapoint .Sum value")
+		require.Equal(t, dp.Max, currActual.Max, "different histogram datapoint .Max value")
+		require.Equal(t, dp.Min, currActual.Min, "different histogram datapoint .Min value")
+		require.Equal(t, dp.Count, currActual.Count, "different histogram datapoint .Count value")
+		require.Equal(t, dp.Attributes, currActual.Attributes, "different attributes")
+	}
+}
diff --git a/go.mod b/go.mod index ffafa5157295..005db2fa7524 100644 --- a/go.mod +++ b/go.mod @@ -96,16 +96,17 @@ require ( github.com/shirou/gopsutil/v3 v3.22.8 github.com/stretchr/testify v1.8.2 go.etcd.io/bbolt v1.3.6 - go.opentelemetry.io/otel v1.15.1 - go.opentelemetry.io/otel/sdk v1.15.1 - go.opentelemetry.io/otel/sdk/metric v0.38.1 + go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218 + go.opentelemetry.io/otel/metric v1.16.0-rc.1 + go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218 + go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218 go.opentelemetry.io/proto/otlp v0.19.0 go.uber.org/goleak v1.1.10 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d golang.org/x/net
v0.7.0 golang.org/x/oauth2 v0.0.0-20220909003341-f21342109be1 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 - golang.org/x/sys v0.7.0 + golang.org/x/sys v0.8.0 golang.org/x/time v0.3.0 google.golang.org/genproto v0.0.0-20220921223823-23cae91e6737 google.golang.org/grpc v1.49.0 @@ -232,8 +233,7 @@ require ( github.com/yusufpapurcu/wmi v1.2.2 // indirect go.mongodb.org/mongo-driver v1.10.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/metric v0.38.1 // indirect - go.opentelemetry.io/otel/trace v1.15.1 // indirect + go.opentelemetry.io/otel/trace v1.16.0-rc.1 // indirect go.uber.org/atomic v1.9.0 // indirect golang.org/x/exp v0.0.0-20230321023759-10a507213a29 // indirect golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect diff --git a/go.sum b/go.sum index 4412cb4947e8..05bf35417367 100644 --- a/go.sum +++ b/go.sum @@ -1080,16 +1080,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= -go.opentelemetry.io/otel v1.15.1 h1:3Iwq3lfRByPaws0f6bU3naAqOR1n5IeDWd9390kWHa8= -go.opentelemetry.io/otel v1.15.1/go.mod h1:mHHGEHVDLal6YrKMmk9LqC4a3sF5g+fHfrttQIB1NTc= -go.opentelemetry.io/otel/metric v0.38.1 h1:2MM7m6wPw9B8Qv8iHygoAgkbejed59uUR6ezR5T3X2s= -go.opentelemetry.io/otel/metric v0.38.1/go.mod h1:FwqNHD3I/5iX9pfrRGZIlYICrJv0rHEUl2Ln5vdIVnQ= -go.opentelemetry.io/otel/sdk v1.15.1 h1:5FKR+skgpzvhPQHIEfcwMYjCBr14LWzs3uSqKiQzETI= -go.opentelemetry.io/otel/sdk v1.15.1/go.mod h1:8rVtxQfrbmbHKfqzpQkT5EzZMcbMBwTzNAggbEAM0KA= -go.opentelemetry.io/otel/sdk/metric v0.38.1 h1:EkO5wI4NT/fUaoPMGc0fKV28JaWe7q4vfVpEVasGb+8= -go.opentelemetry.io/otel/sdk/metric v0.38.1/go.mod h1:Rn4kSXFF9ZQZ5lL1pxQjCbK4seiO+U7s0ncmIFJaj34= -go.opentelemetry.io/otel/trace v1.15.1 
h1:uXLo6iHJEzDfrNC0L0mNjItIp06SyaBQxu5t3xMlngY= -go.opentelemetry.io/otel/trace v1.15.1/go.mod h1:IWdQG/5N1x7f6YUlmdLeJvH9yxtuJAfc4VW5Agv9r/8= +go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218 h1:aKv7ueCXRlBdHGBNfot8BYwcvp4jwJ/rK/T/KQ3uXoA= +go.opentelemetry.io/otel v1.16.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:dGSTwGyzvw5Dzn8nE8HrfOXnWIDrL0GIzQdOpTnJ2CM= +go.opentelemetry.io/otel/metric v1.16.0-rc.1 h1:R9MPFw2jA+z91ejfOVU7QRYSdb37E5Ak6jJUwNMQbR8= +go.opentelemetry.io/otel/metric v1.16.0-rc.1/go.mod h1:0I+4bYjKHaoXGw7uXAABYA5wyptQdXeXOhi3SBgD6GM= +go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218 h1:YC5ikDtSM7s+sJprqR7edyP9EBKMHGaAnWfte7EsQCI= +go.opentelemetry.io/otel/sdk v1.16.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:tY+q2LQ4iuvdwcN0zrt/2NdF3ntVodUPbiHPMRZnXyo= +go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218 h1:5Ehgy+TyY7Jh3orDVIn7uVJ7UkFm3yP5lXXQN8ia+00= +go.opentelemetry.io/otel/sdk/metric v0.39.0-rc.1.0.20230510144741-7dea2225a218/go.mod h1:VKkJz/K+pb4rkqXlBH5DMJi1ebQLYhV82fTSK3WvOOQ= +go.opentelemetry.io/otel/trace v1.16.0-rc.1 h1:/dPBlZrzSSXglIEKgy/A3kyiACcmgNMFWKTIHHxxd/o= +go.opentelemetry.io/otel/trace v1.16.0-rc.1/go.mod h1:xqretMbHfSU24I2KKbSEG+aVHsNtBCr5L4BGaNqTx68= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= go.opentelemetry.io/proto/otlp v0.19.0 h1:IVN6GR+mhC4s5yfcTbmzHYODqvWAp3ZedA2SJPI1Nnw= go.opentelemetry.io/proto/otlp v0.19.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= @@ -1354,8 +1354,8 @@ golang.org/x/sys v0.0.0-20220128215802-99c3d69c2c27/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220728004956-3c1f35247d10/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys 
v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU= -golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0 h1:EBmGv8NaZBZTWvrbjNoL6HVt+IVy3QDQpJs7VRIw3tU= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= From 19b1d0c02969638e406e9c5e4789819d81962928 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 16 May 2023 22:28:27 -0400 Subject: [PATCH 077/100] pass extraSinks as function param instead --- .../testdata/TestRuntimeConfig_Sanitize.golden | 1 - agent/setup.go | 7 +++++-- lib/telemetry.go | 13 +++++-------- lib/telemetry_test.go | 8 +++----- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden index 86311549c64e..054a284aeb85 100644 --- a/agent/config/testdata/TestRuntimeConfig_Sanitize.golden +++ b/agent/config/testdata/TestRuntimeConfig_Sanitize.golden @@ -462,7 +462,6 @@ "DisableHostname": false, "DogstatsdAddr": "", "DogstatsdTags": [], - "ExtraSinks": [], "FilterDefault": false, "MetricsPrefix": "", "PrometheusOpts": { diff --git a/agent/setup.go b/agent/setup.go index abb72b118ea1..0b3f2488806b 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -10,6 +10,7 @@ import ( "sync" "time" + "github.com/armon/go-metrics" "github.com/armon/go-metrics/prometheus" "github.com/hashicorp/go-hclog" wal "github.com/hashicorp/raft-wal" @@ -100,17 +101,19 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl cfg.Telemetry.PrometheusOpts.GaugeDefinitions = gauges cfg.Telemetry.PrometheusOpts.CounterDefinitions = 
counters cfg.Telemetry.PrometheusOpts.SummaryDefinitions = summaries + + var extraSinks []metrics.MetricSink if cfg.IsCloudEnabled() { d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger) if err != nil { return d, err } if d.HCP.Sink != nil { - cfg.Telemetry.ExtraSinks = append(cfg.Telemetry.ExtraSinks, d.HCP.Sink) + extraSinks = append(extraSinks, d.HCP.Sink) } } - d.MetricsConfig, err = lib.InitTelemetry(cfg.Telemetry, d.Logger) + d.MetricsConfig, err = lib.InitTelemetry(cfg.Telemetry, d.Logger, extraSinks...) if err != nil { return d, fmt.Errorf("failed to initialize telemetry: %w", err) } diff --git a/lib/telemetry.go b/lib/telemetry.go index 1327f39ab900..2d87707c3318 100644 --- a/lib/telemetry.go +++ b/lib/telemetry.go @@ -210,9 +210,6 @@ type TelemetryConfig struct { // // hcl: telemetry { prometheus_retention_time = "duration" } PrometheusOpts prometheus.PrometheusOpts - - // ExtraSinks are additional metrics.MetricSink implementations that are to always be added. - ExtraSinks []metrics.MetricSink } // MetricsHandler provides an http.Handler for displaying metrics. 
@@ -327,7 +324,7 @@ func circonusSink(cfg TelemetryConfig, _ string) (metrics.MetricSink, error) { return sink, nil } -func configureSinks(cfg TelemetryConfig, memSink metrics.MetricSink) (metrics.FanoutSink, error) { +func configureSinks(cfg TelemetryConfig, memSink metrics.MetricSink, extraSinks []metrics.MetricSink) (metrics.FanoutSink, error) { metricsConf := metrics.DefaultConfig(cfg.MetricsPrefix) metricsConf.EnableHostname = !cfg.DisableHostname metricsConf.FilterDefault = cfg.FilterDefault @@ -352,7 +349,7 @@ func configureSinks(cfg TelemetryConfig, memSink metrics.MetricSink) (metrics.Fa addSink(dogstatdSink) addSink(circonusSink) addSink(prometheusSink) - for _, sink := range cfg.ExtraSinks { + for _, sink := range extraSinks { if sink != nil { sinks = append(sinks, sink) } @@ -372,7 +369,7 @@ func configureSinks(cfg TelemetryConfig, memSink metrics.MetricSink) (metrics.Fa // values as returned by Runtimecfg.Config(). // InitTelemetry retries configurating the sinks in case error is retriable // and retry_failed_connection is set to true. 
-func InitTelemetry(cfg TelemetryConfig, logger hclog.Logger) (*MetricsConfig, error) { +func InitTelemetry(cfg TelemetryConfig, logger hclog.Logger, extraSinks ...metrics.MetricSink) (*MetricsConfig, error) { if cfg.Disable { return nil, nil } @@ -392,7 +389,7 @@ func InitTelemetry(cfg TelemetryConfig, logger hclog.Logger) (*MetricsConfig, er } for { logger.Warn("retrying configure metric sinks", "retries", waiter.Failures()) - _, err := configureSinks(cfg, memSink) + _, err := configureSinks(cfg, memSink, extraSinks) if err == nil { logger.Info("successfully configured metrics sinks") return @@ -405,7 +402,7 @@ func InitTelemetry(cfg TelemetryConfig, logger hclog.Logger) (*MetricsConfig, er } } - if _, errs := configureSinks(cfg, memSink); errs != nil { + if _, errs := configureSinks(cfg, memSink, extraSinks); errs != nil { if isRetriableError(errs) && cfg.RetryFailedConfiguration { logger.Warn("failed configure sinks", "error", multierror.Flatten(errs)) ctx, cancel = context.WithCancel(context.Background()) diff --git a/lib/telemetry_test.go b/lib/telemetry_test.go index 84ec13ed733d..a2c0075598ec 100644 --- a/lib/telemetry_test.go +++ b/lib/telemetry_test.go @@ -21,15 +21,13 @@ func newCfg() TelemetryConfig { StatsdAddr: "statsd.host:1234", StatsiteAddr: "statsite.host:1234", DogstatsdAddr: "mydog.host:8125", - ExtraSinks: []metrics.MetricSink{ - &metrics.BlackholeSink{}, - }, } } func TestConfigureSinks(t *testing.T) { cfg := newCfg() - sinks, err := configureSinks(cfg, nil) + extraSinks := []metrics.MetricSink{&metrics.BlackholeSink{}} + sinks, err := configureSinks(cfg, nil, extraSinks) require.Error(t, err) // 4 sinks: statsd, statsite, inmem, extra sink (blackhole) require.Equal(t, 4, len(sinks)) @@ -37,7 +35,7 @@ func TestConfigureSinks(t *testing.T) { cfg = TelemetryConfig{ DogstatsdAddr: "", } - _, err = configureSinks(cfg, nil) + _, err = configureSinks(cfg, nil, nil) require.NoError(t, err) } From 8ae6af61221507be0d4e3e026ece1c65b4b8e760 Mon Sep 17 
00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 16 May 2023 22:47:28 -0400 Subject: [PATCH 078/100] Add default interval as package export --- agent/hcp/deps.go | 2 +- agent/hcp/telemetry/otel_sink.go | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index bb4c28ec9478..42ba8266017a 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -67,7 +67,7 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo sinkOpts := &telemetry.OTELSinkOpts{ Ctx: ctx, - Reader: telemetry.NewOTELReader(metricsClient, u, 10*time.Second), + Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval), } sink, err := telemetry.NewOTELSink(sinkOpts) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index ee2776db2b5c..9a984150b773 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -19,6 +19,10 @@ import ( "github.com/hashicorp/consul/agent/hcp/client" ) +// DefaultExportInterval is a default time interval between export of aggregated metrics. +const DefaultExportInterval = 10 * time.Second + +// OTELSinkOpts is used to provide configuration when initializing an OTELSink using NewOTELSink. 
type OTELSinkOpts struct { Reader otelsdk.Reader Ctx context.Context From f2ef9c78e07eeb7c61f306906598873f6fda2563 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 00:24:44 -0400 Subject: [PATCH 079/100] remove verifyCCM func --- agent/hcp/client/client.go | 23 ++++++++++ agent/hcp/client/client_test.go | 75 +++++++++++++++++++++++++++++++++ agent/hcp/deps.go | 51 ++++++---------------- agent/hcp/deps_test.go | 73 +++----------------------------- 4 files changed, 119 insertions(+), 103 deletions(-) create mode 100644 agent/hcp/client/client_test.go diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index bca5343ba68f..2979c5814d38 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -32,10 +32,15 @@ type Client interface { DiscoverServers(ctx context.Context) ([]string, error) } +// MetricsConfig holds metrics specific configuration for the TelemetryConfig. +// The endpoint field overrides the TelemetryConfig endpoint. type MetricsConfig struct { Filters []string Endpoint string } + +// TelemetryConfig contains configuration for telemetry data forwarded by Consul servers +// to the HCP Telemetry gateway. type TelemetryConfig struct { Endpoint string Labels map[string]string @@ -93,6 +98,8 @@ func httpClient(c config.CloudConfig) (*httptransport.Runtime, error) { SourceChannel: "consul " + version.GetHumanVersion(), }) } + +// FetchTelemetryConfig obtains telemetry configuration from the Telemetry Gateway. func (c *hcpClient) FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig, error) { params := hcptelemetry.NewAgentTelemetryConfigParamsWithContext(ctx). WithLocationOrganizationID(c.resource.Organization). @@ -255,3 +262,19 @@ func (c *hcpClient) DiscoverServers(ctx context.Context) ([]string, error) { return servers, nil } + +// Enabled verifies if telemetry is enabled by ensuring a valid endpoint has been retrieved. 
+// It returns full metrics endpoint and true if a valid endpoint was obtained. +func (t *TelemetryConfig) Enabled() (string, bool) { + endpoint := t.Endpoint + if override := t.MetricsConfig.Endpoint; override != "" { + endpoint = override + } + + if endpoint == "" { + return "", false + } + + // The endpoint from the HCP gateway is a domain without scheme, and without the metrics path, so they must be added. + return fmt.Sprintf("https://%s/v1/metrics", endpoint), true +} diff --git a/agent/hcp/client/client_test.go b/agent/hcp/client/client_test.go new file mode 100644 index 000000000000..1b6b7a421bc6 --- /dev/null +++ b/agent/hcp/client/client_test.go @@ -0,0 +1,75 @@ +package client + +import ( + "context" + "testing" + + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +func TestFetchTelemetryConfig(t *testing.T) { + t.Parallel() + for name, test := range map[string]struct { + metricsEndpoint string + expect func(*MockClient) + disabled bool + }{ + "success": { + expect: func(mockClient *MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{ + Endpoint: "test.com", + MetricsConfig: &MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + metricsEndpoint: "https://test.com/v1/metrics", + }, + "overrideMetricsEndpoint": { + expect: func(mockClient *MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{ + Endpoint: "test.com", + MetricsConfig: &MetricsConfig{ + Endpoint: "test.com", + }, + }, nil) + }, + metricsEndpoint: "https://test.com/v1/metrics", + }, + "disabledWithEmptyEndpoint": { + expect: func(mockClient *MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{ + Endpoint: "", + MetricsConfig: &MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + disabled: true, + }, + } { + test := test + t.Run(name, func(t *testing.T) { + t.Parallel() + + mock := NewMockClient(t) + test.expect(mock) + + telemetryCfg, 
err := mock.FetchTelemetryConfig(context.Background()) + require.NoError(t, err) + + if test.disabled { + endpoint, ok := telemetryCfg.Enabled() + require.False(t, ok) + require.Empty(t, endpoint) + return + } + + endpoint, ok := telemetryCfg.Enabled() + + require.True(t, ok) + require.Equal(t, test.metricsEndpoint, endpoint) + }) + } +} diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 42ba8266017a..055acc9680a6 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -5,7 +5,6 @@ package hcp import ( "context" - "fmt" "net/url" "time" @@ -47,15 +46,24 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) *telemetry.OTELSink { ctx := context.Background() ctx = hclog.WithContext(ctx, logger) - u, err := verifyCCMRegistration(ctx, hcpClient) + + reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + telemetryCfg, err := hcpClient.FetchTelemetryConfig(reqCtx) if err != nil { - logger.Error("failed to verify CCM registration: %w", err) + logger.Error("failed to fetch telemetry config %w", err) + return nil + } + + endpoint, isEnabled := telemetryCfg.Enabled() + if !isEnabled { return nil } - // if endpoint is empty, no metrics endpoint configuration for this Consul server - // (e.g. not registered with CCM or feature flag to control rollout) so do not enable the HCP metrics sink. - if u == nil { + u, err := url.Parse(endpoint) + if err != nil { + logger.Error("failed to parse url endpoint %w", err) return nil } @@ -78,34 +86,3 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return sink } - -// verifyCCMRegistration checks that a server is registered with the HCP management plane -// by making a HTTP request to the HCP TelemetryConfig endpoint. -// If registered, it returns the endpoint for the HCP Telemetry Gateway endpoint where metrics should be forwarded. 
-func verifyCCMRegistration(ctx context.Context, client hcpclient.Client) (*url.URL, error) { - reqCtx, cancel := context.WithTimeout(ctx, 5*time.Second) - defer cancel() - - telemetryCfg, err := client.FetchTelemetryConfig(reqCtx) - if err != nil { - return nil, fmt.Errorf("failed to fetch telemetry config %w", err) - } - - endpoint := telemetryCfg.Endpoint - if override := telemetryCfg.MetricsConfig.Endpoint; override != "" { - endpoint = override - } - - // no error, the server simply isn't configured for metrics forwarding. - if endpoint == "" { - return nil, nil - } - - // The endpoint from the HCP gateway is a domain without scheme, and without the metrics path, so they must be added. - u, err := url.Parse(fmt.Sprintf("https://%s/v1/metrics", endpoint)) - if err != nil { - return nil, fmt.Errorf("failed to parse url: %w", err) - } - - return u, nil -} diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index eda19244f26f..1d9cb2ebd2e0 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -1,7 +1,6 @@ package hcp import ( - "context" "fmt" "testing" @@ -58,32 +57,10 @@ func TestSink(t *testing.T) { }, mockCloudCfg: client.MockErrCloudCfg{}, }, - } { - t.Run(name, func(t *testing.T) { - c := client.NewMockClient(t) - l := hclog.NewNullLogger() - test.expect(c) - sinkOpts := sink(c, test.mockCloudCfg, l) - if !test.expectedSink { - require.Nil(t, sinkOpts) - return - } - require.NotNil(t, sinkOpts) - }) - } -} - -func TestVerifyCCMRegistration(t *testing.T) { - for name, test := range map[string]struct { - expect func(*client.MockClient) - wantErr string - expectedURL string - }{ "failsWithFetchTelemetryFailure": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("FetchTelemetryConfig error")) }, - wantErr: "failed to fetch telemetry config", }, "failsWithURLParseErr": { expect: func(mockClient *client.MockClient) { @@ -96,7 +73,6 @@ func 
TestVerifyCCMRegistration(t *testing.T) { }, }, nil) }, - wantErr: "failed to parse url:", }, "noErrWithEmptyEndpoint": { expect: func(mockClient *client.MockClient) { @@ -107,53 +83,18 @@ func TestVerifyCCMRegistration(t *testing.T) { }, }, nil) }, - expectedURL: "", - }, - "success": { - expect: func(mockClient *client.MockClient) { - mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - Endpoint: "test.com", - MetricsConfig: &client.MetricsConfig{ - Endpoint: "", - }, - }, nil) - }, - expectedURL: "https://test.com/v1/metrics", - }, - "successMetricsEndpointOverride": { - expect: func(mockClient *client.MockClient) { - mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - Endpoint: "test.com", - MetricsConfig: &client.MetricsConfig{ - Endpoint: "override.com", - }, - }, nil) - }, - expectedURL: "https://override.com/v1/metrics", }, } { t.Run(name, func(t *testing.T) { - ctx := context.Background() - mClient := client.NewMockClient(t) - test.expect(mClient) - - url, err := verifyCCMRegistration(ctx, mClient) - if test.wantErr != "" { - require.Empty(t, url) - require.Error(t, err) - require.Contains(t, err.Error(), test.wantErr) - return - } - - require.NoError(t, err) - if test.expectedURL == "" { - require.Nil(t, url) + c := client.NewMockClient(t) + l := hclog.NewNullLogger() + test.expect(c) + sinkOpts := sink(c, test.mockCloudCfg, l) + if !test.expectedSink { + require.Nil(t, sinkOpts) return } - - expURL, err := url.Parse(test.expectedURL) - require.NoError(t, err) - require.Equal(t, url, expURL) + require.NotNil(t, sinkOpts) }) } } From ca41a33113f0abb1ec1e2a97f2e0fde3be4e37c6 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 11:10:40 -0400 Subject: [PATCH 080/100] Add clusterID --- agent/hcp/client/client.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index 2979c5814d38..bb95c6730e38 
100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -103,7 +103,8 @@ func httpClient(c config.CloudConfig) (*httptransport.Runtime, error) { func (c *hcpClient) FetchTelemetryConfig(ctx context.Context) (*TelemetryConfig, error) { params := hcptelemetry.NewAgentTelemetryConfigParamsWithContext(ctx). WithLocationOrganizationID(c.resource.Organization). - WithLocationProjectID(c.resource.Project) + WithLocationProjectID(c.resource.Project). + WithClusterID(c.resource.ID) resp, err := c.tgw.AgentTelemetryConfig(params, nil) if err != nil { From 52a1ee49335085b87f926026b817409bf947f522 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 11:16:36 -0400 Subject: [PATCH 081/100] Fix import and add t.Parallel() for missing tests --- agent/hcp/deps_test.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 1d9cb2ebd2e0..13324776bb40 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -4,14 +4,15 @@ import ( "fmt" "testing" - "github.com/hashicorp/consul/agent/hcp/client" - "github.com/hashicorp/go-hclog" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + + "github.com/hashicorp/consul/agent/hcp/client" ) func TestSink(t *testing.T) { + t.Parallel() for name, test := range map[string]struct { expect func(*client.MockClient) mockCloudCfg client.CloudConfig @@ -85,7 +86,9 @@ func TestSink(t *testing.T) { }, }, } { + test := test t.Run(name, func(t *testing.T) { + t.Parallel() c := client.NewMockClient(t) l := hclog.NewNullLogger() test.expect(c) From fc863378325d5c8ad22b5ff128c25d6c83cdf9b2 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 23:54:10 -0400 Subject: [PATCH 082/100] Kick Vercel CI --- agent/hcp/client/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index bfcd918edcb9..7c8361a149a0 100644 --- 
a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -290,6 +290,6 @@ func (t *TelemetryConfig) Enabled() (string, bool) { return "", false } - // The endpoint from the HCP gateway is a domain without scheme, and without the metrics path, so they must be added. + // The endpoint from Telemetry Gateway is a domain without scheme, and without the metrics path, so they must be added. return fmt.Sprintf("https://%s/v1/metrics", endpoint), true } From d0dbecb9c7b6e185afcdd5fb1687f3758be32dbd Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 18 May 2023 10:50:43 -0400 Subject: [PATCH 083/100] Remove scheme from endpoint path, and fix error logging --- agent/hcp/client/client.go | 5 ++++- agent/hcp/client/client_test.go | 6 +++--- agent/hcp/deps.go | 8 ++++---- agent/hcp/deps_test.go | 6 +++--- 4 files changed, 14 insertions(+), 11 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index 7c8361a149a0..aec1039525aa 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -22,6 +22,9 @@ import ( "github.com/hashicorp/consul/version" ) +// metricsGatewayPath is the default path for metrics export request on the Telemetry Gateway. +const metricsGatewayPath = "/v1/metrics" + // Client interface exposes HCP operations that can be invoked by Consul // //go:generate mockery --name Client --with-expecter --inpackage @@ -291,5 +294,5 @@ func (t *TelemetryConfig) Enabled() (string, bool) { } // The endpoint from Telemetry Gateway is a domain without scheme, and without the metrics path, so they must be added. 
- return fmt.Sprintf("https://%s/v1/metrics", endpoint), true + return endpoint + metricsGatewayPath, true } diff --git a/agent/hcp/client/client_test.go b/agent/hcp/client/client_test.go index 1b6b7a421bc6..43ecf0fd5c45 100644 --- a/agent/hcp/client/client_test.go +++ b/agent/hcp/client/client_test.go @@ -18,7 +18,7 @@ func TestFetchTelemetryConfig(t *testing.T) { "success": { expect: func(mockClient *MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{ - Endpoint: "test.com", + Endpoint: "https://test.com", MetricsConfig: &MetricsConfig{ Endpoint: "", }, @@ -29,9 +29,9 @@ func TestFetchTelemetryConfig(t *testing.T) { "overrideMetricsEndpoint": { expect: func(mockClient *MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&TelemetryConfig{ - Endpoint: "test.com", + Endpoint: "https://test.com", MetricsConfig: &MetricsConfig{ - Endpoint: "test.com", + Endpoint: "https://test.com", }, }, nil) }, diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index bf287ebdf877..d923b107ae18 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -51,7 +51,7 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo telemetryCfg, err := hcpClient.FetchTelemetryConfig(reqCtx) if err != nil { - logger.Error("failed to fetch telemetry config %w", err) + logger.Error("failed to fetch telemetry config", "error", err) return nil } @@ -62,13 +62,13 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo u, err := url.Parse(endpoint) if err != nil { - logger.Error("failed to parse url endpoint %w", err) + logger.Error("failed to parse url endpoint", "error", err) return nil } metricsClient, err := hcpclient.NewMetricsClient(cfg, ctx) if err != nil { - logger.Error("failed to init metrics client: %w", err) + logger.Error("failed to init metrics client", "error", err) return nil } @@ -79,7 +79,7 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger 
hclog.Lo sink, err := telemetry.NewOTELSink(sinkOpts) if err != nil { - logger.Error("failed to init OTEL sink: %w", err) + logger.Error("failed to init OTEL sink", "error", err) return nil } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 13324776bb40..017d7c791c5e 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -21,9 +21,9 @@ func TestSink(t *testing.T) { "success": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - Endpoint: "test.com", + Endpoint: "https://test.com", MetricsConfig: &client.MetricsConfig{ - Endpoint: "test.com", + Endpoint: "https://test.com", }, }, nil) }, @@ -50,7 +50,7 @@ func TestSink(t *testing.T) { "noSinkWhenMetricsClientInitFails": { expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - Endpoint: "test.com", + Endpoint: "https://test.com", MetricsConfig: &client.MetricsConfig{ Endpoint: "", }, From de0deb9f1e398ac35c954188933e53b00128db80 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 18 May 2023 12:04:18 -0400 Subject: [PATCH 084/100] return metrics.MetricSink for sink method --- agent/hcp/deps.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index d923b107ae18..702f7d27a396 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -42,7 +42,7 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { // sink provides initializes an OTELSink which forwards Consul metrics to HCP. // The sink is only initialized if the server is registered with the management plane (CCM). // This step should not block server initialization, so errors are logged, but not returned. 
-func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) *telemetry.OTELSink { +func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) metrics.MetricSink { ctx := context.Background() ctx = hclog.WithContext(ctx, logger) From 58e838a962dba6e71410baff0c39e72c312b1d1b Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 23 May 2023 11:24:07 -0400 Subject: [PATCH 085/100] Update SDK --- agent/hcp/config/config.go | 2 +- go.mod | 2 +- go.sum | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/hcp/config/config.go b/agent/hcp/config/config.go index a6d4c31979db..cf87b685e352 100644 --- a/agent/hcp/config/config.go +++ b/agent/hcp/config/config.go @@ -46,6 +46,6 @@ func (c *CloudConfig) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfi if c.ScadaAddress != "" { opts = append(opts, hcpcfg.WithSCADA(c.ScadaAddress, c.TLSConfig)) } - opts = append(opts, hcpcfg.FromEnv()) + opts = append(opts, hcpcfg.FromEnv(), hcpcfg.WithoutBrowserLogin()) return hcpcfg.NewHCPConfig(opts...) 
} diff --git a/go.mod b/go.mod index f0bc0826dfea..e4e32cea3732 100644 --- a/go.mod +++ b/go.mod @@ -62,7 +62,7 @@ require ( github.com/hashicorp/golang-lru v0.5.4 github.com/hashicorp/hcl v1.0.0 github.com/hashicorp/hcp-scada-provider v0.2.3 - github.com/hashicorp/hcp-sdk-go v0.44.1-0.20230508124639-28da4c5b03f3 + github.com/hashicorp/hcp-sdk-go v0.46.1-0.20230519164650-51657675d9e7 github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038 github.com/hashicorp/memberlist v0.5.0 github.com/hashicorp/raft v1.5.0 diff --git a/go.sum b/go.sum index 91a396da17b9..bffefaaf6aab 100644 --- a/go.sum +++ b/go.sum @@ -610,8 +610,8 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hashicorp/hcp-scada-provider v0.2.3 h1:AarYR+/Pcv+cMvPdAlb92uOBmZfEH6ny4+DT+4NY2VQ= github.com/hashicorp/hcp-scada-provider v0.2.3/go.mod h1:ZFTgGwkzNv99PLQjTsulzaCplCzOTBh0IUQsPKzrQFo= -github.com/hashicorp/hcp-sdk-go v0.44.1-0.20230508124639-28da4c5b03f3 h1:9QstZdsLIS6iPyYxQoyymRz8nBw9jMdEbGy29gtgzVQ= -github.com/hashicorp/hcp-sdk-go v0.44.1-0.20230508124639-28da4c5b03f3/go.mod h1:hZqky4HEzsKwvLOt4QJlZUrjeQmb4UCZUhDP2HyQFfc= +github.com/hashicorp/hcp-sdk-go v0.46.1-0.20230519164650-51657675d9e7 h1:/7/5kyyCT5tCeRanKIJAfP8Z6JnjEV55PNuI6phn2k0= +github.com/hashicorp/hcp-sdk-go v0.46.1-0.20230519164650-51657675d9e7/go.mod h1:hZqky4HEzsKwvLOt4QJlZUrjeQmb4UCZUhDP2HyQFfc= github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038 h1:n9J0rwVWXDpNd5iZnwY7w4WZyq53/rROeI7OVvLW8Ok= github.com/hashicorp/hil v0.0.0-20200423225030-a18a1cd20038/go.mod h1:n2TSygSNwsLJ76m8qFXTSc7beTb+auJxYdqrnoqwZWE= github.com/hashicorp/logutils v1.0.0/go.mod h1:QIAnNjmIWmVIIkWDTG1z5v++HQmx9WQRO+LraFDTW64= From 60d450deef93f4827cdec4167d8fc47935f8d4ba Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 26 Apr 2023 09:32:52 -0400 Subject: [PATCH 086/100] Added telemetry agent to client and 
init sink in deps --- agent/hcp/deps.go | 6 ++-- agent/hcp/deps_test.go | 22 -------------- agent/hcp/telemetry/filter.go | 42 +++++++++++++++++++++++++++ agent/hcp/telemetry/filter_test.go | 39 +++++++++++++++++++++++++ agent/hcp/telemetry/otel_sink.go | 40 ++++++++++++++++++++++--- agent/hcp/telemetry/otel_sink_test.go | 31 ++++++++++++++++---- 6 files changed, 146 insertions(+), 34 deletions(-) create mode 100644 agent/hcp/telemetry/filter.go create mode 100644 agent/hcp/telemetry/filter_test.go diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 702f7d27a396..47d39c65f4ad 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -73,8 +73,10 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo } sinkOpts := &telemetry.OTELSinkOpts{ - Ctx: ctx, - Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval), + Ctx: ctx, + Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval), + Labels: telemetryCfg.Labels, + Filters: telemetryCfg.MetricsConfig.Filters, } sink, err := telemetry.NewOTELSink(sinkOpts) diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 017d7c791c5e..4ca01c7dfb3d 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -63,28 +63,6 @@ func TestSink(t *testing.T) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("FetchTelemetryConfig error")) }, }, - "failsWithURLParseErr": { - expect: func(mockClient *client.MockClient) { - mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - // Minimum 2 chars for a domain to be valid. 
- Endpoint: "s", - MetricsConfig: &client.MetricsConfig{ - // Invalid domain chars - Endpoint: " ", - }, - }, nil) - }, - }, - "noErrWithEmptyEndpoint": { - expect: func(mockClient *client.MockClient) { - mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ - Endpoint: "", - MetricsConfig: &client.MetricsConfig{ - Endpoint: "", - }, - }, nil) - }, - }, } { test := test t.Run(name, func(t *testing.T) { diff --git a/agent/hcp/telemetry/filter.go b/agent/hcp/telemetry/filter.go new file mode 100644 index 000000000000..d9cfd3418122 --- /dev/null +++ b/agent/hcp/telemetry/filter.go @@ -0,0 +1,42 @@ +package telemetry + +import ( + "fmt" + "regexp" + + "github.com/hashicorp/go-multierror" +) + +// FilterList holds a map of filters, i.e. regular expressions. +// These filters are used to identify which Consul metrics can be transmitted to HCP. +type FilterList struct { + filters map[string]*regexp.Regexp +} + +// NewFilterList returns a FilterList which holds valid regex +// used to filter metrics. It will not fail if invalid REGEX is given, but returns a list of errors. +func NewFilterList(filters []string) (*FilterList, error) { + var mErr error + compiledList := make(map[string]*regexp.Regexp, len(filters)) + for idx, filter := range filters { + re, err := regexp.Compile(filter) + if err != nil { + mErr = multierror.Append(mErr, fmt.Errorf("compilation of filter at index %d failed: %w", idx, err)) + } + compiledList[filter] = re + } + f := &FilterList{ + filters: compiledList, + } + return f, mErr +} + +// Match returns true if the metric name matches a REGEX in the allowed metric filters. 
+func (fl *FilterList) Match(name string) bool { + for _, re := range fl.filters { + if re.Match([]byte(name)) { + return true + } + } + return false +} diff --git a/agent/hcp/telemetry/filter_test.go b/agent/hcp/telemetry/filter_test.go new file mode 100644 index 000000000000..8b6bc453cc84 --- /dev/null +++ b/agent/hcp/telemetry/filter_test.go @@ -0,0 +1,39 @@ +package telemetry + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFilter(t *testing.T) { + for name, tc := range map[string]struct { + filters []string + wantMatch bool + wantErr string + }{ + "badFilterRegex": { + filters: []string{"(*LF)"}, + wantErr: "compilation of filter at index 0 failed", + }, + "matchFound": { + filters: []string{"raft.*"}, + wantMatch: true, + }, + "matchNotFound": { + filters: []string{"mem.heap_size"}, + wantMatch: false, + }, + } { + t.Run(name, func(t *testing.T) { + f, err := NewFilterList(tc.filters) + if tc.wantErr != "" { + require.Error(t, err) + require.Contains(t, err.Error(), tc.wantErr) + } else { + m := f.Match("consul.raft.peers") + require.Equal(t, tc.wantMatch, m) + } + }) + } +} diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 9a984150b773..a0ba918487f7 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -24,8 +24,10 @@ const DefaultExportInterval = 10 * time.Second // OTELSinkOpts is used to provide configuration when initializing an OTELSink using NewOTELSink. type OTELSinkOpts struct { - Reader otelsdk.Reader - Ctx context.Context + Reader otelsdk.Reader + Ctx context.Context + Filters []string + Labels map[string]string } // OTELSink captures and aggregates telemetry data as per the OpenTelemetry (OTEL) specification. @@ -35,6 +37,7 @@ type OTELSink struct { // spaceReplacer cleans the flattened key by removing any spaces. 
spaceReplacer *strings.Replacer logger hclog.Logger + filters *FilterList // meterProvider is an OTEL MeterProvider, the entrypoint to the OTEL Metrics SDK. // It handles reading/export of aggregated metric data. @@ -81,14 +84,31 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { return nil, fmt.Errorf("ferror: provide valid context") } + logger := hclog.FromContext(opts.Ctx).Named("otel_sink") + // Setup OTEL Metrics SDK to aggregate, convert and export metrics. - res := resource.NewSchemaless() + + filterList, err := NewFilterList(opts.Filters) + if err != nil { + logger.Error("Failed to initialize all filters: %w", err) + } + + attrs := make([]attribute.KeyValue, len(opts.Labels)) + for k, v := range opts.Labels { + attrs = append(attrs, attribute.KeyValue{ + Key: attribute.Key(k), + Value: attribute.StringValue(v), + }) + } + // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. + res := resource.NewWithAttributes("", attrs...) meterProvider := otelsdk.NewMeterProvider(otelsdk.WithResource(res), otelsdk.WithReader(opts.Reader)) meter := meterProvider.Meter("github.com/hashicorp/consul/agent/hcp/telemetry") return &OTELSink{ + filters: filterList, spaceReplacer: strings.NewReplacer(" ", "_"), - logger: hclog.FromContext(opts.Ctx).Named("otel_sink"), + logger: logger, meterProvider: meterProvider, meter: &meter, gaugeStore: NewGaugeStore(), @@ -118,6 +138,10 @@ func (o *OTELSink) IncrCounter(key []string, val float32) { func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key) + if !o.filters.Match(k) { + return + } + // Set value in global Gauge store. 
o.gaugeStore.Set(k, float64(val), toAttributes(labels)) @@ -142,6 +166,10 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key) + if !o.filters.Match(k) { + return + } + o.mutex.Lock() defer o.mutex.Unlock() @@ -164,6 +192,10 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key) + if !o.filters.Match(k) { + return + } + o.mutex.Lock() defer o.mutex.Unlock() diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 2b4dc26abda7..181ae226ed88 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -17,11 +17,16 @@ import ( ) var ( - attrs = attribute.NewSet(attribute.KeyValue{ + expectedResource = resource.NewWithAttributes("", attribute.KeyValue{ Key: attribute.Key("server.id"), Value: attribute.StringValue("test"), }) + attrs = attribute.NewSet(attribute.KeyValue{ + Key: attribute.Key("metric.label"), + Value: attribute.StringValue("test"), + }) + expectedSinkMetrics = map[string]metricdata.Metrics{ "consul.raft.leader": { Name: "consul.raft.leader", @@ -129,6 +134,15 @@ func TestNewOTELSink(t *testing.T) { Ctx: context.Background(), }, }, + "success": { + opts: &OTELSinkOpts{ + Reader: metric.NewManualReader(), + Labels: map[string]string{ + "server": "test", + }, + Filters: []string{"raft"}, + }, + }, } { test := test t.Run(name, func(t *testing.T) { @@ -153,8 +167,12 @@ func TestOTELSink(t *testing.T) { ctx := context.Background() opts := &OTELSinkOpts{ - Reader: reader, - Ctx: ctx, + Reader: reader, + Ctx: ctx, + Filters: []string{"raft", "autopilot"}, + Labels: map[string]string{ + "server.id": "test", + }, } sink, err := NewOTELSink(opts) @@ -162,7 +180,7 @@ func TestOTELSink(t *testing.T) 
{ labels := []gometrics.Label{ { - Name: "server.id", + Name: "metric.label", Value: "test", }, } @@ -187,8 +205,9 @@ func TestOTELSink_Race(t *testing.T) { reader := metric.NewManualReader() ctx := context.Background() opts := &OTELSinkOpts{ - Ctx: ctx, - Reader: reader, + Ctx: ctx, + Reader: reader, + Filters: []string{"test"}, } sink, err := NewOTELSink(opts) From 6ac7c5e4079f4a2657a7597f6565d27d95697fa1 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 15 May 2023 11:40:46 -0400 Subject: [PATCH 087/100] Add node_id and __replica__ default labels --- agent/hcp/deps.go | 11 ++++++++--- agent/hcp/deps_test.go | 3 ++- agent/setup.go | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 47d39c65f4ad..98bd9c8efe77 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -13,6 +13,7 @@ import ( "github.com/hashicorp/consul/agent/hcp/config" "github.com/hashicorp/consul/agent/hcp/scada" "github.com/hashicorp/consul/agent/hcp/telemetry" + "github.com/hashicorp/consul/types" "github.com/hashicorp/go-hclog" ) @@ -23,7 +24,7 @@ type Deps struct { Sink metrics.MetricSink } -func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { +func NewDeps(cfg config.CloudConfig, logger hclog.Logger, nodeID types.NodeID) (d Deps, err error) { d.Client, err = hcpclient.NewClient(cfg) if err != nil { return @@ -34,7 +35,7 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { return } - d.Sink = sink(d.Client, &cfg, logger) + d.Sink = sink(d.Client, &cfg, logger, nodeID) return } @@ -42,7 +43,7 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) { // sink provides initializes an OTELSink which forwards Consul metrics to HCP. // The sink is only initialized if the server is registered with the management plane (CCM). // This step should not block server initialization, so errors are logged, but not returned. 
-func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) metrics.MetricSink { +func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger, nodeID types.NodeID) metrics.MetricSink { ctx := context.Background() ctx = hclog.WithContext(ctx, logger) @@ -72,6 +73,10 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return nil } + // Set default labels + telemetryCfg.Labels["__replica__"] = string(nodeID) + telemetryCfg.Labels["node_id"] = string(nodeID) + sinkOpts := &telemetry.OTELSinkOpts{ Ctx: ctx, Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval), diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 4ca01c7dfb3d..a88bf27d906e 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/hashicorp/consul/agent/hcp/client" + "github.com/hashicorp/consul/types" ) func TestSink(t *testing.T) { @@ -70,7 +71,7 @@ func TestSink(t *testing.T) { c := client.NewMockClient(t) l := hclog.NewNullLogger() test.expect(c) - sinkOpts := sink(c, test.mockCloudCfg, l) + sinkOpts := sink(c, test.mockCloudCfg, l, types.NodeID("server1234")) if !test.expectedSink { require.Nil(t, sinkOpts) return diff --git a/agent/setup.go b/agent/setup.go index 46e60d58b266..184c9c01554f 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -104,7 +104,7 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl var extraSinks []metrics.MetricSink if cfg.IsCloudEnabled() { - d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger) + d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger, d.RuntimeConfig.NodeID) if err != nil { return d, err } From 8189fae9e3cc0d29f71e645cecb7acfa24abc1cf Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Mon, 15 May 2023 12:18:24 -0400 Subject: [PATCH 088/100] add function for default labels and set x-hcp-resource-id --- 
agent/hcp/client/metrics_client.go | 8 ++++++++ agent/hcp/client/metrics_client_test.go | 16 +++++++++++++--- agent/hcp/client/mock_CloudConfig.go | 18 ++++++++++-------- agent/hcp/config/config.go | 5 +++++ agent/hcp/deps.go | 22 +++++++++++++++++----- agent/hcp/deps_test.go | 4 +++- 6 files changed, 56 insertions(+), 17 deletions(-) diff --git a/agent/hcp/client/metrics_client.go b/agent/hcp/client/metrics_client.go index 15bd71097f79..a45d6343fc46 100644 --- a/agent/hcp/client/metrics_client.go +++ b/agent/hcp/client/metrics_client.go @@ -12,6 +12,7 @@ import ( "github.com/hashicorp/go-hclog" "github.com/hashicorp/go-retryablehttp" hcpcfg "github.com/hashicorp/hcp-sdk-go/config" + "github.com/hashicorp/hcp-sdk-go/resource" colmetricpb "go.opentelemetry.io/proto/otlp/collector/metrics/v1" metricpb "go.opentelemetry.io/proto/otlp/metrics/v1" "golang.org/x/oauth2" @@ -37,6 +38,7 @@ type MetricsClient interface { // cloudConfig represents cloud config for TLS abstracted in an interface for easy testing. type CloudConfig interface { HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) + Resource() (resource.Resource, error) } // otlpClient is an implementation of MetricsClient with a retryable http client for retries and to honor throttle. 
@@ -64,8 +66,14 @@ func NewMetricsClient(cfg CloudConfig, ctx context.Context) (MetricsClient, erro return nil, fmt.Errorf("failed to init telemetry client: %v", err) } + r, err := cfg.Resource() + if err != nil { + return nil, fmt.Errorf("failed to init telemetry client: %v", err) + } + header := make(http.Header) header.Set("Content-Type", "application/x-protobuf") + header.Set("x-hcp-resource-id", r.String()) return &otlpClient{ client: c, diff --git a/agent/hcp/client/metrics_client_test.go b/agent/hcp/client/metrics_client_test.go index 7c64d731d0b1..ee4c4262bba9 100644 --- a/agent/hcp/client/metrics_client_test.go +++ b/agent/hcp/client/metrics_client_test.go @@ -2,6 +2,7 @@ package client import ( "context" + "fmt" "net/http" "net/http/httptest" "testing" @@ -34,8 +35,17 @@ func TestNewMetricsClient(t *testing.T) { }, "failsHCPConfig": { wantErr: "failed to init telemetry client", - cfg: MockErrCloudCfg{}, - ctx: context.Background(), + cfg: MockCloudCfg{ + ConfigErr: fmt.Errorf("test bad hcp config"), + }, + ctx: context.Background(), + }, + "failsBadResource": { + wantErr: "failed to init telemetry client", + cfg: MockCloudCfg{ + ResourceErr: fmt.Errorf("test bad resource"), + }, + ctx: context.Background(), }, } { t.Run(name, func(t *testing.T) { @@ -68,7 +78,7 @@ func TestExportMetrics(t *testing.T) { t.Run(name, func(t *testing.T) { srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { require.Equal(t, r.Header.Get("Content-Type"), "application/x-protobuf") - + require.Equal(t, r.Header.Get("x-hcp-resource-id"), testResourceID) require.Equal(t, r.Header.Get("Authorization"), "Bearer test-token") body := colpb.ExportMetricsServiceResponse{} diff --git a/agent/hcp/client/mock_CloudConfig.go b/agent/hcp/client/mock_CloudConfig.go index 860f50fe459a..c39edc68b28b 100644 --- a/agent/hcp/client/mock_CloudConfig.go +++ b/agent/hcp/client/mock_CloudConfig.go @@ -2,14 +2,16 @@ package client import ( "crypto/tls" - "errors" 
"net/url" hcpcfg "github.com/hashicorp/hcp-sdk-go/config" "github.com/hashicorp/hcp-sdk-go/profile" + "github.com/hashicorp/hcp-sdk-go/resource" "golang.org/x/oauth2" ) +const testResourceID = "organization/ccbdd191-5dc3-4a73-9e05-6ac30ca67992/project/36019e0d-ed59-4df6-9990-05bb7fc793b6/hashicorp.consul.linked-cluster/prod-on-prem" + type mockHCPCfg struct{} func (m *mockHCPCfg) Token() (*oauth2.Token, error) { @@ -25,14 +27,14 @@ func (m *mockHCPCfg) APIAddress() string { return "" } func (m *mockHCPCfg) PortalURL() *url.URL { return &url.URL{} } func (m *mockHCPCfg) Profile() *profile.UserProfile { return nil } -type MockCloudCfg struct{} - -func (m MockCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) { - return &mockHCPCfg{}, nil +type MockCloudCfg struct { + ConfigErr error } -type MockErrCloudCfg struct{} +func (m MockCloudCfg) Resource() (resource.Resource, error) { + return resource.FromString(testResourceID) +} -func (m MockErrCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) { - return nil, errors.New("test bad HCP config") +func (m MockCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) { + return &mockHCPCfg{}, m.ConfigErr } diff --git a/agent/hcp/config/config.go b/agent/hcp/config/config.go index cf87b685e352..8d1358fa4adf 100644 --- a/agent/hcp/config/config.go +++ b/agent/hcp/config/config.go @@ -7,6 +7,7 @@ import ( "crypto/tls" hcpcfg "github.com/hashicorp/hcp-sdk-go/config" + "github.com/hashicorp/hcp-sdk-go/resource" ) // CloudConfig defines configuration for connecting to HCP services @@ -30,6 +31,10 @@ func (c *CloudConfig) WithTLSConfig(cfg *tls.Config) { c.TLSConfig = cfg } +func (c *CloudConfig) Resource() (resource.Resource, error) { + return resource.FromString(c.ResourceID) +} + func (c *CloudConfig) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) { if c.TLSConfig == nil { c.TLSConfig = &tls.Config{} diff --git a/agent/hcp/deps.go 
b/agent/hcp/deps.go index 98bd9c8efe77..0f88fe551da0 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -73,14 +73,10 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return nil } - // Set default labels - telemetryCfg.Labels["__replica__"] = string(nodeID) - telemetryCfg.Labels["node_id"] = string(nodeID) - sinkOpts := &telemetry.OTELSinkOpts{ Ctx: ctx, Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval), - Labels: telemetryCfg.Labels, + Labels: defaultLabels(telemetryCfg.Labels, nodeID), Filters: telemetryCfg.MetricsConfig.Filters, } @@ -92,3 +88,19 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return sink } + +// defaultLabels returns a set of string labels to be sent with each metrics Sink export +// to the HCP Telemetry Gateway. +func defaultLabels(cfgLabels map[string]string, nodeID types.NodeID) map[string]string { + // Use configured labels, if any. + labels := cfgLabels + if len(labels) == 0 { + labels = make(map[string]string, 2) + } + + // Set default labels + labels["__replica__"] = string(nodeID) + labels["node_id"] = string(nodeID) + + return labels +} diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index a88bf27d906e..22e0f4b184fc 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -49,6 +49,9 @@ func TestSink(t *testing.T) { mockCloudCfg: client.MockCloudCfg{}, }, "noSinkWhenMetricsClientInitFails": { + mockCloudCfg: client.MockCloudCfg{ + ConfigErr: fmt.Errorf("test bad hcp config"), + }, expect: func(mockClient *client.MockClient) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ Endpoint: "https://test.com", @@ -57,7 +60,6 @@ func TestSink(t *testing.T) { }, }, nil) }, - mockCloudCfg: client.MockErrCloudCfg{}, }, "failsWithFetchTelemetryFailure": { expect: func(mockClient *client.MockClient) { From a6f8e69aa55426f3e427401ce53ea957d79e3e73 Mon Sep 17 00:00:00 2001 From: 
Ashvitha Sridharan Date: Mon, 15 May 2023 17:22:54 -0400 Subject: [PATCH 089/100] Fix labels tests --- agent/hcp/client/mock_CloudConfig.go | 6 ++++-- agent/hcp/telemetry/otel_sink_test.go | 10 +++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/agent/hcp/client/mock_CloudConfig.go b/agent/hcp/client/mock_CloudConfig.go index c39edc68b28b..153b9552d921 100644 --- a/agent/hcp/client/mock_CloudConfig.go +++ b/agent/hcp/client/mock_CloudConfig.go @@ -28,11 +28,13 @@ func (m *mockHCPCfg) PortalURL() *url.URL { return &url.URL{} } func (m *mockHCPCfg) Profile() *profile.UserProfile { return nil } type MockCloudCfg struct { - ConfigErr error + ConfigErr error + ResourceErr error } func (m MockCloudCfg) Resource() (resource.Resource, error) { - return resource.FromString(testResourceID) + r, _ := resource.FromString(testResourceID) + return r, m.ResourceErr } func (m MockCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) { diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 181ae226ed88..119051e0efd0 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -136,6 +136,7 @@ func TestNewOTELSink(t *testing.T) { }, "success": { opts: &OTELSinkOpts{ + Ctx: context.Background(), Reader: metric.NewManualReader(), Labels: map[string]string{ "server": "test", @@ -205,8 +206,11 @@ func TestOTELSink_Race(t *testing.T) { reader := metric.NewManualReader() ctx := context.Background() opts := &OTELSinkOpts{ - Ctx: ctx, - Reader: reader, + Ctx: ctx, + Reader: reader, + Labels: map[string]string{ + "server.id": "test", + }, Filters: []string{"test"}, } @@ -322,7 +326,7 @@ func performSinkOperation(sink *OTELSink, k string, v metricdata.Metrics, errCh func isSame(t *testing.T, expectedMap map[string]metricdata.Metrics, actual metricdata.ResourceMetrics) { // Validate resource - require.Equal(t, resource.NewSchemaless(), actual.Resource) + require.Equal(t, 
expectedResource, actual.Resource) // Validate Metrics require.NotEmpty(t, actual.ScopeMetrics) From e01431396ca07e2cae904fa5e310100ab8c27bf9 Mon Sep 17 00:00:00 2001 From: Ashvitha Date: Tue, 16 May 2023 16:04:16 -0400 Subject: [PATCH 090/100] Commit suggestion for getDefaultLabels Co-authored-by: Joshua Timmons --- agent/hcp/deps.go | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index 0f88fe551da0..a8418dc8bea5 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -92,15 +92,14 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo // defaultLabels returns a set of string labels to be sent with each metrics Sink export // to the HCP Telemetry Gateway. func defaultLabels(cfgLabels map[string]string, nodeID types.NodeID) map[string]string { - // Use configured labels, if any. - labels := cfgLabels - if len(labels) == 0 { - labels = make(map[string]string, 2) + labels := map[string]string{ + "__replica__": string(nodeID), // used for Cortex HA-metrics (deduplication) + "node_id": string(nodeID), // used to delineate Consul nodes in graphs } - // Set default labels - labels["__replica__"] = string(nodeID) - labels["node_id"] = string(nodeID) + for k, v := range cfgLabels { + labels[k] = v + } return labels } From 2726472ffb96c3232d2402a69906ffec1a4f1eee Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 13:32:20 -0400 Subject: [PATCH 091/100] Fixed server.id, and t.Parallel() --- agent/hcp/telemetry/filter_test.go | 3 +++ agent/hcp/telemetry/otel_sink_test.go | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/agent/hcp/telemetry/filter_test.go b/agent/hcp/telemetry/filter_test.go index 8b6bc453cc84..eeb4d93be898 100644 --- a/agent/hcp/telemetry/filter_test.go +++ b/agent/hcp/telemetry/filter_test.go @@ -7,6 +7,7 @@ import ( ) func TestFilter(t *testing.T) { + t.Parallel() for name, tc := range map[string]struct { filters 
[]string wantMatch bool @@ -25,7 +26,9 @@ func TestFilter(t *testing.T) { wantMatch: false, }, } { + tc := tc t.Run(name, func(t *testing.T) { + t.Parallel() f, err := NewFilterList(tc.filters) if tc.wantErr != "" { require.Error(t, err) diff --git a/agent/hcp/telemetry/otel_sink_test.go b/agent/hcp/telemetry/otel_sink_test.go index 119051e0efd0..34127bdf9d77 100644 --- a/agent/hcp/telemetry/otel_sink_test.go +++ b/agent/hcp/telemetry/otel_sink_test.go @@ -18,7 +18,7 @@ import ( var ( expectedResource = resource.NewWithAttributes("", attribute.KeyValue{ - Key: attribute.Key("server.id"), + Key: attribute.Key("node_id"), Value: attribute.StringValue("test"), }) @@ -172,7 +172,7 @@ func TestOTELSink(t *testing.T) { Ctx: ctx, Filters: []string{"raft", "autopilot"}, Labels: map[string]string{ - "server.id": "test", + "node_id": "test", }, } @@ -209,7 +209,7 @@ func TestOTELSink_Race(t *testing.T) { Ctx: ctx, Reader: reader, Labels: map[string]string{ - "server.id": "test", + "node_id": "test", }, Filters: []string{"test"}, } From eb7bef675687e558f969665e886df05255002c8e Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 13:44:03 -0400 Subject: [PATCH 092/100] Make defaultLabels a method on the TelemetryConfig object --- agent/hcp/client/client.go | 14 ++++++++++++++ agent/hcp/deps.go | 17 +---------------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index aec1039525aa..b284c540db29 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -296,3 +296,17 @@ func (t *TelemetryConfig) Enabled() (string, bool) { // The endpoint from Telemetry Gateway is a domain without scheme, and without the metrics path, so they must be added. return endpoint + metricsGatewayPath, true } + +// DefaultLabels returns a set of string pairs that must be added as attributes to all exported telemetry data. 
+func (t *TelemetryConfig) DefaultLabels(nodeID string) map[string]string { + labels := map[string]string{ + "__replica__": string(nodeID), // used for Cortex HA-metrics (deduplication) + "node_id": string(nodeID), // used to delineate Consul nodes in graphs + } + + for k, v := range t.Labels { + labels[k] = v + } + + return labels +} diff --git a/agent/hcp/deps.go b/agent/hcp/deps.go index a8418dc8bea5..fcf2a6ef7800 100644 --- a/agent/hcp/deps.go +++ b/agent/hcp/deps.go @@ -76,7 +76,7 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo sinkOpts := &telemetry.OTELSinkOpts{ Ctx: ctx, Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval), - Labels: defaultLabels(telemetryCfg.Labels, nodeID), + Labels: telemetryCfg.DefaultLabels(string(nodeID)), Filters: telemetryCfg.MetricsConfig.Filters, } @@ -88,18 +88,3 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo return sink } - -// defaultLabels returns a set of string labels to be sent with each metrics Sink export -// to the HCP Telemetry Gateway. 
-func defaultLabels(cfgLabels map[string]string, nodeID types.NodeID) map[string]string { - labels := map[string]string{ - "__replica__": string(nodeID), // used for Cortex HA-metrics (deduplication) - "node_id": string(nodeID), // used to delineate Consul nodes in graphs - } - - for k, v := range cfgLabels { - labels[k] = v - } - - return labels -} From a66ded29c1458d17e3d8ffb7ec652af669f9ea81 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 13:50:53 -0400 Subject: [PATCH 093/100] Rename FilterList to lowercase filterList --- agent/hcp/telemetry/filter.go | 16 ++++++++-------- agent/hcp/telemetry/filter_test.go | 2 +- agent/hcp/telemetry/otel_sink.go | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/agent/hcp/telemetry/filter.go b/agent/hcp/telemetry/filter.go index d9cfd3418122..f2d9ee7bc9be 100644 --- a/agent/hcp/telemetry/filter.go +++ b/agent/hcp/telemetry/filter.go @@ -7,32 +7,32 @@ import ( "github.com/hashicorp/go-multierror" ) -// FilterList holds a map of filters, i.e. regular expressions. +// filterList holds a map of filters, i.e. regular expressions. // These filters are used to identify which Consul metrics can be transmitted to HCP. -type FilterList struct { +type filterList struct { filters map[string]*regexp.Regexp } -// NewFilterList returns a FilterList which holds valid regex +// newFilterList returns a FilterList which holds valid regex // used to filter metrics. It will not fail if invalid REGEX is given, but returns a list of errors. 
-func NewFilterList(filters []string) (*FilterList, error) { +func newFilterList(filters []string) (*filterList, error) { var mErr error compiledList := make(map[string]*regexp.Regexp, len(filters)) - for idx, filter := range filters { + for _, filter := range filters { re, err := regexp.Compile(filter) if err != nil { - mErr = multierror.Append(mErr, fmt.Errorf("compilation of filter at index %d failed: %w", idx, err)) + mErr = multierror.Append(mErr, fmt.Errorf("compilation of filter %q failed: %w", filter, err)) } compiledList[filter] = re } - f := &FilterList{ + f := &filterList{ filters: compiledList, } return f, mErr } // Match returns true if the metric name matches a REGEX in the allowed metric filters. -func (fl *FilterList) Match(name string) bool { +func (fl *filterList) Match(name string) bool { for _, re := range fl.filters { if re.Match([]byte(name)) { return true diff --git a/agent/hcp/telemetry/filter_test.go b/agent/hcp/telemetry/filter_test.go index eeb4d93be898..26325775a6b4 100644 --- a/agent/hcp/telemetry/filter_test.go +++ b/agent/hcp/telemetry/filter_test.go @@ -29,7 +29,7 @@ func TestFilter(t *testing.T) { tc := tc t.Run(name, func(t *testing.T) { t.Parallel() - f, err := NewFilterList(tc.filters) + f, err := newFilterList(tc.filters) if tc.wantErr != "" { require.Error(t, err) require.Contains(t, err.Error(), tc.wantErr) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index a0ba918487f7..32c59a4ce5c6 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -37,7 +37,7 @@ type OTELSink struct { // spaceReplacer cleans the flattened key by removing any spaces. spaceReplacer *strings.Replacer logger hclog.Logger - filters *FilterList + filters *filterList // meterProvider is an OTEL MeterProvider, the entrypoint to the OTEL Metrics SDK. // It handles reading/export of aggregated metric data. 
@@ -88,7 +88,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { // Setup OTEL Metrics SDK to aggregate, convert and export metrics. - filterList, err := NewFilterList(opts.Filters) + filterList, err := newFilterList(opts.Filters) if err != nil { logger.Error("Failed to initialize all filters: %w", err) } From 6f70376baa8a0063b72ea31195edf0bfdc5fb24c Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 15:23:02 -0400 Subject: [PATCH 094/100] Cleanup filter implemetation by combining regex into a single one, and making the type lowercase --- agent/hcp/telemetry/filter.go | 37 ++++++++++++++++++++---------- agent/hcp/telemetry/filter_test.go | 36 +++++++++++++++++++++-------- agent/hcp/telemetry/otel_sink.go | 2 -- 3 files changed, 51 insertions(+), 24 deletions(-) diff --git a/agent/hcp/telemetry/filter.go b/agent/hcp/telemetry/filter.go index f2d9ee7bc9be..6de0e7809ac5 100644 --- a/agent/hcp/telemetry/filter.go +++ b/agent/hcp/telemetry/filter.go @@ -3,6 +3,7 @@ package telemetry import ( "fmt" "regexp" + "strings" "github.com/hashicorp/go-multierror" ) @@ -10,33 +11,45 @@ import ( // filterList holds a map of filters, i.e. regular expressions. // These filters are used to identify which Consul metrics can be transmitted to HCP. type filterList struct { - filters map[string]*regexp.Regexp + isValid *regexp.Regexp } -// newFilterList returns a FilterList which holds valid regex -// used to filter metrics. It will not fail if invalid REGEX is given, but returns a list of errors. +// newFilterList returns a FilterList which holds valid regex used to filter metrics. +// It will fail if there are 0 valid regex filters given. 
func newFilterList(filters []string) (*filterList, error) { var mErr error - compiledList := make(map[string]*regexp.Regexp, len(filters)) + var validFilters []string for _, filter := range filters { - re, err := regexp.Compile(filter) + _, err := regexp.Compile(filter) if err != nil { mErr = multierror.Append(mErr, fmt.Errorf("compilation of filter %q failed: %w", filter, err)) + continue } - compiledList[filter] = re + validFilters = append(validFilters, filter) } + + if len(validFilters) == 0 { + return nil, multierror.Append(mErr, fmt.Errorf("no valid filters")) + } + + // Combine the valid regex strings with an OR. + finalRegex := strings.Join(validFilters, "|") + composedRegex, err := regexp.Compile(finalRegex) + if err != nil { + return nil, err + } + f := &filterList{ - filters: compiledList, + isValid: composedRegex, } - return f, mErr + return f, nil } // Match returns true if the metric name matches a REGEX in the allowed metric filters. func (fl *filterList) Match(name string) bool { - for _, re := range fl.filters { - if re.Match([]byte(name)) { - return true - } + if fl.isValid.MatchString(name) { + return true } + return false } diff --git a/agent/hcp/telemetry/filter_test.go b/agent/hcp/telemetry/filter_test.go index 26325775a6b4..8944b8932c2d 100644 --- a/agent/hcp/telemetry/filter_test.go +++ b/agent/hcp/telemetry/filter_test.go @@ -9,32 +9,48 @@ import ( func TestFilter(t *testing.T) { t.Parallel() for name, tc := range map[string]struct { - filters []string - wantMatch bool - wantErr string + filters []string + expectedRegexString string + matches []string + wantErr string + wantMatch bool }{ "badFilterRegex": { filters: []string{"(*LF)"}, - wantErr: "compilation of filter at index 0 failed", + wantErr: "no valid filters", + }, + "failsWithNoRegex": { + filters: []string{}, + wantErr: "no valid filters", }, "matchFound": { - filters: []string{"raft.*"}, - wantMatch: true, + filters: []string{"raft.*", "mem.*"}, + expectedRegexString: 
"raft.*|mem.*", + matches: []string{"consul.raft.peers", "consul.mem.heap_size"}, + wantMatch: true, }, "matchNotFound": { - filters: []string{"mem.heap_size"}, - wantMatch: false, + filters: []string{"mem.*"}, + matches: []string{"consul.raft.peers", "consul.txn.apply"}, + expectedRegexString: "mem.*", + wantMatch: false, }, } { tc := tc t.Run(name, func(t *testing.T) { t.Parallel() f, err := newFilterList(tc.filters) + if tc.wantErr != "" { require.Error(t, err) require.Contains(t, err.Error(), tc.wantErr) - } else { - m := f.Match("consul.raft.peers") + return + } + + require.NoError(t, err) + require.Equal(t, tc.expectedRegexString, f.isValid.String()) + for _, metric := range tc.matches { + m := f.Match(metric) require.Equal(t, tc.wantMatch, m) } }) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 32c59a4ce5c6..adcaf40c755b 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -86,8 +86,6 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { logger := hclog.FromContext(opts.Ctx).Named("otel_sink") - // Setup OTEL Metrics SDK to aggregate, convert and export metrics. - filterList, err := newFilterList(opts.Filters) if err != nil { logger.Error("Failed to initialize all filters: %w", err) From dec07b2f5f6a5485526a6406cd08aa472dff18f2 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Wed, 17 May 2023 15:42:46 -0400 Subject: [PATCH 095/100] Fix append --- agent/hcp/client/client.go | 4 ++-- agent/hcp/telemetry/filter.go | 8 ++------ agent/hcp/telemetry/otel_sink.go | 7 ++++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/agent/hcp/client/client.go b/agent/hcp/client/client.go index b284c540db29..4e83098c5378 100644 --- a/agent/hcp/client/client.go +++ b/agent/hcp/client/client.go @@ -300,8 +300,8 @@ func (t *TelemetryConfig) Enabled() (string, bool) { // DefaultLabels returns a set of string pairs that must be added as attributes to all exported telemetry data. 
func (t *TelemetryConfig) DefaultLabels(nodeID string) map[string]string { labels := map[string]string{ - "__replica__": string(nodeID), // used for Cortex HA-metrics (deduplication) - "node_id": string(nodeID), // used to delineate Consul nodes in graphs + "__replica__": nodeID, // used for Cortex HA-metrics (deduplication) + "node_id": nodeID, // used to delineate Consul nodes in graphs } for k, v := range t.Labels { diff --git a/agent/hcp/telemetry/filter.go b/agent/hcp/telemetry/filter.go index 6de0e7809ac5..3c8ae5a65b1c 100644 --- a/agent/hcp/telemetry/filter.go +++ b/agent/hcp/telemetry/filter.go @@ -36,7 +36,7 @@ func newFilterList(filters []string) (*filterList, error) { finalRegex := strings.Join(validFilters, "|") composedRegex, err := regexp.Compile(finalRegex) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to compile regex: %w", err) } f := &filterList{ @@ -47,9 +47,5 @@ func newFilterList(filters []string) (*filterList, error) { // Match returns true if the metric name matches a REGEX in the allowed metric filters. func (fl *filterList) Match(name string) bool { - if fl.isValid.MatchString(name) { - return true - } - - return false + return fl.isValid.MatchString(name) } diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index adcaf40c755b..e59d6a4c375a 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -91,12 +91,13 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { logger.Error("Failed to initialize all filters: %w", err) } - attrs := make([]attribute.KeyValue, len(opts.Labels)) + attrs := make([]attribute.KeyValue, 0) for k, v := range opts.Labels { - attrs = append(attrs, attribute.KeyValue{ + kv := attribute.KeyValue{ Key: attribute.Key(k), Value: attribute.StringValue(v), - }) + } + attrs = append(attrs, kv) } // Setup OTEL Metrics SDK to aggregate, convert and export metrics periodically. res := resource.NewWithAttributes("", attrs...) 
From 7facba6ab816e309a57259a7c397f6ee999ee5b1 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 18 May 2023 10:25:05 -0400 Subject: [PATCH 096/100] use regex directly for filters --- agent/hcp/telemetry/filter.go | 22 ++++------------------ agent/hcp/telemetry/filter_test.go | 6 +++--- agent/hcp/telemetry/otel_sink.go | 11 ++++++----- 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/agent/hcp/telemetry/filter.go b/agent/hcp/telemetry/filter.go index 3c8ae5a65b1c..54dca7d44aef 100644 --- a/agent/hcp/telemetry/filter.go +++ b/agent/hcp/telemetry/filter.go @@ -8,17 +8,11 @@ import ( "github.com/hashicorp/go-multierror" ) -// filterList holds a map of filters, i.e. regular expressions. -// These filters are used to identify which Consul metrics can be transmitted to HCP. -type filterList struct { - isValid *regexp.Regexp -} - -// newFilterList returns a FilterList which holds valid regex used to filter metrics. +// newFilterRegex returns a valid regex used to filter metrics. // It will fail if there are 0 valid regex filters given. -func newFilterList(filters []string) (*filterList, error) { +func newFilterRegex(filters []string) (*regexp.Regexp, error) { var mErr error - var validFilters []string + validFilters := make([]string, 0, len(filters)) for _, filter := range filters { _, err := regexp.Compile(filter) if err != nil { @@ -39,13 +33,5 @@ func newFilterList(filters []string) (*filterList, error) { return nil, fmt.Errorf("failed to compile regex: %w", err) } - f := &filterList{ - isValid: composedRegex, - } - return f, nil -} - -// Match returns true if the metric name matches a REGEX in the allowed metric filters. 
-func (fl *filterList) Match(name string) bool { - return fl.isValid.MatchString(name) + return composedRegex, nil } diff --git a/agent/hcp/telemetry/filter_test.go b/agent/hcp/telemetry/filter_test.go index 8944b8932c2d..abe962f4cd47 100644 --- a/agent/hcp/telemetry/filter_test.go +++ b/agent/hcp/telemetry/filter_test.go @@ -39,7 +39,7 @@ func TestFilter(t *testing.T) { tc := tc t.Run(name, func(t *testing.T) { t.Parallel() - f, err := newFilterList(tc.filters) + f, err := newFilterRegex(tc.filters) if tc.wantErr != "" { require.Error(t, err) @@ -48,9 +48,9 @@ func TestFilter(t *testing.T) { } require.NoError(t, err) - require.Equal(t, tc.expectedRegexString, f.isValid.String()) + require.Equal(t, tc.expectedRegexString, f.String()) for _, metric := range tc.matches { - m := f.Match(metric) + m := f.MatchString(metric) require.Equal(t, tc.wantMatch, m) } }) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index e59d6a4c375a..6d193f1240f1 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "net/url" + "regexp" "strings" "sync" "time" @@ -37,7 +38,7 @@ type OTELSink struct { // spaceReplacer cleans the flattened key by removing any spaces. spaceReplacer *strings.Replacer logger hclog.Logger - filters *filterList + filters *regexp.Regexp // meterProvider is an OTEL MeterProvider, the entrypoint to the OTEL Metrics SDK. // It handles reading/export of aggregated metric data. 
@@ -86,7 +87,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { logger := hclog.FromContext(opts.Ctx).Named("otel_sink") - filterList, err := newFilterList(opts.Filters) + filterList, err := newFilterRegex(opts.Filters) if err != nil { logger.Error("Failed to initialize all filters: %w", err) } @@ -137,7 +138,7 @@ func (o *OTELSink) IncrCounter(key []string, val float32) { func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key) - if !o.filters.Match(k) { + if !o.filters.MatchString(k) { return } @@ -165,7 +166,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key) - if !o.filters.Match(k) { + if !o.filters.MatchString(k) { return } @@ -191,7 +192,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gometrics.Label) { k := o.flattenKey(key) - if !o.filters.Match(k) { + if !o.filters.MatchString(k) { return } From 5692d328739ae118889cb8f3ef8fa72e5a55a267 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 18 May 2023 10:25:24 -0400 Subject: [PATCH 097/100] Fix x-resource-id test to use mocked value --- agent/hcp/client/mock_CloudConfig.go | 9 +++++++-- agent/hcp/deps_test.go | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/agent/hcp/client/mock_CloudConfig.go b/agent/hcp/client/mock_CloudConfig.go index 153b9552d921..5f2ef50046d7 100644 --- a/agent/hcp/client/mock_CloudConfig.go +++ b/agent/hcp/client/mock_CloudConfig.go @@ -10,7 +10,7 @@ import ( "golang.org/x/oauth2" ) -const testResourceID = "organization/ccbdd191-5dc3-4a73-9e05-6ac30ca67992/project/36019e0d-ed59-4df6-9990-05bb7fc793b6/hashicorp.consul.linked-cluster/prod-on-prem" +const testResourceID = 
"organization/test-org/project/test-project/test-type/test-id" type mockHCPCfg struct{} @@ -33,7 +33,12 @@ type MockCloudCfg struct { } func (m MockCloudCfg) Resource() (resource.Resource, error) { - r, _ := resource.FromString(testResourceID) + r := resource.Resource{ + ID: "test-id", + Type: "test-type", + Organization: "test-org", + Project: "test-project", + } return r, m.ResourceErr } diff --git a/agent/hcp/deps_test.go b/agent/hcp/deps_test.go index 22e0f4b184fc..54ec7b6de478 100644 --- a/agent/hcp/deps_test.go +++ b/agent/hcp/deps_test.go @@ -66,6 +66,28 @@ func TestSink(t *testing.T) { mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(nil, fmt.Errorf("FetchTelemetryConfig error")) }, }, + "failsWithURLParseErr": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + // Minimum 2 chars for a domain to be valid. + Endpoint: "s", + MetricsConfig: &client.MetricsConfig{ + // Invalid domain chars + Endpoint: " ", + }, + }, nil) + }, + }, + "noErrWithEmptyEndpoint": { + expect: func(mockClient *client.MockClient) { + mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{ + Endpoint: "", + MetricsConfig: &client.MetricsConfig{ + Endpoint: "", + }, + }, nil) + }, + }, } { test := test t.Run(name, func(t *testing.T) { From c1575e536ac4a57c528a1693952943a23a6619c9 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Thu, 18 May 2023 13:38:51 -0400 Subject: [PATCH 098/100] Fix log.Error formats --- agent/hcp/telemetry/otel_sink.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index 6d193f1240f1..b0d1de61a659 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -89,7 +89,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { filterList, err := newFilterRegex(opts.Filters) if err != nil { - 
logger.Error("Failed to initialize all filters: %w", err) + logger.Error("Failed to initialize all filters", "error", err) } attrs := make([]attribute.KeyValue, 0) @@ -155,7 +155,7 @@ func (o *OTELSink) SetGaugeWithLabels(key []string, val float32, labels []gometr // It must be explicitly de-registered to be removed (which we do not do), to ensure new gauge values are exported every cycle. inst, err := (*o.meter).Float64ObservableGauge(k, otelmetric.WithFloat64Callback(o.gaugeStore.gaugeCallback(k))) if err != nil { - o.logger.Error("Failed to emit gauge: %w", err) + o.logger.Error("Failed to create gauge instrument", "error", err) return } o.gaugeInstruments[k] = inst @@ -177,7 +177,7 @@ func (o *OTELSink) AddSampleWithLabels(key []string, val float32, labels []gomet if !ok { histogram, err := (*o.meter).Float64Histogram(k) if err != nil { - o.logger.Error("Failed to emit gauge: %w", err) + o.logger.Error("Failed create histogram instrument", "error", err) return } inst = histogram @@ -203,7 +203,7 @@ func (o *OTELSink) IncrCounterWithLabels(key []string, val float32, labels []gom if !ok { counter, err := (*o.meter).Float64Counter(k) if err != nil { - o.logger.Error("Failed to emit gauge: %w", err) + o.logger.Error("Failed to create counter instrument:", "error", err) return } From 31a0aed5ba0f977ba369a11088e84fb4bd78632b Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Fri, 19 May 2023 12:26:40 -0400 Subject: [PATCH 099/100] Forgot the len(opts.Label) optimization) --- agent/hcp/telemetry/otel_sink.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/hcp/telemetry/otel_sink.go b/agent/hcp/telemetry/otel_sink.go index b0d1de61a659..39e9aa599cd8 100644 --- a/agent/hcp/telemetry/otel_sink.go +++ b/agent/hcp/telemetry/otel_sink.go @@ -92,7 +92,7 @@ func NewOTELSink(opts *OTELSinkOpts) (*OTELSink, error) { logger.Error("Failed to initialize all filters", "error", err) } - attrs := make([]attribute.KeyValue, 0) + attrs := 
make([]attribute.KeyValue, 0, len(opts.Labels)) for k, v := range opts.Labels { kv := attribute.KeyValue{ Key: attribute.Key(k), From 24f1ebc68ace70a40c3631877e415065b1accd44 Mon Sep 17 00:00:00 2001 From: Ashvitha Sridharan Date: Tue, 23 May 2023 12:09:41 -0400 Subject: [PATCH 100/100] Use cfg.NodeID instead --- agent/setup.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/setup.go b/agent/setup.go index 184c9c01554f..6e6bb322681d 100644 --- a/agent/setup.go +++ b/agent/setup.go @@ -104,7 +104,7 @@ func NewBaseDeps(configLoader ConfigLoader, logOut io.Writer, providedLogger hcl var extraSinks []metrics.MetricSink if cfg.IsCloudEnabled() { - d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger, d.RuntimeConfig.NodeID) + d.HCP, err = hcp.NewDeps(cfg.Cloud, d.Logger, cfg.NodeID) if err != nil { return d, err }