Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HCP Observability] Metrics filtering and Labels in Go Metrics sink #17184

Merged
merged 104 commits into from
May 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
104 commits
Select commit Hold shift + click to select a range
b6381b7
Move hcp client to subpackage hcpclient (#16800)
Achooo Apr 27, 2023
5b82711
[HCP Observability] New MetricsClient (#17100)
Achooo May 8, 2023
8a16311
[HCP Observability] New MetricsClient (#17100)
Achooo May 8, 2023
c7945b3
[HCP Observability] New MetricsClient (#17100)
Achooo May 8, 2023
e4f0761
Client configured with TLS using HCP config and retry/throttle
Achooo Apr 11, 2023
8bbc289
run go mod tidy
Achooo Apr 25, 2023
f6182b4
Remove one abstraction to use the config from deps
Achooo Apr 26, 2023
009f08e
Address PR feedback
Achooo Apr 28, 2023
41ba7ee
Client configured with TLS using HCP config and retry/throttle
Achooo Apr 11, 2023
8fd34e4
run go mod tidy
Achooo Apr 25, 2023
894cef4
Create new OTELExporter which uses the MetricsClient
Achooo Apr 24, 2023
b50057c
Fix lint error
Achooo Apr 25, 2023
0c01542
early return when there are no metrics
Achooo Apr 26, 2023
da20fe3
Add NewOTELExporter() function
Achooo Apr 28, 2023
749b1c8
Downgrade to metrics SDK version: v1.15.0-rc.1
Achooo May 1, 2023
383b366
Fix imports
Achooo May 1, 2023
1d222b1
fix small nits with comments and url.URL
Achooo May 9, 2023
5564bce
Fix tests by asserting actual error for context cancellation, fix par…
Achooo May 10, 2023
424a065
Cleanup error handling and clarify empty metrics case
Achooo May 10, 2023
470a11d
Fix input/expected naming in otel_transform_test.go
Achooo May 10, 2023
be0b01b
add comment for metric tracking
Achooo May 10, 2023
325bb4d
Add a general isEmpty method
Achooo May 10, 2023
a0352ac
Add clear error types
Achooo May 10, 2023
2d356f6
update to latest version 1.15.0 of OTEL
Achooo May 11, 2023
22bb2ee
Client configured with TLS using HCP config and retry/throttle
Achooo Apr 11, 2023
22be78f
run go mod tidy
Achooo Apr 25, 2023
478181a
Remove one abstraction to use the config from deps
Achooo Apr 26, 2023
c2ffaab
Address PR feedback
Achooo Apr 28, 2023
864e6d7
Initialize OTELSink with sync.Map for all the instrument stores.
Achooo Apr 24, 2023
05c418b
Moved PeriodicReader init to NewOtelReader function. This allows us t…
Achooo Apr 26, 2023
72ae205
Switch to mutex instead of sync.Map to avoid type assertion
Achooo Apr 26, 2023
83fba0a
Add gauge store
Achooo Apr 26, 2023
520ba9f
Clarify comments
Achooo Apr 26, 2023
190ef2a
return concrete sink type
Achooo Apr 26, 2023
659a7dd
Fix lint errors
Achooo Apr 27, 2023
9659a87
Move gauge store to be within sink
Achooo Apr 27, 2023
80e01c7
Use context.TODO,rebase and clenaup opts handling
Achooo Apr 28, 2023
7cbed58
Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
Achooo May 1, 2023
91fcfc7
Fix imports
Achooo May 1, 2023
48d69e3
Update to latest stable version by rebasing on cc-4933, fix import, r…
Achooo May 11, 2023
563330e
Add lots of documentation to the OTELSink
Achooo May 11, 2023
b98481d
Fix gauge store comment and check ok
Achooo May 11, 2023
0162bb6
Add select and ctx.Done() check to gauge callback
Achooo May 11, 2023
899dbba
use require.Equal for attributes
Achooo May 11, 2023
542d23a
Fixed import naming
Achooo May 11, 2023
2d8a18a
Remove float64 calls and add a NewGaugeStore method
Achooo May 11, 2023
5defe6a
Change name Store to Set in gaugeStore, add concurrency tests in both…
Achooo May 11, 2023
a893c32
Generate 100 gauge operations
Achooo May 11, 2023
9d5f5ef
Seperate the labels into goroutines in sink test
Achooo May 11, 2023
80a534b
Generate kv store for the test case keys to avoid using uuid
Achooo May 11, 2023
aa2a971
Added a race test with 300 samples for OTELSink
Achooo May 12, 2023
91f9a89
[HCP Observability] OTELExporter (#17128)
Achooo May 12, 2023
e3e8d3f
Merge and fix conflicts
Achooo May 15, 2023
d13849b
Do not pass in waitgroup and use error channel instead.
Achooo May 15, 2023
713c5fa
Using SHA 7dea2225a218872e86d2f580e82c089b321617b0 to avoid build fai…
Achooo May 15, 2023
c6f2f9c
Rebase onto otl exporter to downgrade metrics API to v1.15.0-rc.1
Achooo May 1, 2023
f2c0d7a
Initialize OTELSink with sync.Map for all the instrument stores.
Achooo Apr 24, 2023
d884daf
Added telemetry agent to client and init sink in deps
Achooo Apr 26, 2023
bf496f3
Fixed client
Achooo Apr 26, 2023
4a047b0
Initalize sink in deps
Achooo Apr 26, 2023
49dfd64
init sink in telemetry library
Achooo Apr 26, 2023
e58ba1e
Init deps before telemetry
Achooo Apr 26, 2023
5cd9e2e
Use concrete telemetry.OtelSink type
Achooo Apr 26, 2023
26bfd94
add /v1/metrics
Achooo Apr 27, 2023
09b498c
Avoid returning err for telemetry init
Achooo Apr 27, 2023
4604fd9
move sink init within the IsCloudEnabled()
Achooo Apr 27, 2023
5f8c33e
Use HCPSinkOpts in deps instead
Achooo Apr 27, 2023
675a494
update golden test for configuration file
Achooo Apr 27, 2023
f9b8e59
Switch to using extra sinks in the telemetry library
Achooo Apr 28, 2023
b07c731
keep name MetricsConfig
Achooo Apr 28, 2023
e6f367f
fix log in verifyCCMRegistration
Achooo Apr 28, 2023
e918544
Set logger in context
Achooo May 1, 2023
d357cb5
pass around MetricSink in deps
Achooo May 1, 2023
3e79438
Fix imports
Achooo May 1, 2023
3d57ff5
Rebased onto otel sink pr
Achooo May 12, 2023
dc48f65
Fix URL in test
Achooo May 15, 2023
218a4f6
[HCP Observability] OTELSink (#17159)
Achooo May 16, 2023
4f7daed
Merge branch feature/hcp-telemetry
Achooo May 16, 2023
19b1d0c
pass extraSinks as function param instead
Achooo May 17, 2023
8ae6af6
Add default interval as package export
Achooo May 17, 2023
f2ef9c7
remove verifyCCM func
Achooo May 17, 2023
ca41a33
Add clusterID
Achooo May 17, 2023
52a1ee4
Fix import and add t.Parallel() for missing tests
Achooo May 17, 2023
e7f6d8b
merge feature/hcp-telemetry
Achooo May 18, 2023
fc86337
Kick Vercel CI
Achooo May 18, 2023
d0dbecb
Remove scheme from endpoint path, and fix error logging
Achooo May 18, 2023
de0deb9
return metrics.MetricSink for sink method
Achooo May 18, 2023
58e838a
Update SDK
Achooo May 23, 2023
60d450d
Added telemetry agent to client and init sink in deps
Achooo Apr 26, 2023
6ac7c5e
Add node_id and __replica__ default labels
Achooo May 15, 2023
8189fae
add function for default labels and set x-hcp-resource-id
Achooo May 15, 2023
a6f8e69
Fix labels tests
Achooo May 15, 2023
e014313
Commit suggestion for getDefaultLabels
Achooo May 16, 2023
2726472
Fixed server.id, and t.Parallel()
Achooo May 17, 2023
eb7bef6
Make defaultLabels a method on the TelemetryConfig object
Achooo May 17, 2023
a66ded2
Rename FilterList to lowercase filterList
Achooo May 17, 2023
6f70376
Cleanup filter implemetation by combining regex into a single one, an…
Achooo May 17, 2023
dec07b2
Fix append
Achooo May 17, 2023
7facba6
use regex directly for filters
Achooo May 18, 2023
5692d32
Fix x-resource-id test to use mocked value
Achooo May 18, 2023
c1575e5
Fix log.Error formats
Achooo May 18, 2023
31a0aed
Forgot the len(opts.Label) optimization)
Achooo May 19, 2023
24f1ebc
Use cfg.NodeID instead
Achooo May 23, 2023
1dd5e2b
merge feature/hcp-telemetry
Achooo May 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions agent/hcp/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,3 +296,17 @@ func (t *TelemetryConfig) Enabled() (string, bool) {
// The endpoint from Telemetry Gateway is a domain without scheme, and without the metrics path, so they must be added.
return endpoint + metricsGatewayPath, true
}

// DefaultLabels returns a set of <key, value> string pairs that must be added as attributes to all exported telemetry data.
func (t *TelemetryConfig) DefaultLabels(nodeID string) map[string]string {
labels := map[string]string{
"__replica__": nodeID, // used for Cortex HA-metrics (deduplication)
"node_id": nodeID, // used to delineate Consul nodes in graphs
}

for k, v := range t.Labels {
labels[k] = v
}

return labels
}
8 changes: 8 additions & 0 deletions agent/hcp/client/metrics_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/hashicorp/go-retryablehttp"
hcpcfg "github.com/hashicorp/hcp-sdk-go/config"
"github.com/hashicorp/hcp-sdk-go/resource"
colmetricpb "go.opentelemetry.io/proto/otlp/collector/metrics/v1"
metricpb "go.opentelemetry.io/proto/otlp/metrics/v1"
"golang.org/x/oauth2"
Expand All @@ -37,6 +38,7 @@ type MetricsClient interface {
// cloudConfig represents cloud config for TLS abstracted in an interface for easy testing.
type CloudConfig interface {
HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error)
Resource() (resource.Resource, error)
}

// otlpClient is an implementation of MetricsClient with a retryable http client for retries and to honor throttle.
Expand Down Expand Up @@ -64,8 +66,14 @@ func NewMetricsClient(cfg CloudConfig, ctx context.Context) (MetricsClient, erro
return nil, fmt.Errorf("failed to init telemetry client: %v", err)
}

r, err := cfg.Resource()
if err != nil {
return nil, fmt.Errorf("failed to init telemetry client: %v", err)
}

header := make(http.Header)
header.Set("Content-Type", "application/x-protobuf")
header.Set("x-hcp-resource-id", r.String())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Set default x-hcp-resource-id which should be generated using the Go HCP SDK.


return &otlpClient{
client: c,
Expand Down
16 changes: 13 additions & 3 deletions agent/hcp/client/metrics_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package client

import (
"context"
"fmt"
"net/http"
"net/http/httptest"
"testing"
Expand Down Expand Up @@ -34,8 +35,17 @@ func TestNewMetricsClient(t *testing.T) {
},
"failsHCPConfig": {
wantErr: "failed to init telemetry client",
cfg: MockErrCloudCfg{},
ctx: context.Background(),
cfg: MockCloudCfg{
ConfigErr: fmt.Errorf("test bad hcp config"),
},
ctx: context.Background(),
},
"failsBadResource": {
wantErr: "failed to init telemetry client",
cfg: MockCloudCfg{
ResourceErr: fmt.Errorf("test bad resource"),
},
ctx: context.Background(),
},
} {
t.Run(name, func(t *testing.T) {
Expand Down Expand Up @@ -68,7 +78,7 @@ func TestExportMetrics(t *testing.T) {
t.Run(name, func(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
require.Equal(t, r.Header.Get("Content-Type"), "application/x-protobuf")

require.Equal(t, r.Header.Get("x-hcp-resource-id"), testResourceID)
require.Equal(t, r.Header.Get("Authorization"), "Bearer test-token")

body := colpb.ExportMetricsServiceResponse{}
Expand Down
25 changes: 17 additions & 8 deletions agent/hcp/client/mock_CloudConfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@ package client

import (
"crypto/tls"
"errors"
"net/url"

hcpcfg "github.com/hashicorp/hcp-sdk-go/config"
"github.com/hashicorp/hcp-sdk-go/profile"
"github.com/hashicorp/hcp-sdk-go/resource"
"golang.org/x/oauth2"
)

const testResourceID = "organization/test-org/project/test-project/test-type/test-id"

type mockHCPCfg struct{}

func (m *mockHCPCfg) Token() (*oauth2.Token, error) {
Expand All @@ -25,14 +27,21 @@ func (m *mockHCPCfg) APIAddress() string { return "" }
func (m *mockHCPCfg) PortalURL() *url.URL { return &url.URL{} }
func (m *mockHCPCfg) Profile() *profile.UserProfile { return nil }

type MockCloudCfg struct{}

func (m MockCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) {
return &mockHCPCfg{}, nil
type MockCloudCfg struct {
ConfigErr error
ResourceErr error
}

type MockErrCloudCfg struct{}
func (m MockCloudCfg) Resource() (resource.Resource, error) {
r := resource.Resource{
ID: "test-id",
Type: "test-type",
Organization: "test-org",
Project: "test-project",
}
return r, m.ResourceErr
}

func (m MockErrCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) {
return nil, errors.New("test bad HCP config")
func (m MockCloudCfg) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) {
return &mockHCPCfg{}, m.ConfigErr
}
5 changes: 5 additions & 0 deletions agent/hcp/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"crypto/tls"

hcpcfg "github.com/hashicorp/hcp-sdk-go/config"
"github.com/hashicorp/hcp-sdk-go/resource"
)

// CloudConfig defines configuration for connecting to HCP services
Expand All @@ -30,6 +31,10 @@ func (c *CloudConfig) WithTLSConfig(cfg *tls.Config) {
c.TLSConfig = cfg
}

func (c *CloudConfig) Resource() (resource.Resource, error) {
return resource.FromString(c.ResourceID)
}

func (c *CloudConfig) HCPConfig(opts ...hcpcfg.HCPConfigOption) (hcpcfg.HCPConfig, error) {
if c.TLSConfig == nil {
c.TLSConfig = &tls.Config{}
Expand Down
13 changes: 8 additions & 5 deletions agent/hcp/deps.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/hashicorp/consul/agent/hcp/config"
"github.com/hashicorp/consul/agent/hcp/scada"
"github.com/hashicorp/consul/agent/hcp/telemetry"
"github.com/hashicorp/consul/types"
"github.com/hashicorp/go-hclog"
)

Expand All @@ -23,7 +24,7 @@ type Deps struct {
Sink metrics.MetricSink
}

func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) {
func NewDeps(cfg config.CloudConfig, logger hclog.Logger, nodeID types.NodeID) (d Deps, err error) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need to pass in the NodeID here to set the labels

d.Client, err = hcpclient.NewClient(cfg)
if err != nil {
return
Expand All @@ -34,15 +35,15 @@ func NewDeps(cfg config.CloudConfig, logger hclog.Logger) (d Deps, err error) {
return
}

d.Sink = sink(d.Client, &cfg, logger)
d.Sink = sink(d.Client, &cfg, logger, nodeID)

return
}

// sink provides initializes an OTELSink which forwards Consul metrics to HCP.
// The sink is only initialized if the server is registered with the management plane (CCM).
// This step should not block server initialization, so errors are logged, but not returned.
func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger) metrics.MetricSink {
func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Logger, nodeID types.NodeID) metrics.MetricSink {
ctx := context.Background()
ctx = hclog.WithContext(ctx, logger)

Expand Down Expand Up @@ -73,8 +74,10 @@ func sink(hcpClient hcpclient.Client, cfg hcpclient.CloudConfig, logger hclog.Lo
}

sinkOpts := &telemetry.OTELSinkOpts{
Ctx: ctx,
Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval),
Ctx: ctx,
Reader: telemetry.NewOTELReader(metricsClient, u, telemetry.DefaultExportInterval),
Labels: telemetryCfg.DefaultLabels(string(nodeID)),
Filters: telemetryCfg.MetricsConfig.Filters,
}

sink, err := telemetry.NewOTELSink(sinkOpts)
Expand Down
7 changes: 5 additions & 2 deletions agent/hcp/deps_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"github.com/stretchr/testify/require"

"github.com/hashicorp/consul/agent/hcp/client"
"github.com/hashicorp/consul/types"
)

func TestSink(t *testing.T) {
Expand Down Expand Up @@ -48,6 +49,9 @@ func TestSink(t *testing.T) {
mockCloudCfg: client.MockCloudCfg{},
},
"noSinkWhenMetricsClientInitFails": {
mockCloudCfg: client.MockCloudCfg{
ConfigErr: fmt.Errorf("test bad hcp config"),
},
expect: func(mockClient *client.MockClient) {
mockClient.EXPECT().FetchTelemetryConfig(mock.Anything).Return(&client.TelemetryConfig{
Endpoint: "https://test.com",
Expand All @@ -56,7 +60,6 @@ func TestSink(t *testing.T) {
},
}, nil)
},
mockCloudCfg: client.MockErrCloudCfg{},
},
"failsWithFetchTelemetryFailure": {
expect: func(mockClient *client.MockClient) {
Expand Down Expand Up @@ -92,7 +95,7 @@ func TestSink(t *testing.T) {
c := client.NewMockClient(t)
l := hclog.NewNullLogger()
test.expect(c)
sinkOpts := sink(c, test.mockCloudCfg, l)
sinkOpts := sink(c, test.mockCloudCfg, l, types.NodeID("server1234"))
if !test.expectedSink {
require.Nil(t, sinkOpts)
return
Expand Down
37 changes: 37 additions & 0 deletions agent/hcp/telemetry/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package telemetry

import (
"fmt"
"regexp"
"strings"

"github.com/hashicorp/go-multierror"
)

// newFilterRegex returns a valid regex used to filter metrics.
// It will fail if there are 0 valid regex filters given.
func newFilterRegex(filters []string) (*regexp.Regexp, error) {
var mErr error
validFilters := make([]string, 0, len(filters))
for _, filter := range filters {
_, err := regexp.Compile(filter)
if err != nil {
mErr = multierror.Append(mErr, fmt.Errorf("compilation of filter %q failed: %w", filter, err))
continue
}
validFilters = append(validFilters, filter)
}

if len(validFilters) == 0 {
return nil, multierror.Append(mErr, fmt.Errorf("no valid filters"))
}

// Combine the valid regex strings with an OR.
finalRegex := strings.Join(validFilters, "|")
composedRegex, err := regexp.Compile(finalRegex)
if err != nil {
return nil, fmt.Errorf("failed to compile regex: %w", err)
}

return composedRegex, nil
}
58 changes: 58 additions & 0 deletions agent/hcp/telemetry/filter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package telemetry

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestFilter(t *testing.T) {
t.Parallel()
for name, tc := range map[string]struct {
filters []string
expectedRegexString string
matches []string
wantErr string
wantMatch bool
}{
"badFilterRegex": {
filters: []string{"(*LF)"},
wantErr: "no valid filters",
},
"failsWithNoRegex": {
filters: []string{},
wantErr: "no valid filters",
},
"matchFound": {
filters: []string{"raft.*", "mem.*"},
expectedRegexString: "raft.*|mem.*",
matches: []string{"consul.raft.peers", "consul.mem.heap_size"},
wantMatch: true,
},
"matchNotFound": {
filters: []string{"mem.*"},
matches: []string{"consul.raft.peers", "consul.txn.apply"},
expectedRegexString: "mem.*",
wantMatch: false,
},
} {
tc := tc
t.Run(name, func(t *testing.T) {
t.Parallel()
f, err := newFilterRegex(tc.filters)

if tc.wantErr != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tc.wantErr)
return
}

require.NoError(t, err)
require.Equal(t, tc.expectedRegexString, f.String())
for _, metric := range tc.matches {
m := f.MatchString(metric)
require.Equal(t, tc.wantMatch, m)
}
})
}
}
Loading