Skip to content

Commit

Permalink
feat: add otel tracing support for Refinery internal operations (#1218)
Browse files Browse the repository at this point in the history
<!--
Thank you for contributing to the project! 💜
Please make sure to:
- Chat with us first if this is a big change
  - Open a new issue (or comment on an existing one)
- We want to make sure you don't spend time implementing something we
might have to say No to
- Add unit tests
- Mention any relevant issues in the PR description (e.g. "Fixes #123")

Please see our [OSS process
document](https://github.com/honeycombio/home/blob/main/honeycomb-oss-lifecycle-and-practices.md#)
to get an idea of how we operate.
-->

## Which problem is this PR solving?

Enable otel tracing for debugging Refinery internal operations.

## Short description of the changes

- Add otel tracing configurations
- inject a tracer on Refinery startup
- Update `github.com/honeycombio/opentelemetry-proto-go/otlp` to v1.3.1

---------

Co-authored-by: Kent Quirk <kentquirk@honeycomb.io>
  • Loading branch information
VinozzZ and kentquirk authored Jul 2, 2024
1 parent 6536ea8 commit b658cf7
Show file tree
Hide file tree
Showing 11 changed files with 331 additions and 29 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,6 @@ dockerize*
.idea/

.DS_Store

# redis dump file
*.rdb
17 changes: 17 additions & 0 deletions cmd/refinery/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (
"syscall"
"time"

"go.opentelemetry.io/otel/trace"
"go.opentelemetry.io/otel/trace/noop"
_ "go.uber.org/automaxprocs"
"golang.org/x/exp/slices"

Expand All @@ -23,6 +25,7 @@ import (
"github.com/honeycombio/refinery/app"
"github.com/honeycombio/refinery/collect"
"github.com/honeycombio/refinery/config"
"github.com/honeycombio/refinery/internal/otelutil"
"github.com/honeycombio/refinery/internal/peer"
"github.com/honeycombio/refinery/logger"
"github.com/honeycombio/refinery/metrics"
Expand Down Expand Up @@ -198,6 +201,18 @@ func main() {
oTelMetrics = &metrics.OTelMetrics{}
}

resourceLib := "refinery"
resourceVer := version
tracer := trace.Tracer(noop.Tracer{})
shutdown := func() {}

if c.GetOTelTracingConfig().Enabled {
// let's set up some OTel tracing
tracer, shutdown = otelutil.SetupTracing(c.GetOTelTracingConfig(), resourceLib, resourceVer)
}

defer shutdown()

// we need to include all the metrics types so we can inject them in case they're needed
var g inject.Graph
if opts.Debug {
Expand All @@ -216,6 +231,8 @@ func main() {
{Value: legacyMetrics, Name: "legacyMetrics"},
{Value: promMetrics, Name: "promMetrics"},
{Value: oTelMetrics, Name: "otelMetrics"},
{Value: tracer, Name: "tracer"},

{Value: metricsSingleton, Name: "metrics"},
{Value: genericMetricsRecorder, Name: "genericMetrics"},
{Value: upstreamMetricsRecorder, Name: "upstreamMetrics"},
Expand Down
1 change: 1 addition & 0 deletions config/cmdenv.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ type CmdEnv struct {
HoneycombLoggerAPIKey string `long:"logger-api-key" env:"REFINERY_HONEYCOMB_LOGGER_API_KEY" description:"Honeycomb logger API key"`
LegacyMetricsAPIKey string `long:"legacy-metrics-api-key" env:"REFINERY_HONEYCOMB_METRICS_API_KEY" description:"API key for legacy Honeycomb metrics"`
OTelMetricsAPIKey string `long:"otel-metrics-api-key" env:"REFINERY_OTEL_METRICS_API_KEY" description:"API key for OTel metrics if being sent to Honeycomb"`
OTelTracesAPIKey string `long:"otel-traces-api-key" env:"REFINERY_OTEL_TRACES_API_KEY" description:"API key for OTel traces if being sent to Honeycomb"`
QueryAuthToken string `long:"query-auth-token" env:"REFINERY_QUERY_AUTH_TOKEN" description:"Token for debug/management queries"`
AvailableMemory MemorySize `long:"available-memory" env:"REFINERY_AVAILABLE_MEMORY" description:"The maximum memory available for Refinery to use (ex: 4GiB)."`
Debug bool `short:"d" long:"debug" description:"Runs debug service (on the first open port between localhost:6060 and :6069 by default)"`
Expand Down
2 changes: 2 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,8 @@ type Config interface {

GetIdentifierInterfaceName() (string, error)

GetOTelTracingConfig() OTelTracingConfig

GetUseIPV6Identifier() (bool, error)

GetRedisIdentifier() (string, error)
Expand Down
4 changes: 4 additions & 0 deletions config/configLoadHelpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ func validateConfig(opts *CmdEnv) ([]string, error) {
config.OTelMetrics.APIKey = "InvalidHoneycombAPIKey"
}

if config.OTelTracing.APIKey == "" {
config.OTelTracing.APIKey = "InvalidHoneycombAPIKey"
}

// write it out to a YAML buffer
buf := new(bytes.Buffer)
encoder := yaml.NewEncoder(buf)
Expand Down
16 changes: 16 additions & 0 deletions config/file_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ type configContents struct {
PrometheusMetrics PrometheusMetricsConfig `yaml:"PrometheusMetrics"`
LegacyMetrics LegacyMetricsConfig `yaml:"LegacyMetrics"`
OTelMetrics OTelMetricsConfig `yaml:"OTelMetrics"`
OTelTracing OTelTracingConfig `yaml:"OTelTracing"`
PeerManagement PeerManagementConfig `yaml:"PeerManagement"`
RedisPeerManagement RedisPeerManagementConfig `yaml:"RedisPeerManagement"`
Collection CollectionConfig `yaml:"Collection"`
Expand Down Expand Up @@ -183,6 +184,14 @@ type OTelMetricsConfig struct {
ReportingInterval Duration `yaml:"ReportingInterval" default:"30s"`
}

// OTelTracingConfig holds the values of the `OTelTracing` section of the
// configuration file. These settings describe the OpenTelemetry traces that
// Refinery emits about its own internal operations.
type OTelTracingConfig struct {
// Enabled switches Refinery's own tracing on or off. It defaults to false.
Enabled bool `yaml:"Enabled" default:"false"`
// APIHost is the base URL of the endpoint that receives the traces. It
// defaults to Honeycomb's public API host.
APIHost string `yaml:"APIHost" default:"https://api.honeycomb.io"`
// APIKey is the key used when sending traces. The cmdenv tag names the
// OTelTracesAPIKey and HoneycombAPIKey command-line/environment options as
// sources for this value.
// NOTE(review): presumably the first non-empty option in the listed order
// wins -- confirm against the cmdenv loader.
APIKey string `yaml:"APIKey" cmdenv:"OTelTracesAPIKey,HoneycombAPIKey"`
// Dataset is the name of the dataset the traces are sent to. It defaults
// to "Refinery Traces".
Dataset string `yaml:"Dataset" default:"Refinery Traces"`
// SampleRate is the rate at which Refinery samples its own traces. It
// defaults to 100.
SampleRate uint64 `yaml:"SampleRate" default:"100"`
}

type PeerManagementConfig struct {
Type string `yaml:"Type" default:"file"`
Identifier string `yaml:"Identifier"`
Expand Down Expand Up @@ -832,6 +841,13 @@ func (f *fileConfig) GetEnvironmentCacheTTL() time.Duration {
return time.Duration(f.mainConfig.Specialized.EnvironmentCacheTTL)
}

// GetOTelTracingConfig returns, by value, the OTelTracing section of the
// currently loaded main configuration. The read is performed while holding
// the config's read lock.
func (f *fileConfig) GetOTelTracingConfig() OTelTracingConfig {
	f.mux.RLock()
	defer f.mux.RUnlock()

	tracingCfg := f.mainConfig.OTelTracing
	return tracingCfg
}

func (f *fileConfig) GetDatasetPrefix() string {
f.mux.RLock()
defer f.mux.RUnlock()
Expand Down
73 changes: 73 additions & 0 deletions config/metadata/configMeta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,79 @@ groups:
compression costs may outweigh the benefits, in which case `none`
may be used.
- name: OTelTracing
title: "OpenTelemetry Tracing"
description: contains configuration for Refinery's own tracing.
fields:
- name: Enabled
type: bool
valuetype: nondefault
default: false
reload: false
firstversion: v2.6
summary: controls whether to send Refinery's own otel traces.
description: >
This setting specifies whether Refinery sends traces of its own internal operations.
- name: APIHost
type: url
valuetype: nondefault
default: "https://api.honeycomb.io"
reload: false
firstversion: v2.6
summary: is the URL of the OpenTelemetry API to which traces will be sent.
description: >
Refinery's internal traces will be sent to the `/v1/traces`
endpoint on this host.
- name: APIKey
type: string
pattern: apikey
valuetype: nondefault
default: ""
example: "SetThisToAHoneycombKey"
reload: false
firstversion: v2.6
envvar: REFINERY_OTEL_TRACES_API_KEY, REFINERY_HONEYCOMB_API_KEY
commandline: otel-traces-api-key
validations:
- type: format
arg: apikey
summary: is the API key used to send Refinery's traces to Honeycomb.
description: >
It is recommended that you create a separate team and key for
Refinery telemetry.
If this is blank, then Refinery will not set the Honeycomb-specific
headers for OpenTelemetry, and your `APIHost` must be set to a
valid OpenTelemetry endpoint.
- name: Dataset
type: string
valuetype: nondefault
default: "Refinery Traces"
reload: false
validations:
- type: notempty
firstversion: v2.6
summary: is the Honeycomb dataset to which Refinery sends its OpenTelemetry traces.
description: >
Only used if `APIKey` is specified.
- name: SampleRate
type: int
valuetype: nondefault
default: 100
validations:
- type: minimum
arg: 1
reload: true
summary: is the rate at which Refinery samples its own traces.
description: >
This is the Honeycomb sample rate used to sample traces sent by Refinery. Since each
incoming span generates multiple outgoing spans, a sample rate of at least 100 is
strongly advised.
- name: PeerManagement
title: "Peer Management"
description: controls how the Refinery cluster communicates between peers.
Expand Down
8 changes: 8 additions & 0 deletions config/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ type MockConfig struct {
GetLegacyMetricsConfigVal LegacyMetricsConfig
GetPrometheusMetricsConfigVal PrometheusMetricsConfig
GetOTelMetricsConfigVal OTelMetricsConfig
GetOTelTracingConfigVal OTelTracingConfig
GetSendDelayErr error
GetSendDelayVal time.Duration
GetBatchTimeoutVal time.Duration
Expand Down Expand Up @@ -295,6 +296,13 @@ func (m *MockConfig) GetOTelMetricsConfig() OTelMetricsConfig {
return m.GetOTelMetricsConfigVal
}

// GetOTelTracingConfig returns, by value, whatever has been stored in
// GetOTelTracingConfigVal. The read is performed while holding the mock's
// read lock.
func (m *MockConfig) GetOTelTracingConfig() OTelTracingConfig {
	m.Mux.RLock()
	defer m.Mux.RUnlock()

	tracingCfg := m.GetOTelTracingConfigVal
	return tracingCfg
}

func (m *MockConfig) GetSendDelay() (time.Duration, error) {
m.Mux.RLock()
defer m.Mux.RUnlock()
Expand Down
39 changes: 28 additions & 11 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ require (
github.com/honeycombio/dynsampler-go v0.6.0
github.com/honeycombio/husky v0.30.0
github.com/honeycombio/libhoney-go v1.23.1
github.com/honeycombio/otel-config-go v1.16.0
github.com/jessevdk/go-flags v1.5.0
github.com/jonboulle/clockwork v0.4.0
github.com/json-iterator/go v1.1.12
Expand All @@ -38,7 +39,8 @@ require (
go.opentelemetry.io/otel/metric v1.27.0
go.opentelemetry.io/otel/sdk v1.27.0
go.opentelemetry.io/otel/sdk/metric v1.27.0
go.opentelemetry.io/proto/otlp v1.2.0
go.opentelemetry.io/otel/trace v1.27.0
go.opentelemetry.io/proto/otlp v1.3.1
go.uber.org/automaxprocs v1.5.3
golang.org/x/exp v0.0.0-20231127185646-65229373498e
google.golang.org/grpc v1.64.0
Expand All @@ -57,28 +59,43 @@ require (
github.com/facebookgo/limitgroup v0.0.0-20150612190941-6abd8d71ec01 // indirect
github.com/facebookgo/muster v0.0.0-20150708232844-fd3d7953fd52 // indirect
github.com/facebookgo/structtag v0.0.0-20150214074306-217e25fb9691 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
github.com/lufia/plan9stats v0.0.0-20240513124658-fba389f38bae // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.48.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/sethvargo/go-envconfig v1.0.3 // indirect
github.com/shirou/gopsutil/v3 v3.24.5 // indirect
github.com/shoenig/go-m1cpu v0.1.6 // indirect
github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect
github.com/tklauser/go-sysconf v0.3.14 // indirect
github.com/tklauser/numcpus v0.8.0 // indirect
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
go.opentelemetry.io/otel/trace v1.27.0 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.9.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/text v0.15.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240520151616-dc85e6b867a5 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240515191416-fc5f0ca64291 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
go.opentelemetry.io/contrib/instrumentation/host v0.52.0 // indirect
go.opentelemetry.io/contrib/instrumentation/runtime v0.52.0 // indirect
go.opentelemetry.io/contrib/propagators/b3 v1.27.0 // indirect
go.opentelemetry.io/contrib/propagators/ot v1.27.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.27.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.27.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.27.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/net v0.26.0 // indirect
golang.org/x/sys v0.21.0 // indirect
golang.org/x/text v0.16.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240617180043-68d350f18fd4 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240617180043-68d350f18fd4 // indirect
)

replace go.opentelemetry.io/proto/otlp => github.com/honeycombio/opentelemetry-proto-go/otlp v0.19.0-compat
replace go.opentelemetry.io/proto/otlp => github.com/honeycombio/opentelemetry-proto-go/otlp v1.3.1-compat

replace github.com/panmari/cuckoofilter => github.com/honeycombio/cuckoofilter v0.0.0-20230630225016-cf48793fb7c1
Loading

0 comments on commit b658cf7

Please sign in to comment.