From 2e736089aced9fc1e0802d3ac65d710393881cae Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Tue, 24 May 2022 15:55:38 -0400 Subject: [PATCH 01/27] WIP per-service fx-ified OTEL tracing --- common/telemetry/attribute.go | 69 ++++++++++++++++++++ common/telemetry/fx.go | 48 ++++++++++++++ common/telemetry/fx_test.go | 55 ++++++++++++++++ common/telemetry/otel.go | 81 ++++++++++++++++++++++++ go.mod | 12 +++- go.sum | 20 +++++- service/frontend/fx.go | 3 + service/fx.go | 3 + service/history/consts/const.go | 1 + service/history/fx.go | 4 ++ service/history/handler.go | 3 + service/history/historyEngine.go | 4 ++ service/history/historyEngineFactory.go | 3 + service/history/shard/controller_impl.go | 2 + service/history/shard/fx.go | 4 ++ service/matching/fx.go | 3 + service/worker/fx.go | 3 + 17 files changed, 316 insertions(+), 2 deletions(-) create mode 100644 common/telemetry/attribute.go create mode 100644 common/telemetry/fx.go create mode 100644 common/telemetry/fx_test.go create mode 100644 common/telemetry/otel.go diff --git a/common/telemetry/attribute.go b/common/telemetry/attribute.go new file mode 100644 index 00000000000..1270943e2d3 --- /dev/null +++ b/common/telemetry/attribute.go @@ -0,0 +1,69 @@ +package telemetry + +import ( + "fmt" + "strings" + + "go.opentelemetry.io/otel/attribute" + logtag "go.temporal.io/server/common/log/tag" + "go.temporal.io/server/common/metrics" +) + +type ( + // LogKey is a type wrapper for the key part of a common/log/tag.Tag that + // can be used to create full common/log/tag.Tag instances. It is intended + // to be used as a const global and shared throughout the codebase. + LogKey string + + // MetricsKey is a type wrapper for the key part of a common/metrics.Tag that + // can be used to create full common/metrics.Tag instances. It is intended + // to be used as a const global and shared throughout the codebase. + MetricsKey string + + // Key is a type that brings some consistency to the server's use of log, + // trace, and metrics annotations. The three tagging systems currently in + // use are not compatible so we provide this type to at least normalize + // naming. Consumers are expected to create package-global Key instances and + // then use those instances to create values as appropriate to the context. + Key struct { + OTEL attribute.Key + Log LogKey + Metrics MetricsKey + } + + mtag struct { + k, v string + } +) + +func (t mtag) Key() string { return t.k } +func (t mtag) Value() string { return t.v } + +// NewKey creates a new Key instance, inserting the "io.temporal." prefix for +// OTEL attributes if it is absent. +func NewKey(str string) Key { + fqstr := str + if !strings.HasPrefix(str, "io.temporal") { + fqstr = fmt.Sprintf("io.temporal.%s", str) + } + return Key{ + OTEL: attribute.Key(fqstr), + Log: LogKey(str), + Metrics: MetricsKey(str), + } +} + +// String creates a new string-valued logging tag +func (k LogKey) String(v string) logtag.Tag { + return logtag.NewStringTag(string(k), v) +} + +// Int creates a new int-valued logging tag +func (k LogKey) Int(v int) logtag.Tag { + return logtag.NewInt(string(k), v) +} + +// String creates a new string-valued metrics tag +func (k MetricsKey) String(v string) metrics.Tag { + return mtag{k: string(k), v: v} +} diff --git a/common/telemetry/fx.go b/common/telemetry/fx.go new file mode 100644 index 00000000000..94db60c85a3 --- /dev/null +++ b/common/telemetry/fx.go @@ -0,0 +1,48 @@ +package telemetry + +import ( + "context" + + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "go.opentelemetry.io/otel/trace" + "go.uber.org/fx" +) + +// GlobalModule holds process-global telemetry fx state. The following +// types can be overriden with fx.Replace/fx.Decorate: +// - go.opentelemetry.io/otel/sdk/trace.SpanExporter +var GlobalModule = fx.Module("io.temporal.telemetry.Global", fx.Provide(newExporter)) + +// ServiceModule holds per-service (i.e. frontend/history/matching/worker) fx +// state. The following types can be overriden with fx.Replace/fx.Decorate: +// - []go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Option +// - go.opentelemetry.io/otel/sdk/trace.SpanProcessor +// - go.opentelemetry.io/otel/trace.TracerProvider +// - *go.opentelemetry.io/otel/sdk/resource.Resource +// - []go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/ogelgrpc.Option +// +// If the TracerProvider provided by this module supports graceful shutdown, +// that function is invoked when the module is stopped. +func ServiceModule(serviceName string) fx.Option { + grpcInstrumentationOpts := func(tp trace.TracerProvider) []otelgrpc.Option { + return []otelgrpc.Option{otelgrpc.WithTracerProvider(tp)} + } + + shutdownTracing := func(lc fx.Lifecycle, tp trace.TracerProvider) { + type shutdowner interface{ Shutdown(context.Context) error } + if shutdowner, ok := tp.(shutdowner); ok { + lc.Append(fx.Hook{OnStop: shutdowner.Shutdown}) + } + } + + return fx.Module( + serviceName, + fx.Supply(mustParseServiceName(serviceName)), + fx.Provide(grpcExporterOpts), + fx.Provide(newSpanProcessor), + fx.Provide(newResource), + fx.Provide(newTracerProvider), + fx.Provide(grpcInstrumentationOpts), + fx.Invoke(shutdownTracing), + ) +} diff --git a/common/telemetry/fx_test.go b/common/telemetry/fx_test.go new file mode 100644 index 00000000000..12e8d95064c --- /dev/null +++ b/common/telemetry/fx_test.go @@ -0,0 +1,55 @@ +package telemetry_test + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "go.opentelemetry.io/otel/sdk/trace/tracetest" + semconv "go.opentelemetry.io/otel/semconv/v1.10.0" + "go.opentelemetry.io/otel/trace" + "go.temporal.io/server/common/telemetry" + "go.uber.org/fx" +) + +const svcname = "io.temporal.some_service" + +type CapturingExporter struct { + *tracetest.InMemoryExporter +} + +// Shutdown hides the default Shutdown behavior of the InMemoryExporter +// which is to clear the in-memory set of captured spans. We don't want that. +func (ce CapturingExporter) Shutdown(context.Context) error { + return nil +} + +func TestServiceModule(t *testing.T) { + sink := CapturingExporter{tracetest.NewInMemoryExporter()} + fx.New( + telemetry.GlobalModule, + telemetry.ServiceModule(svcname), + + // replace batching otlp span exporter with a synchronous in-memory + // version of the same interfaces + fx.Replace(fx.Annotate( + sdktrace.NewSimpleSpanProcessor(sink), + fx.As(new(sdktrace.SpanProcessor)))), + + // create a single span and then shutdown + fx.Invoke(func(tp trace.TracerProvider, shutdowner fx.Shutdowner) { + _, span := tp.Tracer(t.Name()).Start(context.TODO(), "span0") + span.End() + shutdowner.Shutdown() + }), + ).Run() + + spans := sink.GetSpans() + require.Len(t, spans, 1) + require.Equal(t, "span0", spans[0].Name) + require.Contains(t, + spans[0].Resource.Attributes(), + semconv.ServiceNameKey.String(svcname)) + require.Equal(t, spans[0].InstrumentationLibrary.Name, t.Name()) +} diff --git a/common/telemetry/otel.go b/common/telemetry/otel.go new file mode 100644 index 00000000000..82caf8b8b73 --- /dev/null +++ b/common/telemetry/otel.go @@ -0,0 +1,81 @@ +package telemetry + +import ( + "context" + "errors" + "fmt" + "strings" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.10.0" + "go.opentelemetry.io/otel/trace" +) + +func init() { + // make this global - it doesn't vary by service + // intentionally omitting baggage propagation until we need it. + otel.SetTextMapPropagator(propagation.TraceContext{}) +} + +type serviceName string + +func (sn serviceName) String() string { + return string(sn) +} + +func mustParseServiceName(str string) serviceName { + result, err := parseServiceName(str) + if err != nil { + panic(err) + } + return result +} + +func parseServiceName(str string) (serviceName, error) { + if str == "" { + return "", errors.New("empty service name") + } + if !strings.HasPrefix(str, "io.temporal.") { + str = fmt.Sprintf("io.temporal.%v", str) + } + return serviceName(str), nil +} + +func grpcExporterOpts() []otlptracegrpc.Option { + return []otlptracegrpc.Option{ + // this is the same as the default - just here for clarity + otlptracegrpc.WithEndpoint("localhost:4317"), + otlptracegrpc.WithInsecure(), + } +} + +func newExporter(opts []otlptracegrpc.Option) (sdktrace.SpanExporter, error) { + return otlptrace.New(context.Background(), otlptracegrpc.NewClient(opts...)) +} + +func newTracerProvider( + resource *resource.Resource, + sp sdktrace.SpanProcessor, +) (trace.TracerProvider, error) { + return sdktrace.NewTracerProvider( + sdktrace.WithSpanProcessor(sp), + sdktrace.WithResource(resource), + ), nil +} + +func newSpanProcessor(exp sdktrace.SpanExporter) sdktrace.SpanProcessor { + return sdktrace.NewBatchSpanProcessor(exp) +} + +func newResource(serviceName serviceName) (*resource.Resource, error) { + return resource.Merge( + resource.Default(), + resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(serviceName.String()))) +} diff --git a/go.mod b/go.mod index 37c68d9f1e8..8120a3099e8 100644 --- a/go.mod +++ b/go.mod @@ -37,7 +37,10 @@ require ( github.com/urfave/cli v1.22.5 github.com/urfave/cli/v2 v2.4.0 github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0 go.opentelemetry.io/otel v1.7.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 go.opentelemetry.io/otel/exporters/prometheus v0.30.0 go.opentelemetry.io/otel/metric v0.30.0 go.opentelemetry.io/otel/sdk v1.7.0 @@ -75,6 +78,13 @@ require ( github.com/aws/aws-sdk-go-v2/service/sso v1.11.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.16.4 // indirect github.com/aws/smithy-go v1.11.2 // indirect + github.com/cenkalti/backoff/v4 v4.1.3 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect + go.opentelemetry.io/proto/otlp v0.16.0 // indirect +) + +require ( github.com/benbjohnson/clock v1.3.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect @@ -112,7 +122,7 @@ require ( github.com/twmb/murmur3 v1.1.6 // indirect github.com/uber-common/bark v1.3.0 // indirect go.opencensus.io v0.23.0 // indirect - go.opentelemetry.io/otel/trace v1.7.0 // indirect + go.opentelemetry.io/otel/trace v1.7.0 go.uber.org/dig v1.14.1 // indirect golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect diff --git a/go.sum b/go.sum index d8200a3e217..a29f15e231c 100644 --- a/go.sum +++ b/go.sum @@ -116,6 +116,8 @@ github.com/brianvoe/gofakeit/v6 v6.15.0/go.mod h1:Ow6qC71xtwm79anlwKRlWZW6zVq9D2 github.com/cactus/go-statsd-client/statsd v0.0.0-20191106001114-12b4e2b38748/go.mod h1:l/bIBLeOl9eX+wxJAzxS4TveKRtAqlyDpHjhkfO0MEI= github.com/cactus/go-statsd-client/statsd v0.0.0-20200423205355-cb0885a1018c h1:HIGF0r/56+7fuIZw2V4isE22MK6xpxWx7BbV8dJ290w= github.com/cactus/go-statsd-client/statsd v0.0.0-20200423205355-cb0885a1018c/go.mod h1:l/bIBLeOl9eX+wxJAzxS4TveKRtAqlyDpHjhkfO0MEI= +github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= +github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -196,6 +198,8 @@ github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY github.com/golang-jwt/jwt/v4 v4.4.1 h1:pC5DB52sCeK48Wlb9oPcdhnjkz1TKt1D/P7WKJ0kUcQ= github.com/golang-jwt/jwt/v4 v4.4.1/go.mod h1:m21LjoU+eqJr34lmDMbreY2eSTRJ1cv77w39/MY0Ch0= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.0.0 h1:nfP3RFugxnNRyKgeWd4oI1nYvXpxrx8ck8ZrcizshdQ= +github.com/golang/glog v1.0.0/go.mod h1:EWib/APOK0SL3dFbYqvxE3UYd8E6s1ouQ7iEp/0LWV4= github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= @@ -288,6 +292,8 @@ github.com/googleapis/go-type-adapters v1.0.0/go.mod h1:zHW75FOG2aur7gAO2B+MLby+ github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 h1:+9834+KizmvFV7pXQGSXQTsaWhq2GjuNUt0aUU0YBYw= github.com/grpc-ecosystem/go-grpc-middleware v1.3.0/go.mod h1:z0ButlSOZa5vEBq9m2m2hlwIgKw+rp3sdCBRoJY+30Y= github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 h1:BZHcxBETFHIdVyhyEfOvn/RdU/QGdLI4y34qQGjGWO0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0/go.mod h1:hgWBS7lorOAVIJEQMi4ZsPv9hVvWI6+ch50m39Pf2Ks= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -466,8 +472,16 @@ go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk= go.opencensus.io v0.23.0 h1:gqCw0LfLxScz8irSi8exQc7fyQ0fKQU/qnC/X8+V/1M= go.opencensus.io v0.23.0/go.mod h1:XItmlyltB5F7CS4xOC1DcqMoFqwtC6OG2xF7mCv7P7E= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0 h1:WenoaOMNP71oq3KkMZ/jnxI9xU/JSCLw8yZILSI2lfU= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0/go.mod h1:J0dBVrt7dPS/lKJyQoW0xzQiUr4r2Ik1VwPjAUWnofI= go.opentelemetry.io/otel v1.7.0 h1:Z2lA3Tdch0iDcrhJXDIlC94XE+bxok1F9B+4Lz/lGsM= go.opentelemetry.io/otel v1.7.0/go.mod h1:5BdUoMIz5WEs0vt0CUEMtSSaTSHBBVwrhnz7+nrD5xk= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 h1:7Yxsak1q4XrJ5y7XBnNwqWx9amMZvoidCctv62XOQ6Y= +go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0/go.mod h1:M1hVZHNxcbkAlcvrOMlpQ4YOO3Awf+4N2dxkZL3xm04= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 h1:cMDtmgJ5FpRvqx9x2Aq+Mm0O6K/zcUkH73SFz20TuBw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0/go.mod h1:ceUgdyfNv4h4gLxHR0WNfDiiVmZFodZhZSbOLhpxqXE= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 h1:MFAyzUPrTwLOwCi+cltN0ZVyy4phU41lwH+lyMyQTS4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0/go.mod h1:E+/KKhwOSw8yoPxSSuUHG6vKppkvhN+S1Jc7Nib3k3o= go.opentelemetry.io/otel/exporters/prometheus v0.30.0 h1:YXo5ZY5nofaEYMCMTTMaRH2cLDZB8+0UGuk5RwMfIo0= go.opentelemetry.io/otel/exporters/prometheus v0.30.0/go.mod h1:qN5feW+0/d661KDtJuATEmHtw5bKBK7NSvNEP927zSs= go.opentelemetry.io/otel/metric v0.30.0 h1:Hs8eQZ8aQgs0U49diZoaS6Uaxw3+bBE3lcMUKBFIk3c= @@ -479,6 +493,8 @@ go.opentelemetry.io/otel/sdk/metric v0.30.0/go.mod h1:8AKFRi5HyvTR0RRty3paN1aMC9 go.opentelemetry.io/otel/trace v1.7.0 h1:O37Iogk1lEkMRXewVtZ1BBTVn5JEp8GrJvP92bJqC6o= go.opentelemetry.io/otel/trace v1.7.0/go.mod h1:fzLSB9nqR2eXzxPXb2JW9IKE+ScyXA48yyE4TNvoHqU= go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI= +go.opentelemetry.io/proto/otlp v0.16.0 h1:WHzDWdXUvbc5bG2ObdrGfaNpQz7ft7QN9HHmJlbiB1E= +go.opentelemetry.io/proto/otlp v0.16.0/go.mod h1:H7XAot3MsfNsj7EXtrA2q5xSNQ10UqI405h3+duxN4U= go.temporal.io/api v1.8.0/go.mod h1:7m1ZOVUFi/54a5IMzMeELnvDy5sJwRfz11zi3Jrww8w= go.temporal.io/api v1.8.1-0.20220603192404-e65836719706 h1:9zrW4CMQUgBMx9IUZ0qE/HhRxZEugmgvFTXBZhIdlsw= go.temporal.io/api v1.8.1-0.20220603192404-e65836719706/go.mod h1:7m1ZOVUFi/54a5IMzMeELnvDy5sJwRfz11zi3Jrww8w= @@ -496,8 +512,9 @@ go.uber.org/dig v1.14.1 h1:fyakRgZDdi2F8FgwJJoRGangMSPTIxPSLGzR3Oh0/54= go.uber.org/dig v1.14.1/go.mod h1:52EKx/Vjdpz9EzeNcweC4YMsTrDdFn9mS/+Uw5ZnVTI= go.uber.org/fx v1.17.1 h1:S42dZ6Pok8hQ3jxKwo6ZMYcCgHQA/wAS/gnpRa1Pksg= go.uber.org/fx v1.17.1/go.mod h1:yO7KN5rhlARljyo4LR047AjaV6J+KFzd/Z7rnTbEn0A= -go.uber.org/goleak v1.1.11 h1:wy28qYRKZgnJTxGxvye5/wgWr1EKjmUDGYox5mGlRlI= go.uber.org/goleak v1.1.11/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= +go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= +go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= go.uber.org/multierr v1.3.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= go.uber.org/multierr v1.4.0/go.mod h1:VgVr7evmIr6uPjLBxg28wmKNXyqE9akIJ5XnfpiKl+4= @@ -951,6 +968,7 @@ google.golang.org/grpc v1.39.0/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnD google.golang.org/grpc v1.39.1/go.mod h1:PImNr+rS9TWYb2O4/emRugxiyHZ5JyHW5F+RPnDzfrE= google.golang.org/grpc v1.40.0/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= google.golang.org/grpc v1.40.1/go.mod h1:ogyxbiOoUXAkP+4+xa6PZSE9DZgIHtSpzjDTB9KAK34= +google.golang.org/grpc v1.42.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.44.0/go.mod h1:k+4IHHFw41K8+bbowsex27ge2rCb65oeWqe4jJ590SU= google.golang.org/grpc v1.45.0/go.mod h1:lN7owxKUQEqMfSyQikvvk5tf/6zMPsrK+ONuO11+0rQ= google.golang.org/grpc v1.46.0/go.mod h1:vN9eftEi1UMyUsIF80+uQXhHjbXYbm0uXoFCACuMGWk= diff --git a/service/frontend/fx.go b/service/frontend/fx.go index ed4885d4852..5f8bcf04ecc 100644 --- a/service/frontend/fx.go +++ b/service/frontend/fx.go @@ -61,6 +61,7 @@ import ( "go.temporal.io/server/common/rpc/interceptor" "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" + "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/frontend/configs" ) @@ -69,6 +70,8 @@ type FEReplicatorNamespaceReplicationQueue persistence.NamespaceReplicationQueue var Module = fx.Options( resource.Module, + telemetry.GlobalModule, + telemetry.ServiceModule(common.FrontendServiceName), fx.Provide(dynamicconfig.NewCollection), fx.Provide(ConfigProvider), fx.Provide(NamespaceLogInterceptorProvider), diff --git a/service/fx.go b/service/fx.go index 5e895f0d1aa..b2f0e019611 100644 --- a/service/fx.go +++ b/service/fx.go @@ -25,6 +25,7 @@ package service import ( + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "go.temporal.io/server/common" @@ -59,6 +60,7 @@ func PersistenceMaxQpsFn( func GrpcServerOptionsProvider( logger log.Logger, rpcFactory common.RPCFactory, + otelGrpcOpts []otelgrpc.Option, telemetryInterceptor *interceptor.TelemetryInterceptor, rateLimitInterceptor *interceptor.RateLimitInterceptor, ) []grpc.ServerOption { @@ -72,6 +74,7 @@ func GrpcServerOptionsProvider( grpcServerOptions, grpc.ChainUnaryInterceptor( rpc.ServiceErrorInterceptor, + otelgrpc.UnaryServerInterceptor(otelGrpcOpts...), metrics.NewServerMetricsContextInjectorInterceptor(), metrics.NewServerMetricsTrailerPropagatorInterceptor(logger), telemetryInterceptor.Intercept, diff --git a/service/history/consts/const.go b/service/history/consts/const.go index 28823340b14..5031f6002ba 100644 --- a/service/history/consts/const.go +++ b/service/history/consts/const.go @@ -36,6 +36,7 @@ import ( const ( IdentityHistoryService = "history-service" IdentityResetter = "history-resetter" + LibraryName = "go.temporal.io/service/history" ) var ( diff --git a/service/history/fx.go b/service/history/fx.go index f8b9b207434..ec7f8d4150e 100644 --- a/service/history/fx.go +++ b/service/history/fx.go @@ -50,6 +50,7 @@ import ( "go.temporal.io/server/common/rpc/interceptor" "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" + "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/history/configs" "go.temporal.io/server/service/history/events" @@ -62,6 +63,8 @@ var Module = fx.Options( resource.Module, workflow.Module, shard.Module, + telemetry.GlobalModule, + telemetry.ServiceModule(common.HistoryServiceName), fx.Provide(dynamicconfig.NewCollection), fx.Provide(ConfigProvider), // might be worth just using provider for configs.Config directly fx.Provide(TelemetryInterceptorProvider), @@ -131,6 +134,7 @@ func HandlerProvider(args NewHandlerArgs) *Handler { controller: args.ShardController, eventNotifier: args.EventNotifier, replicationTaskFetcherFactory: args.ReplicationTaskFetcherFactory, + tracer: args.TracerProvider.Tracer("go.temporal.io/service/history"), } // prevent us from trying to serve requests before shard controller is started and ready diff --git a/service/history/handler.go b/service/history/handler.go index 71a815fef18..f1b0ac6d166 100644 --- a/service/history/handler.go +++ b/service/history/handler.go @@ -33,6 +33,7 @@ import ( "sync/atomic" "github.com/pborman/uuid" + "go.opentelemetry.io/otel/trace" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" "go.temporal.io/api/serviceerror" @@ -98,6 +99,7 @@ type ( archivalMetadata archiver.ArchivalMetadata hostInfoProvider membership.HostInfoProvider controller *shard.ControllerImpl + tracer trace.Tracer } NewHandlerArgs struct { @@ -122,6 +124,7 @@ type ( ShardController *shard.ControllerImpl EventNotifier events.Notifier ReplicationTaskFetcherFactory replication.TaskFetcherFactory + TracerProvider trace.TracerProvider } ) diff --git a/service/history/historyEngine.go b/service/history/historyEngine.go index 93f9f0149e6..fe6c1c00b15 100644 --- a/service/history/historyEngine.go +++ b/service/history/historyEngine.go @@ -32,6 +32,7 @@ import ( "time" "github.com/pborman/uuid" + "go.opentelemetry.io/otel/trace" commonpb "go.temporal.io/api/common/v1" enumspb "go.temporal.io/api/enums/v1" historypb "go.temporal.io/api/history/v1" @@ -114,6 +115,7 @@ type ( workflowDeleteManager workflow.DeleteManager eventSerializer serialization.Serializer workflowConsistencyChecker api.WorkflowConsistencyChecker + tracer trace.Tracer } ) @@ -132,6 +134,7 @@ func NewEngineWithShardContext( queueProcessorFactories []queues.ProcessorFactory, replicationTaskFetcherFactory replication.TaskFetcherFactory, replicationTaskExecutorProvider replication.TaskExecutorProvider, + tracerProvider trace.TracerProvider, ) shard.Engine { currentClusterName := shard.GetClusterMetadata().GetCurrentClusterName() @@ -166,6 +169,7 @@ func NewEngineWithShardContext( workflowDeleteManager: workflowDeleteManager, eventSerializer: eventSerializer, workflowConsistencyChecker: api.NewWorkflowConsistencyChecker(shard, historyCache), + tracer: tracerProvider.Tracer(consts.LibraryName), } historyEngImpl.queueProcessors = make(map[tasks.Category]queues.Processor) diff --git a/service/history/historyEngineFactory.go b/service/history/historyEngineFactory.go index 0a6dd6d4ccd..2eb3b636a6d 100644 --- a/service/history/historyEngineFactory.go +++ b/service/history/historyEngineFactory.go @@ -25,6 +25,7 @@ package history import ( + "go.opentelemetry.io/otel/trace" "go.uber.org/fx" "go.temporal.io/server/client" @@ -56,6 +57,7 @@ type ( QueueProcessorFactories []queues.ProcessorFactory `group:"queueProcessorFactory"` ReplicationTaskFetcherFactory replication.TaskFetcherFactory ReplicationTaskExecutorProvider replication.TaskExecutorProvider + TracerProvider trace.TracerProvider } historyEngineFactory struct { @@ -80,5 +82,6 @@ func (f *historyEngineFactory) CreateEngine( f.QueueProcessorFactories, f.ReplicationTaskFetcherFactory, f.ReplicationTaskExecutorProvider, + f.TracerProvider, ) } diff --git a/service/history/shard/controller_impl.go b/service/history/shard/controller_impl.go index 9adf75b4a53..9cfb05f5633 100644 --- a/service/history/shard/controller_impl.go +++ b/service/history/shard/controller_impl.go @@ -31,6 +31,7 @@ import ( "sync/atomic" "time" + "go.opentelemetry.io/otel/trace" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/client" "go.temporal.io/server/common" @@ -84,6 +85,7 @@ type ( clusterMetadata cluster.Metadata archivalMetadata archiver.ArchivalMetadata hostInfoProvider membership.HostInfoProvider + tracer trace.Tracer } ) diff --git a/service/history/shard/fx.go b/service/history/shard/fx.go index 495b83c2d49..0223057f5cd 100644 --- a/service/history/shard/fx.go +++ b/service/history/shard/fx.go @@ -25,6 +25,7 @@ package shard import ( + "go.opentelemetry.io/otel/trace" "go.uber.org/fx" "go.temporal.io/server/api/historyservice/v1" @@ -42,6 +43,7 @@ import ( "go.temporal.io/server/common/resource" "go.temporal.io/server/common/searchattribute" "go.temporal.io/server/service/history/configs" + "go.temporal.io/server/service/history/consts" ) var Module = fx.Options( @@ -68,6 +70,7 @@ func ShardControllerProvider( archivalMetadata archiver.ArchivalMetadata, hostInfoProvider membership.HostInfoProvider, engineFactory EngineFactory, + tracerProvider trace.TracerProvider, ) *ControllerImpl { return &ControllerImpl{ status: common.DaemonStatusInitialized, @@ -95,5 +98,6 @@ func ShardControllerProvider( archivalMetadata: archivalMetadata, hostInfoProvider: hostInfoProvider, engineFactory: engineFactory, + tracer: tracerProvider.Tracer(consts.LibraryName), } } diff --git a/service/matching/fx.go b/service/matching/fx.go index 3988a938178..d4d9036f5f3 100644 --- a/service/matching/fx.go +++ b/service/matching/fx.go @@ -41,11 +41,14 @@ import ( persistenceClient "go.temporal.io/server/common/persistence/client" "go.temporal.io/server/common/resource" "go.temporal.io/server/common/rpc/interceptor" + "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/matching/configs" ) var Module = fx.Options( + telemetry.GlobalModule, + telemetry.ServiceModule(common.MatchingServiceName), fx.Provide(dynamicconfig.NewCollection), fx.Provide(NewConfig), fx.Provide(PersistenceMaxQpsProvider), diff --git a/service/worker/fx.go b/service/worker/fx.go index 3b752520e31..222bc0abe78 100644 --- a/service/worker/fx.go +++ b/service/worker/fx.go @@ -41,6 +41,7 @@ import ( "go.temporal.io/server/common/resolver" "go.temporal.io/server/common/resource" "go.temporal.io/server/common/searchattribute" + "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/worker/addsearchattributes" "go.temporal.io/server/service/worker/deletenamespace" @@ -50,6 +51,8 @@ import ( var Module = fx.Options( migration.Module, + telemetry.GlobalModule, + telemetry.ServiceModule(common.WorkerServiceName), addsearchattributes.Module, resource.Module, deletenamespace.Module, From 30a61ad57cabdc4aa96c8f2a30f31d4a9464b716 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 27 May 2022 13:29:58 -0400 Subject: [PATCH 02/27] Ignore .envrc for those using direnv --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 86d805d9d28..1fba4de9569 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,5 @@ # Fossa .fossa.yml +# direnv +.envrc From 4597795b9271a2fc18fabd7917b7efd5e9da09d5 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 27 May 2022 13:30:29 -0400 Subject: [PATCH 03/27] Move OTEL fx code into temporal/fx.go --- common/telemetry/fx.go | 48 ---------------- common/telemetry/fx_test.go | 55 ------------------ common/telemetry/otel.go | 81 -------------------------- service/frontend/fx.go | 3 - service/history/fx.go | 6 +- service/matching/fx.go | 3 - service/worker/fx.go | 3 - temporal/fx.go | 111 ++++++++++++++++++++++++++++++++++++ 8 files changed, 113 insertions(+), 197 deletions(-) delete mode 100644 common/telemetry/fx.go delete mode 100644 common/telemetry/fx_test.go delete mode 100644 common/telemetry/otel.go diff --git a/common/telemetry/fx.go b/common/telemetry/fx.go deleted file mode 100644 index 94db60c85a3..00000000000 --- a/common/telemetry/fx.go +++ /dev/null @@ -1,48 +0,0 @@ -package telemetry - -import ( - "context" - - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" - "go.opentelemetry.io/otel/trace" - "go.uber.org/fx" -) - -// GlobalModule holds process-global telemetry fx state. The following -// types can be overriden with fx.Replace/fx.Decorate: -// - go.opentelemetry.io/otel/sdk/trace.SpanExporter -var GlobalModule = fx.Module("io.temporal.telemetry.Global", fx.Provide(newExporter)) - -// ServiceModule holds per-service (i.e. frontend/history/matching/worker) fx -// state. The following types can be overriden with fx.Replace/fx.Decorate: -// - []go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Option -// - go.opentelemetry.io/otel/sdk/trace.SpanProcessor -// - go.opentelemetry.io/otel/trace.TracerProvider -// - *go.opentelemetry.io/otel/sdk/resource.Resource -// - []go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/ogelgrpc.Option -// -// If the TracerProvider provided by this module supports graceful shutdown, -// that function is invoked when the module is stopped. -func ServiceModule(serviceName string) fx.Option { - grpcInstrumentationOpts := func(tp trace.TracerProvider) []otelgrpc.Option { - return []otelgrpc.Option{otelgrpc.WithTracerProvider(tp)} - } - - shutdownTracing := func(lc fx.Lifecycle, tp trace.TracerProvider) { - type shutdowner interface{ Shutdown(context.Context) error } - if shutdowner, ok := tp.(shutdowner); ok { - lc.Append(fx.Hook{OnStop: shutdowner.Shutdown}) - } - } - - return fx.Module( - serviceName, - fx.Supply(mustParseServiceName(serviceName)), - fx.Provide(grpcExporterOpts), - fx.Provide(newSpanProcessor), - fx.Provide(newResource), - fx.Provide(newTracerProvider), - fx.Provide(grpcInstrumentationOpts), - fx.Invoke(shutdownTracing), - ) -} diff --git a/common/telemetry/fx_test.go b/common/telemetry/fx_test.go deleted file mode 100644 index 12e8d95064c..00000000000 --- a/common/telemetry/fx_test.go +++ /dev/null @@ -1,55 +0,0 @@ -package telemetry_test - -import ( - "context" - "testing" - - "github.com/stretchr/testify/require" - sdktrace "go.opentelemetry.io/otel/sdk/trace" - "go.opentelemetry.io/otel/sdk/trace/tracetest" - semconv "go.opentelemetry.io/otel/semconv/v1.10.0" - "go.opentelemetry.io/otel/trace" - "go.temporal.io/server/common/telemetry" - "go.uber.org/fx" -) - -const svcname = "io.temporal.some_service" - -type CapturingExporter struct { - *tracetest.InMemoryExporter -} - -// Shutdown hides the default Shutdown behavior of the InMemoryExporter -// which is to clear the in-memory set of captured spans. We don't want that. -func (ce CapturingExporter) Shutdown(context.Context) error { - return nil -} - -func TestServiceModule(t *testing.T) { - sink := CapturingExporter{tracetest.NewInMemoryExporter()} - fx.New( - telemetry.GlobalModule, - telemetry.ServiceModule(svcname), - - // replace batching otlp span exporter with a synchronous in-memory - // version of the same interfaces - fx.Replace(fx.Annotate( - sdktrace.NewSimpleSpanProcessor(sink), - fx.As(new(sdktrace.SpanProcessor)))), - - // create a single span and then shutdown - fx.Invoke(func(tp trace.TracerProvider, shutdowner fx.Shutdowner) { - _, span := tp.Tracer(t.Name()).Start(context.TODO(), "span0") - span.End() - shutdowner.Shutdown() - }), - ).Run() - - spans := sink.GetSpans() - require.Len(t, spans, 1) - require.Equal(t, "span0", spans[0].Name) - require.Contains(t, - spans[0].Resource.Attributes(), - semconv.ServiceNameKey.String(svcname)) - require.Equal(t, spans[0].InstrumentationLibrary.Name, t.Name()) -} diff --git a/common/telemetry/otel.go b/common/telemetry/otel.go deleted file mode 100644 index 82caf8b8b73..00000000000 --- a/common/telemetry/otel.go +++ /dev/null @@ -1,81 +0,0 @@ -package telemetry - -import ( - "context" - "errors" - "fmt" - "strings" - - "go.opentelemetry.io/otel" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" - "go.opentelemetry.io/otel/propagation" - "go.opentelemetry.io/otel/sdk/resource" - sdktrace "go.opentelemetry.io/otel/sdk/trace" - semconv "go.opentelemetry.io/otel/semconv/v1.10.0" - "go.opentelemetry.io/otel/trace" -) - -func init() { - // make this global - it doesn't vary by service - // intentionally omitting baggage propagation until we need it. - otel.SetTextMapPropagator(propagation.TraceContext{}) -} - -type serviceName string - -func (sn serviceName) String() string { - return string(sn) -} - -func mustParseServiceName(str string) serviceName { - result, err := parseServiceName(str) - if err != nil { - panic(err) - } - return result -} - -func parseServiceName(str string) (serviceName, error) { - if str == "" { - return "", errors.New("empty service name") - } - if !strings.HasPrefix(str, "io.temporal.") { - str = fmt.Sprintf("io.temporal.%v", str) - } - return serviceName(str), nil -} - -func grpcExporterOpts() []otlptracegrpc.Option { - return []otlptracegrpc.Option{ - // this is the same as the default - just here for clarity - otlptracegrpc.WithEndpoint("localhost:4317"), - otlptracegrpc.WithInsecure(), - } -} - -func newExporter(opts []otlptracegrpc.Option) (sdktrace.SpanExporter, error) { - return otlptrace.New(context.Background(), otlptracegrpc.NewClient(opts...)) -} - -func newTracerProvider( - resource *resource.Resource, - sp sdktrace.SpanProcessor, -) (trace.TracerProvider, error) { - return sdktrace.NewTracerProvider( - sdktrace.WithSpanProcessor(sp), - sdktrace.WithResource(resource), - ), nil -} - -func newSpanProcessor(exp sdktrace.SpanExporter) sdktrace.SpanProcessor { - return sdktrace.NewBatchSpanProcessor(exp) -} - -func newResource(serviceName serviceName) (*resource.Resource, error) { - return resource.Merge( - resource.Default(), - resource.NewWithAttributes( - semconv.SchemaURL, - semconv.ServiceNameKey.String(serviceName.String()))) -} diff --git a/service/frontend/fx.go b/service/frontend/fx.go index 5f8bcf04ecc..ed4885d4852 100644 --- a/service/frontend/fx.go +++ b/service/frontend/fx.go @@ -61,7 +61,6 @@ import ( "go.temporal.io/server/common/rpc/interceptor" "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" - "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/frontend/configs" ) @@ -70,8 +69,6 @@ type FEReplicatorNamespaceReplicationQueue persistence.NamespaceReplicationQueue var Module = fx.Options( resource.Module, - telemetry.GlobalModule, - telemetry.ServiceModule(common.FrontendServiceName), fx.Provide(dynamicconfig.NewCollection), fx.Provide(ConfigProvider), fx.Provide(NamespaceLogInterceptorProvider), diff --git a/service/history/fx.go b/service/history/fx.go index ec7f8d4150e..c345286eb95 100644 --- a/service/history/fx.go +++ b/service/history/fx.go @@ -50,9 +50,9 @@ import ( "go.temporal.io/server/common/rpc/interceptor" "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" - "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/history/configs" + "go.temporal.io/server/service/history/consts" "go.temporal.io/server/service/history/events" "go.temporal.io/server/service/history/shard" "go.temporal.io/server/service/history/workflow" @@ -63,8 +63,6 @@ var Module = fx.Options( resource.Module, workflow.Module, shard.Module, - telemetry.GlobalModule, - telemetry.ServiceModule(common.HistoryServiceName), fx.Provide(dynamicconfig.NewCollection), fx.Provide(ConfigProvider), // might be worth just using provider for configs.Config directly fx.Provide(TelemetryInterceptorProvider), @@ -134,7 +132,7 @@ func HandlerProvider(args NewHandlerArgs) *Handler { controller: args.ShardController, eventNotifier: args.EventNotifier, replicationTaskFetcherFactory: args.ReplicationTaskFetcherFactory, - tracer: args.TracerProvider.Tracer("go.temporal.io/service/history"), + tracer: args.TracerProvider.Tracer(consts.LibraryName), } // prevent us from trying to serve requests before shard controller is started and ready diff --git a/service/matching/fx.go b/service/matching/fx.go index d4d9036f5f3..3988a938178 100644 --- a/service/matching/fx.go +++ b/service/matching/fx.go @@ -41,14 +41,11 @@ import ( persistenceClient "go.temporal.io/server/common/persistence/client" "go.temporal.io/server/common/resource" "go.temporal.io/server/common/rpc/interceptor" - "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/matching/configs" ) var Module = fx.Options( - telemetry.GlobalModule, - telemetry.ServiceModule(common.MatchingServiceName), fx.Provide(dynamicconfig.NewCollection), fx.Provide(NewConfig), fx.Provide(PersistenceMaxQpsProvider), diff --git a/service/worker/fx.go b/service/worker/fx.go index 222bc0abe78..3b752520e31 100644 --- a/service/worker/fx.go +++ b/service/worker/fx.go @@ -41,7 +41,6 @@ import ( "go.temporal.io/server/common/resolver" "go.temporal.io/server/common/resource" "go.temporal.io/server/common/searchattribute" - "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/worker/addsearchattributes" "go.temporal.io/server/service/worker/deletenamespace" @@ -51,8 +50,6 @@ import ( var Module = fx.Options( migration.Module, - telemetry.GlobalModule, - telemetry.ServiceModule(common.WorkerServiceName), addsearchattributes.Module, resource.Module, deletenamespace.Module, diff --git a/temporal/fx.go b/temporal/fx.go index 84f77233260..5c1cbf29d50 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -27,10 +27,20 @@ package temporal import ( "context" "fmt" + "os" + "strings" "time" "go.temporal.io/server/service/history/replication" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/propagation" + otelresource "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" "go.uber.org/fx" @@ -132,6 +142,7 @@ func NewServerFx(opts ...ServerOption) *ServerFx { ServerFxImplModule, fx.Supply(opts), fx.Provide(ServerOptionsProvider), + TraceExportModule, fx.Provide(PersistenceFactoryProvider), fx.Provide(HistoryServiceProvider), @@ -328,6 +339,7 @@ type ( Authorizer authorization.Authorizer ClaimMapper authorization.ClaimMapper DataStoreFactory persistenceClient.AbstractDataStoreFactory + SpanExporters []sdktrace.SpanExporter } ) @@ -373,6 +385,8 @@ func HistoryServiceProvider( fx.Provide(func() esclient.Client { return params.EsClient }), fx.Provide(params.PersistenceFactoryProvider), fx.Provide(workflow.NewTaskGeneratorProvider), + fx.Supply(params.SpanExporters), + ServiceTracingModule(serviceName), resource.DefaultOptions, history.QueueProcessorModule, history.Module, @@ -430,6 +444,8 @@ func MatchingServiceProvider( fx.Provide(func() metrics.Reporter { return params.ServerReporter }), fx.Provide(func() esclient.Client { return params.EsClient }), fx.Provide(params.PersistenceFactoryProvider), + fx.Supply(params.SpanExporters), + ServiceTracingModule(serviceName), resource.DefaultOptions, matching.Module, fx.NopLogger, @@ -486,6 +502,8 @@ func FrontendServiceProvider( fx.Provide(func() resource.NamespaceLogger { return params.NamespaceLogger }), fx.Provide(func() esclient.Client { return params.EsClient }), fx.Provide(params.PersistenceFactoryProvider), + fx.Supply(params.SpanExporters), + ServiceTracingModule(serviceName), resource.DefaultOptions, frontend.Module, fx.NopLogger, @@ -541,6 +559,8 @@ func WorkerServiceProvider( fx.Provide(func() metrics.Reporter { return params.ServerReporter }), fx.Provide(func() esclient.Client { return params.EsClient }), fx.Provide(params.PersistenceFactoryProvider), + fx.Supply(params.SpanExporters), + ServiceTracingModule(serviceName), resource.DefaultOptions, worker.Module, fx.NopLogger, @@ -762,3 +782,94 @@ func verifyPersistenceCompatibleVersion(config config.Persistence, persistenceSe } return nil } + +// TraceExportModule holds process-global telemetry fx state. The following +// types can be overriden/augmented with fx.Replace/fx.Decorate: +// +// - []go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Option +// default: empty slice or oteltracegrpc.WithInsecure() depending on $OTEL_EXPORTER_OTLP_INSECURE +// - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Client +// - []go.opentelemetry.io/otel/sdk/trace.SpanExporter +var TraceExportModule = fx.Options( + // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.11.0/specification/protocol/exporter.md#configuration-options + // not yet supported by the Go OTEL sdk as of v1.7.0 so we'll do it manually here + fx.Provide(func() []otlptracegrpc.Option { + opts := []otlptracegrpc.Option{} + if boolstr := os.Getenv("OTEL_EXPORTER_OTLP_INSECURE"); boolstr == "true" { + opts = append(opts, otlptracegrpc.WithInsecure()) + } + return opts + }), + // Need this func to have the right type signature - fx will not inject a + // slice into a provider func defined with variadic parameters + fx.Provide(func(opts []otlptracegrpc.Option) otlptrace.Client { + return otlptracegrpc.NewClient(opts...) + }), + fx.Provide(func(lc fx.Lifecycle, c otlptrace.Client) []sdktrace.SpanExporter { + e := otlptrace.NewUnstarted(c) + lc.Append(fx.Hook{OnStart: e.Start, OnStop: e.Shutdown}) + return []sdktrace.SpanExporter{e} + }), +) + +// ServiceTracingModule holds per-service (i.e. frontend/history/matching/worker) fx +// state. The following types can be overriden with fx.Replace/fx.Decorate: +// +// - []go.opentelemetry.io/otel/sdk/trace.BatchSpanProcessorOption +// default: empty slice +// - []go.opentelemetry.io/otel/sdk/trace.SpanProcessor +// default: wrap each sdktrace.SpanExporter with sdktrace.NewBatchSpanProcessor +// - *go.opentelemetry.io/otel/sdk/resource.Resource +// default: resource.Default() augmented with the supplied serviceName +// - []go.opentelemetry.io/otel/sdk/trace.TracerProviderOption +// default: the provided resource.Resource and each of the sdktrace.SpanExporter +// - go.opentelemetry.io/otel/trace.TracerProvider +// default: sdktrace.NewTracerProvider with each of the sdktrace.TracerProviderOption +// - go.opentelemetry.io/otel/ppropagation.TextMapPropagator +// default: propagation.TraceContext{} +// - []go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/ogelgrpc.Option +// default: just otelgrpc.WithTracerProvider +func ServiceTracingModule(serviceName string) fx.Option { + if !strings.HasPrefix(serviceName, "io.temporal.") { + serviceName = fmt.Sprintf("io.temporal.%s", serviceName) + } + return fx.Options( + fx.Supply([]sdktrace.BatchSpanProcessorOption{}), + fx.Provide(func(exps []sdktrace.SpanExporter, opts []sdktrace.BatchSpanProcessorOption) []sdktrace.SpanProcessor { + sps := make([]sdktrace.SpanProcessor, 0, len(exps)) + for _, exp := range exps { + sps = append(sps, sdktrace.NewBatchSpanProcessor(exp, opts...)) + } + return sps + }), + fx.Provide(func() (*otelresource.Resource, error) { + return otelresource.Merge( + otelresource.Default(), + otelresource.NewSchemaless( + semconv.ServiceNameKey.String(serviceName), + // todo: semconv.ServiceInstanceIDKey.String("?"), + )) + }), + fx.Provide(func(r *otelresource.Resource, sps []sdktrace.SpanProcessor) []sdktrace.TracerProviderOption { + opts := make([]sdktrace.TracerProviderOption, 0, len(sps)+1) + opts = append(opts, sdktrace.WithResource(r)) + for _, sp := range sps { + opts = append(opts, sdktrace.WithSpanProcessor(sp)) + } + return opts + }), + fx.Provide(func(lc fx.Lifecycle, opts []sdktrace.TracerProviderOption) trace.TracerProvider { + tp := sdktrace.NewTracerProvider(opts...) + lc.Append(fx.Hook{OnStop: tp.Shutdown}) + return tp + }), + // Haven't had use for baggage propagation yet + fx.Provide(func() propagation.TextMapPropagator { return propagation.TraceContext{} }), + fx.Provide(func(tp trace.TracerProvider, tmp propagation.TextMapPropagator) []otelgrpc.Option { + return []otelgrpc.Option{ + otelgrpc.WithTracerProvider(tp), + otelgrpc.WithPropagators(tmp), + } + }), + ) +} From abce48d173553cc92dddb1505469676f393dd43d Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 27 May 2022 16:56:15 -0400 Subject: [PATCH 04/27] Put otelgrpc interceptor on frontend service --- service/frontend/fx.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/service/frontend/fx.go b/service/frontend/fx.go index ed4885d4852..633f0d95193 100644 --- a/service/frontend/fx.go +++ b/service/frontend/fx.go @@ -28,6 +28,7 @@ import ( "context" "net" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.uber.org/fx" "google.golang.org/grpc" "google.golang.org/grpc/health" @@ -140,6 +141,7 @@ func GrpcServerOptionsProvider( audienceGetter authorization.JWTAudienceMapper, customInterceptors []grpc.UnaryServerInterceptor, metricsClient metrics.Client, + otelGrpcOpts []otelgrpc.Option, ) []grpc.ServerOption { kep := keepalive.EnforcementPolicy{ MinTime: serviceConfig.KeepAliveMinTime(), @@ -159,6 +161,7 @@ func GrpcServerOptionsProvider( interceptors := []grpc.UnaryServerInterceptor{ namespaceLogInterceptor.Intercept, rpc.ServiceErrorInterceptor, + otelgrpc.UnaryServerInterceptor(otelGrpcOpts...), metrics.NewServerMetricsContextInjectorInterceptor(), telemetryInterceptor.Intercept, namespaceValidatorInterceptor.Intercept, From d2f34ee4e8e4d68c7da452fcb5d2ca074b89bed7 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 27 May 2022 16:56:46 -0400 Subject: [PATCH 05/27] The INSECURE property can be inferred from the URI scheme --- temporal/fx.go | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/temporal/fx.go b/temporal/fx.go index 5c1cbf29d50..9c06e27a667 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -27,7 +27,6 @@ package temporal import ( "context" "fmt" - "os" "strings" "time" @@ -787,24 +786,18 @@ func verifyPersistenceCompatibleVersion(config config.Persistence, persistenceSe // types can be overriden/augmented with fx.Replace/fx.Decorate: // // - []go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Option -// default: empty slice or oteltracegrpc.WithInsecure() depending on $OTEL_EXPORTER_OTLP_INSECURE +// default: empty slice // - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Client // - []go.opentelemetry.io/otel/sdk/trace.SpanExporter var TraceExportModule = fx.Options( - // https://github.com/open-telemetry/opentelemetry-specification/blob/v1.11.0/specification/protocol/exporter.md#configuration-options - // not yet supported by the Go OTEL sdk as of v1.7.0 so we'll do it manually here - fx.Provide(func() []otlptracegrpc.Option { - opts := []otlptracegrpc.Option{} - if boolstr := os.Getenv("OTEL_EXPORTER_OTLP_INSECURE"); boolstr == "true" { - opts = append(opts, otlptracegrpc.WithInsecure()) - } - return opts - }), + fx.Supply([]otlptracegrpc.Option{}), + // Need this func to have the right type signature - fx will not inject a // slice into a provider func defined with variadic parameters fx.Provide(func(opts []otlptracegrpc.Option) otlptrace.Client { return otlptracegrpc.NewClient(opts...) }), + fx.Provide(func(lc fx.Lifecycle, c otlptrace.Client) []sdktrace.SpanExporter { e := otlptrace.NewUnstarted(c) lc.Append(fx.Hook{OnStart: e.Start, OnStop: e.Shutdown}) From ff390ff554003a62476dc4703a65430f4174c29b Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Tue, 31 May 2022 10:03:52 -0400 Subject: [PATCH 06/27] Find service name through DI rather than parameter Removes the serviceName parameter from the ServiceTrackingModule function (now just a var) as that piece of data can be read from the DI container as a dependency. Also inits the OTEL error logger to delegate to the server's Zap logger. --- temporal/fx.go | 113 ++++++++++++++++++++++++++++--------------------- 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/temporal/fx.go b/temporal/fx.go index 9c06e27a667..4b323b86e99 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -33,6 +33,7 @@ import ( "go.temporal.io/server/service/history/replication" "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/propagation" @@ -339,6 +340,7 @@ type ( ClaimMapper authorization.ClaimMapper DataStoreFactory persistenceClient.AbstractDataStoreFactory SpanExporters []sdktrace.SpanExporter + InstanceID resource.InstanceID `optional:"true"` } ) @@ -385,7 +387,7 @@ func HistoryServiceProvider( fx.Provide(params.PersistenceFactoryProvider), fx.Provide(workflow.NewTaskGeneratorProvider), fx.Supply(params.SpanExporters), - ServiceTracingModule(serviceName), + ServiceTracingModule, resource.DefaultOptions, history.QueueProcessorModule, history.Module, @@ -444,7 +446,7 @@ func MatchingServiceProvider( fx.Provide(func() esclient.Client { return params.EsClient }), fx.Provide(params.PersistenceFactoryProvider), fx.Supply(params.SpanExporters), - ServiceTracingModule(serviceName), + ServiceTracingModule, resource.DefaultOptions, matching.Module, fx.NopLogger, @@ -502,7 +504,7 @@ func FrontendServiceProvider( fx.Provide(func() esclient.Client { return params.EsClient }), fx.Provide(params.PersistenceFactoryProvider), fx.Supply(params.SpanExporters), - ServiceTracingModule(serviceName), + ServiceTracingModule, resource.DefaultOptions, frontend.Module, fx.NopLogger, @@ -559,7 +561,7 @@ func WorkerServiceProvider( fx.Provide(func() esclient.Client { return params.EsClient }), fx.Provide(params.PersistenceFactoryProvider), fx.Supply(params.SpanExporters), - ServiceTracingModule(serviceName), + ServiceTracingModule, resource.DefaultOptions, worker.Module, fx.NopLogger, @@ -782,7 +784,8 @@ func verifyPersistenceCompatibleVersion(config config.Persistence, persistenceSe return nil } -// TraceExportModule holds process-global telemetry fx state. The following +// TraceExportModule holds process-global telemetry fx state defining the set of +// OTEL trace/span exporters used by tracing instrumentation. The following // types can be overriden/augmented with fx.Replace/fx.Decorate: // // - []go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Option @@ -792,6 +795,14 @@ func verifyPersistenceCompatibleVersion(config config.Persistence, persistenceSe var TraceExportModule = fx.Options( fx.Supply([]otlptracegrpc.Option{}), + fx.Invoke(func(log log.Logger) { + otel.SetErrorHandler(otel.ErrorHandlerFunc( + func(err error) { + log.Warn("OTEL error", tag.Error(err), tag.ErrorType(err)) + }), + ) + }), + // Need this func to have the right type signature - fx will not inject a // slice into a provider func defined with variadic parameters fx.Provide(func(opts []otlptracegrpc.Option) otlptrace.Client { @@ -822,47 +833,51 @@ var TraceExportModule = fx.Options( // default: propagation.TraceContext{} // - []go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/ogelgrpc.Option // default: just otelgrpc.WithTracerProvider -func ServiceTracingModule(serviceName string) fx.Option { - if !strings.HasPrefix(serviceName, "io.temporal.") { - serviceName = fmt.Sprintf("io.temporal.%s", serviceName) - } - return fx.Options( - fx.Supply([]sdktrace.BatchSpanProcessorOption{}), - fx.Provide(func(exps []sdktrace.SpanExporter, opts []sdktrace.BatchSpanProcessorOption) []sdktrace.SpanProcessor { - sps := make([]sdktrace.SpanProcessor, 0, len(exps)) - for _, exp := range exps { - sps = append(sps, sdktrace.NewBatchSpanProcessor(exp, opts...)) - } - return sps - }), - fx.Provide(func() (*otelresource.Resource, error) { - return otelresource.Merge( - otelresource.Default(), - otelresource.NewSchemaless( - semconv.ServiceNameKey.String(serviceName), - // todo: semconv.ServiceInstanceIDKey.String("?"), - )) - }), - fx.Provide(func(r *otelresource.Resource, sps []sdktrace.SpanProcessor) []sdktrace.TracerProviderOption { - opts := make([]sdktrace.TracerProviderOption, 0, len(sps)+1) - opts = append(opts, sdktrace.WithResource(r)) - for _, sp := range sps { - opts = append(opts, sdktrace.WithSpanProcessor(sp)) - } - return opts - }), - fx.Provide(func(lc fx.Lifecycle, opts []sdktrace.TracerProviderOption) trace.TracerProvider { - tp := sdktrace.NewTracerProvider(opts...) - lc.Append(fx.Hook{OnStop: tp.Shutdown}) - return tp - }), - // Haven't had use for baggage propagation yet - fx.Provide(func() propagation.TextMapPropagator { return propagation.TraceContext{} }), - fx.Provide(func(tp trace.TracerProvider, tmp propagation.TextMapPropagator) []otelgrpc.Option { - return []otelgrpc.Option{ - otelgrpc.WithTracerProvider(tp), - otelgrpc.WithPropagators(tmp), - } - }), - ) -} +var ServiceTracingModule = fx.Options( + fx.Supply([]sdktrace.BatchSpanProcessorOption{}), + fx.Provide(func(exps []sdktrace.SpanExporter, opts []sdktrace.BatchSpanProcessorOption) []sdktrace.SpanProcessor { + sps := make([]sdktrace.SpanProcessor, 0, len(exps)) + for _, exp := range exps { + sps = append(sps, sdktrace.NewBatchSpanProcessor(exp, opts...)) + } + return sps + }), + fx.Provide( + fx.Annotate( + func(rsn resource.ServiceName, rsi resource.InstanceID) (*otelresource.Resource, error) { + serviceName := string(rsn) + if !strings.HasPrefix(serviceName, "io.temporal.") { + serviceName = fmt.Sprintf("io.temporal.%s", serviceName) + } + return otelresource.Merge( + otelresource.Default(), + otelresource.NewSchemaless( + semconv.ServiceNameKey.String(serviceName), + semconv.ServiceInstanceIDKey.String(string(rsi)), + )) + }, + fx.ParamTags(``, `optional:"true"`), + ), + ), + fx.Provide(func(r *otelresource.Resource, sps []sdktrace.SpanProcessor) []sdktrace.TracerProviderOption { + opts := make([]sdktrace.TracerProviderOption, 0, len(sps)+1) + opts = append(opts, sdktrace.WithResource(r)) + for _, sp := range sps { + opts = append(opts, sdktrace.WithSpanProcessor(sp)) + } + return opts + }), + fx.Provide(func(lc fx.Lifecycle, opts []sdktrace.TracerProviderOption) trace.TracerProvider { + tp := sdktrace.NewTracerProvider(opts...) + lc.Append(fx.Hook{OnStop: tp.Shutdown}) + return tp + }), + // Haven't had use for baggage propagation yet + fx.Provide(func() propagation.TextMapPropagator { return propagation.TraceContext{} }), + fx.Provide(func(tp trace.TracerProvider, tmp propagation.TextMapPropagator) []otelgrpc.Option { + return []otelgrpc.Option{ + otelgrpc.WithTracerProvider(tp), + otelgrpc.WithPropagators(tmp), + } + }), +) From 42cc3f5d1ad157096402c0075d54ea0dbefeb09b Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Tue, 31 May 2022 16:27:50 -0400 Subject: [PATCH 07/27] Use the resource info detectors OTEL provides Host and process attributes can be auto-detected. --- temporal/fx.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/temporal/fx.go b/temporal/fx.go index 4b323b86e99..f2696c34bb1 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -34,12 +34,13 @@ import ( "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/propagation" otelresource "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" - semconv "go.opentelemetry.io/otel/semconv/v1.4.0" + semconv "go.opentelemetry.io/otel/semconv/v1.10.0" "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" @@ -849,12 +850,19 @@ var ServiceTracingModule = fx.Options( if !strings.HasPrefix(serviceName, "io.temporal.") { serviceName = fmt.Sprintf("io.temporal.%s", serviceName) } - return otelresource.Merge( - otelresource.Default(), - otelresource.NewSchemaless( - semconv.ServiceNameKey.String(serviceName), - semconv.ServiceInstanceIDKey.String(string(rsi)), - )) + attrs := []attribute.KeyValue{ + semconv.ServiceNameKey.String(serviceName), + } + if rsi != "" { + attrs = append(attrs, semconv.ServiceInstanceIDKey.String(string(rsi))) + } + return otelresource.New(context.Background(), + otelresource.WithProcess(), + otelresource.WithOS(), + otelresource.WithHost(), + otelresource.WithContainer(), + otelresource.WithAttributes(attrs...), + ) }, fx.ParamTags(``, `optional:"true"`), ), From 25e1978a244bd8979e9033c259bc8846819d7cbb Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 3 Jun 2022 14:45:10 -0400 Subject: [PATCH 08/27] Build otel exporter configuration from yaml --- common/config/config.go | 3 + common/telemetry/config.go | 246 +++++++++++++++++++++++++++++++++++ common/telemetry/grpc.go | 37 ++++++ common/telemetry/util.go | 43 ++++++ config/development-cass.yaml | 11 ++ go.mod | 2 +- service/frontend/fx.go | 6 +- service/fx.go | 6 +- temporal/fx.go | 42 +++--- 9 files changed, 364 insertions(+), 32 deletions(-) create mode 100644 common/telemetry/config.go create mode 100644 common/telemetry/grpc.go create mode 100644 common/telemetry/util.go diff --git a/common/config/config.go b/common/config/config.go index adb38371f28..28eab689542 100644 --- a/common/config/config.go +++ b/common/config/config.go @@ -38,6 +38,7 @@ import ( "go.temporal.io/server/common/masker" "go.temporal.io/server/common/metrics" "go.temporal.io/server/common/persistence/visibility/store/elasticsearch/client" + "go.temporal.io/server/common/telemetry" ) type ( @@ -64,6 +65,8 @@ type ( DynamicConfigClient *dynamicconfig.FileBasedClientConfig `yaml:"dynamicConfigClient"` // NamespaceDefaults is the default config for every namespace NamespaceDefaults NamespaceDefaults `yaml:"namespaceDefaults"` + // ExporterConfig allows the specification of process-wide OTEL exporters + ExporterConfig telemetry.ExportConfig `yaml:"otel"` } // Service contains the service specific config items diff --git a/common/telemetry/config.go b/common/telemetry/config.go new file mode 100644 index 00000000000..129ac7f2988 --- /dev/null +++ b/common/telemetry/config.go @@ -0,0 +1,246 @@ +package telemetry + +import ( + "context" + "fmt" + "strings" + "time" + + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + "google.golang.org/grpc" + "google.golang.org/grpc/backoff" + "google.golang.org/grpc/credentials/insecure" + "gopkg.in/yaml.v3" +) + +type ( + metadata struct { + Name string `yaml:"name"` + Labels map[string]string `yaml:"labels"` + } + + connection struct { + Kind string `yaml:"kind"` + Metadata metadata `yaml:"metadata"` + Spec interface{} `yaml:"-"` + } + + grpcconn struct { + Endpoint string `yaml:"endpoint"` + Block bool `yaml:"block"` + ConnectParams struct { + MinConnectTimeout time.Duration `yaml:"min_connect_timeout"` + Backoff struct { + BaseDelay time.Duration `yaml:"base_delay"` + Multiplier float64 `yaml:"multiplier"` + Jitter float64 `yaml:"jitter"` + MaxDelay time.Duration `yaml:"max_delay"` + } `yaml:"backoff"` + } `yaml:"connect_params"` + UserAgent string `yaml:"user_agent"` + ReadBufferSize int `yaml:"read_buffer_size"` + WriteBufferSize int `yaml:"write_buffer_size"` + Authority string `yaml:"authority"` + Insecure bool `yaml:"insecure"` + + cc *grpc.ClientConn + } + + exporter struct { + Kind struct { + Signal string `yaml:"signal"` + Model string `yaml:"model"` + Proto string `yaml:"proto"` + } `yaml:"kind"` + Metadata metadata `yaml:"metadata"` + Spec interface{} `yaml:"-"` + } + + otlpGrpcSpanExporter struct { + ConnectionName string `yaml:"connection_name"` + Connection grpcconn `yaml:"connection"` + Headers map[string]string `yaml:"headers"` + Timeout time.Duration `yaml:"timeout"` + Retry struct { + Enabled bool `yaml:"enabled"` + InitialInterval time.Duration `yaml:"initial_interval"` + MaxInterval time.Duration `yaml:"max_interval"` + MaxElapsedTime time.Duration `yaml:"max_elapsed_time"` + } `yaml:"retry"` + } + + exportConfig struct { + Connections []connection `yaml:"connections"` + Exporters []exporter `yaml:"exporters"` + } + + // ExportConfig represents YAML structured configuration for a set of OTEL + // trace/span/log exporters. + ExportConfig struct { + inner exportConfig `yaml:",inline"` + } +) + +func (ec *ExportConfig) UnmarshalYAML(n *yaml.Node) error { + return n.Decode(&ec.inner) +} + +func (g *grpcconn) dial(ctx context.Context) (*grpc.ClientConn, error) { + if g.cc != nil { + return g.cc, nil + } + cc, err := grpc.DialContext(ctx, g.Endpoint, g.dialOpts()...) + if err != nil { + return nil, err + } + g.cc = cc + return cc, nil +} + +func (g *grpcconn) dialOpts() []grpc.DialOption { + out := []grpc.DialOption{ + grpc.WithReadBufferSize(valueOrDefault(g.ReadBufferSize, 32*1024)), + grpc.WithWriteBufferSize(valueOrDefault(g.WriteBufferSize, 32*1024)), + grpc.WithUserAgent(g.UserAgent), + grpc.WithConnectParams(grpc.ConnectParams{ + MinConnectTimeout: valueOrDefault(g.ConnectParams.MinConnectTimeout, 10*time.Second), + Backoff: backoff.Config{ + BaseDelay: valueOrDefault(g.ConnectParams.Backoff.BaseDelay, backoff.DefaultConfig.BaseDelay), + MaxDelay: valueOrDefault(g.ConnectParams.Backoff.MaxDelay, backoff.DefaultConfig.MaxDelay), + Jitter: valueOrDefault(g.ConnectParams.Backoff.Jitter, backoff.DefaultConfig.Jitter), + Multiplier: valueOrDefault(g.ConnectParams.Backoff.Multiplier, backoff.DefaultConfig.Multiplier), + }, + }), + } + if g.Insecure { + out = append(out, grpc.WithTransportCredentials(insecure.NewCredentials())) + } + if g.Block { + out = append(out, grpc.WithBlock()) + } + if g.Authority != "" { + out = append(out, grpc.WithAuthority(g.Authority)) + } + return out +} + +// SpanExporters builds the set of OTEL SpanExporter objects defined by the YAML +// unmarshaled into this ExportConfig object. The returned SpanExporters have +// not been started. +func (c *ExportConfig) SpanExporters(ctx context.Context) ([]sdktrace.SpanExporter, error) { + out := make([]sdktrace.SpanExporter, 0, len(c.inner.Exporters)) + for _, expcfg := range c.inner.Exporters { + if !strings.HasPrefix(expcfg.Kind.Signal, "trace") { + continue + } + switch spec := expcfg.Spec.(type) { + case *otlpGrpcSpanExporter: + spanexp, err := c.buildOtlpGrpcSpanExporter(ctx, spec) + if err != nil { + return nil, err + } + out = append(out, spanexp) + default: + return nil, fmt.Errorf("unsupported span exporter type: %T", spec) + } + } + return out, nil +} + +func (c *ExportConfig) buildOtlpGrpcSpanExporter( + ctx context.Context, + cfg *otlpGrpcSpanExporter, +) (sdktrace.SpanExporter, error) { + dopts := cfg.Connection.dialOpts() + opts := []otlptracegrpc.Option{ + otlptracegrpc.WithEndpoint(cfg.Connection.Endpoint), + otlptracegrpc.WithHeaders(cfg.Headers), + otlptracegrpc.WithTimeout(valueOrDefault(cfg.Timeout, 10*time.Second)), + otlptracegrpc.WithDialOption(dopts...), + otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{ + Enabled: valueOrDefault(cfg.Retry.Enabled, true), + InitialInterval: valueOrDefault(cfg.Retry.InitialInterval, 5*time.Second), + MaxInterval: valueOrDefault(cfg.Retry.MaxInterval, 30*time.Second), + MaxElapsedTime: valueOrDefault(cfg.Retry.MaxElapsedTime, 1*time.Minute), + }), + } + + // work around https://github.com/open-telemetry/opentelemetry-go/issues/2940 + if cfg.Connection.Insecure { + opts = append(opts, otlptracegrpc.WithInsecure()) + } + + if cfg.ConnectionName == "" { + return otlptracegrpc.NewUnstarted(opts...), nil + } + + conncfg, ok := c.findNamedGrpcConnCfg(cfg.ConnectionName) + if !ok { + return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName) + } + cc, err := conncfg.dial(ctx) + if err != nil { + return nil, err + } + opts = append(opts, otlptracegrpc.WithGRPCConn(cc)) + return otlptracegrpc.NewUnstarted(opts...), nil +} + +func (c *ExportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) { + if name == "" { + return nil, false + } + for _, conn := range c.inner.Connections { + if gconn, ok := conn.Spec.(*grpcconn); ok && conn.Metadata.Name == name { + return gconn, true + } + } + return nil, false +} + +func (c *connection) UnmarshalYAML(n *yaml.Node) error { + type conn connection + type overlay struct { + *conn `yaml:",inline"` + Spec yaml.Node `yaml:"spec"` + } + obj := overlay{conn: (*conn)(c)} + err := n.Decode(&obj) + if err != nil { + return err + } + switch c.Kind { + case "grpc": + c.Spec = &grpcconn{} + default: + return fmt.Errorf("unsupported connection kind: %q", c.Kind) + } + return obj.Spec.Decode(c.Spec) +} + +func (e *exporter) UnmarshalYAML(n *yaml.Node) error { + type exp exporter + type overlay struct { + *exp `yaml:",inline"` + Spec yaml.Node `yaml:"spec"` + } + obj := overlay{exp: (*exp)(e)} + err := n.Decode(&obj) + if err != nil { + return err + } + descriptor := fmt.Sprintf("%v+%v+%v", e.Kind.Signal, e.Kind.Model, e.Kind.Proto) + switch descriptor { + case "traces+otlp+grpc", "trace+otlp+grpc": + e.Spec = new(otlpGrpcSpanExporter) + default: + return fmt.Errorf( + "unsupported exporter kind: signal=%q; model=%q; proto=%q", + e.Kind.Signal, + e.Kind.Model, + e.Kind.Proto, + ) + } + return obj.Spec.Decode(e.Spec) +} diff --git a/common/telemetry/grpc.go b/common/telemetry/grpc.go new file mode 100644 index 00000000000..bae094a8397 --- /dev/null +++ b/common/telemetry/grpc.go @@ -0,0 +1,37 @@ +package telemetry + +import ( + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "go.opentelemetry.io/otel/propagation" + "go.opentelemetry.io/otel/trace" + "google.golang.org/grpc" +) + +type ( + ServerTraceInterceptor grpc.UnaryServerInterceptor + ClientTraceInterceptor grpc.UnaryClientInterceptor +) + +func NewServerTraceInterceptor( + tp trace.TracerProvider, + tmp propagation.TextMapPropagator, +) ServerTraceInterceptor { + return ServerTraceInterceptor( + otelgrpc.UnaryServerInterceptor( + otelgrpc.WithPropagators(tmp), + otelgrpc.WithTracerProvider(tp), + ), + ) +} + +func NewClientTraceInterceptor( + tp trace.TracerProvider, + tmp propagation.TextMapPropagator, +) ClientTraceInterceptor { + return ClientTraceInterceptor( + otelgrpc.UnaryClientInterceptor( + otelgrpc.WithPropagators(tmp), + otelgrpc.WithTracerProvider(tp), + ), + ) +} diff --git a/common/telemetry/util.go b/common/telemetry/util.go new file mode 100644 index 00000000000..bf1f162b6ff --- /dev/null +++ b/common/telemetry/util.go @@ -0,0 +1,43 @@ +package telemetry + +import ( + "context" + + sdktrace "go.opentelemetry.io/otel/sdk/trace" +) + +type starter interface{ Start(context.Context) error } + +func Starter(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { + return func(ctx context.Context) error { + for _, e := range exporters { + if starter, ok := e.(starter); ok { + err := starter.Start(ctx) + if err != nil { + return err + } + } + } + return nil + } +} + +func Stopper(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { + return func(ctx context.Context) error { + for _, e := range exporters { + err := e.Shutdown(ctx) + if err != nil { + return err + } + } + return nil + } +} + +func valueOrDefault[T comparable](v, defval T) T { + var zero T + if v == zero { + return defval + } + return v +} diff --git a/config/development-cass.yaml b/config/development-cass.yaml index 43710853b5d..9c25a67b89e 100644 --- a/config/development-cass.yaml +++ b/config/development-cass.yaml @@ -1,3 +1,14 @@ +otel: + exporters: + - kind: + signal: traces + model: otlp + proto: grpc + spec: + connection: + insecure: true + endpoint: localhost:4317 + log: stdout: true level: info diff --git a/go.mod b/go.mod index 8120a3099e8..c11d8dd6d60 100644 --- a/go.mod +++ b/go.mod @@ -39,7 +39,6 @@ require ( github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0 go.opentelemetry.io/otel v1.7.0 - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 go.opentelemetry.io/otel/exporters/prometheus v0.30.0 go.opentelemetry.io/otel/metric v0.30.0 @@ -81,6 +80,7 @@ require ( github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 // indirect go.opentelemetry.io/proto/otlp v0.16.0 // indirect ) diff --git a/service/frontend/fx.go b/service/frontend/fx.go index 633f0d95193..a7ea0f841eb 100644 --- a/service/frontend/fx.go +++ b/service/frontend/fx.go @@ -28,7 +28,6 @@ import ( "context" "net" - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.uber.org/fx" "google.golang.org/grpc" "google.golang.org/grpc/health" @@ -62,6 +61,7 @@ import ( "go.temporal.io/server/common/rpc/interceptor" "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" + "go.temporal.io/server/common/telemetry" "go.temporal.io/server/service" "go.temporal.io/server/service/frontend/configs" ) @@ -135,13 +135,13 @@ func GrpcServerOptionsProvider( namespaceValidatorInterceptor *interceptor.NamespaceValidatorInterceptor, telemetryInterceptor *interceptor.TelemetryInterceptor, rateLimitInterceptor *interceptor.RateLimitInterceptor, + traceInterceptor telemetry.ServerTraceInterceptor, sdkVersionInterceptor *interceptor.SDKVersionInterceptor, authorizer authorization.Authorizer, claimMapper authorization.ClaimMapper, audienceGetter authorization.JWTAudienceMapper, customInterceptors []grpc.UnaryServerInterceptor, metricsClient metrics.Client, - otelGrpcOpts []otelgrpc.Option, ) []grpc.ServerOption { kep := keepalive.EnforcementPolicy{ MinTime: serviceConfig.KeepAliveMinTime(), @@ -161,7 +161,7 @@ func GrpcServerOptionsProvider( interceptors := []grpc.UnaryServerInterceptor{ namespaceLogInterceptor.Intercept, rpc.ServiceErrorInterceptor, - otelgrpc.UnaryServerInterceptor(otelGrpcOpts...), + grpc.UnaryServerInterceptor(traceInterceptor), metrics.NewServerMetricsContextInjectorInterceptor(), telemetryInterceptor.Intercept, namespaceValidatorInterceptor.Intercept, diff --git a/service/fx.go b/service/fx.go index b2f0e019611..dbc27ebafd4 100644 --- a/service/fx.go +++ b/service/fx.go @@ -25,7 +25,6 @@ package service import ( - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "google.golang.org/grpc" "go.temporal.io/server/common" @@ -36,6 +35,7 @@ import ( persistenceClient "go.temporal.io/server/common/persistence/client" "go.temporal.io/server/common/rpc" "go.temporal.io/server/common/rpc/interceptor" + "go.temporal.io/server/common/telemetry" ) func PersistenceMaxQpsFn( @@ -60,9 +60,9 @@ func PersistenceMaxQpsFn( func GrpcServerOptionsProvider( logger log.Logger, rpcFactory common.RPCFactory, - otelGrpcOpts []otelgrpc.Option, telemetryInterceptor *interceptor.TelemetryInterceptor, rateLimitInterceptor *interceptor.RateLimitInterceptor, + tracingInterceptor telemetry.ServerTraceInterceptor, ) []grpc.ServerOption { grpcServerOptions, err := rpcFactory.GetInternodeGRPCServerOptions() @@ -74,7 +74,7 @@ func GrpcServerOptionsProvider( grpcServerOptions, grpc.ChainUnaryInterceptor( rpc.ServiceErrorInterceptor, - otelgrpc.UnaryServerInterceptor(otelGrpcOpts...), + grpc.UnaryServerInterceptor(tracingInterceptor), metrics.NewServerMetricsContextInjectorInterceptor(), metrics.NewServerMetricsTrailerPropagatorInterceptor(logger), telemetryInterceptor.Intercept, diff --git a/temporal/fx.go b/temporal/fx.go index f2696c34bb1..b90040eb311 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -35,8 +35,6 @@ import ( "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace" - "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/propagation" otelresource "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" @@ -49,7 +47,9 @@ import ( "github.com/pborman/uuid" "go.temporal.io/server/common/collection" + "go.temporal.io/server/common/headers" "go.temporal.io/server/common/resource" + "go.temporal.io/server/common/telemetry" persistencespb "go.temporal.io/server/api/persistence/v1" "go.temporal.io/server/client" @@ -789,13 +789,8 @@ func verifyPersistenceCompatibleVersion(config config.Persistence, persistenceSe // OTEL trace/span exporters used by tracing instrumentation. The following // types can be overriden/augmented with fx.Replace/fx.Decorate: // -// - []go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Option -// default: empty slice -// - go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc.Client // - []go.opentelemetry.io/otel/sdk/trace.SpanExporter var TraceExportModule = fx.Options( - fx.Supply([]otlptracegrpc.Option{}), - fx.Invoke(func(log log.Logger) { otel.SetErrorHandler(otel.ErrorHandlerFunc( func(err error) { @@ -804,16 +799,16 @@ var TraceExportModule = fx.Options( ) }), - // Need this func to have the right type signature - fx will not inject a - // slice into a provider func defined with variadic parameters - fx.Provide(func(opts []otlptracegrpc.Option) otlptrace.Client { - return otlptracegrpc.NewClient(opts...) - }), - - fx.Provide(func(lc fx.Lifecycle, c otlptrace.Client) []sdktrace.SpanExporter { - e := otlptrace.NewUnstarted(c) - lc.Append(fx.Hook{OnStart: e.Start, OnStop: e.Shutdown}) - return []sdktrace.SpanExporter{e} + fx.Provide(func(lc fx.Lifecycle, c *config.Config) ([]sdktrace.SpanExporter, error) { + exporters, err := c.ExporterConfig.SpanExporters(context.Background()) + if err != nil { + return nil, err + } + lc.Append(fx.Hook{ + OnStart: telemetry.Starter(exporters), + OnStop: telemetry.Stopper(exporters), + }) + return exporters, nil }), ) @@ -832,8 +827,8 @@ var TraceExportModule = fx.Options( // default: sdktrace.NewTracerProvider with each of the sdktrace.TracerProviderOption // - go.opentelemetry.io/otel/ppropagation.TextMapPropagator // default: propagation.TraceContext{} -// - []go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/ogelgrpc.Option -// default: just otelgrpc.WithTracerProvider +// - telemetry.ServerTraceInterceptor +// - telemetry.ClientTraceInterceptor var ServiceTracingModule = fx.Options( fx.Supply([]sdktrace.BatchSpanProcessorOption{}), fx.Provide(func(exps []sdktrace.SpanExporter, opts []sdktrace.BatchSpanProcessorOption) []sdktrace.SpanProcessor { @@ -852,6 +847,7 @@ var ServiceTracingModule = fx.Options( } attrs := []attribute.KeyValue{ semconv.ServiceNameKey.String(serviceName), + semconv.ServiceVersionKey.String(headers.ServerVersion), } if rsi != "" { attrs = append(attrs, semconv.ServiceInstanceIDKey.String(string(rsi))) @@ -882,10 +878,6 @@ var ServiceTracingModule = fx.Options( }), // Haven't had use for baggage propagation yet fx.Provide(func() propagation.TextMapPropagator { return propagation.TraceContext{} }), - fx.Provide(func(tp trace.TracerProvider, tmp propagation.TextMapPropagator) []otelgrpc.Option { - return []otelgrpc.Option{ - otelgrpc.WithTracerProvider(tp), - otelgrpc.WithPropagators(tmp), - } - }), + fx.Provide(telemetry.NewServerTraceInterceptor), + fx.Provide(telemetry.NewClientTraceInterceptor), ) From b1fedf94facfee385b15f43c2056028e8f0c9f29 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 3 Jun 2022 15:37:48 -0400 Subject: [PATCH 09/27] rebase fixups --- temporal/fx.go | 1 - 1 file changed, 1 deletion(-) diff --git a/temporal/fx.go b/temporal/fx.go index b90040eb311..7524f7210f0 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -32,7 +32,6 @@ import ( "go.temporal.io/server/service/history/replication" - "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/propagation" From 06e63eead09bfae9b19b8bb5bacd6e362a109a95 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 3 Jun 2022 16:10:53 -0400 Subject: [PATCH 10/27] Lift some literals up into constants --- common/telemetry/config.go | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index 129ac7f2988..2b9c8e8789e 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -14,6 +14,15 @@ import ( "gopkg.in/yaml.v3" ) +const ( + // these defaults were taken from the grpc docs as of grpc v1.46. + // they are not available programatically + + defaultReadBufferSize = 32 * 1024 + defaultWriteBufferSize = 32 * 1024 + defaultMinConnectTimeout = 10 * time.Second +) + type ( metadata struct { Name string `yaml:"name"` @@ -100,11 +109,11 @@ func (g *grpcconn) dial(ctx context.Context) (*grpc.ClientConn, error) { func (g *grpcconn) dialOpts() []grpc.DialOption { out := []grpc.DialOption{ - grpc.WithReadBufferSize(valueOrDefault(g.ReadBufferSize, 32*1024)), - grpc.WithWriteBufferSize(valueOrDefault(g.WriteBufferSize, 32*1024)), + grpc.WithReadBufferSize(valueOrDefault(g.ReadBufferSize, defaultReadBufferSize)), + grpc.WithWriteBufferSize(valueOrDefault(g.WriteBufferSize, defaultWriteBufferSize)), grpc.WithUserAgent(g.UserAgent), grpc.WithConnectParams(grpc.ConnectParams{ - MinConnectTimeout: valueOrDefault(g.ConnectParams.MinConnectTimeout, 10*time.Second), + MinConnectTimeout: valueOrDefault(g.ConnectParams.MinConnectTimeout, defaultMinConnectTimeout), Backoff: backoff.Config{ BaseDelay: valueOrDefault(g.ConnectParams.Backoff.BaseDelay, backoff.DefaultConfig.BaseDelay), MaxDelay: valueOrDefault(g.ConnectParams.Backoff.MaxDelay, backoff.DefaultConfig.MaxDelay), From bac0b2ffcda5ec305c2dbd4f757c3949893fd90b Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 3 Jun 2022 16:12:19 -0400 Subject: [PATCH 11/27] License comments --- common/telemetry/config.go | 24 ++++++++++++++++++++++++ common/telemetry/grpc.go | 24 ++++++++++++++++++++++++ common/telemetry/util.go | 24 ++++++++++++++++++++++++ 3 files changed, 72 insertions(+) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index 2b9c8e8789e..6597ce080e1 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -1,3 +1,27 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + package telemetry import ( diff --git a/common/telemetry/grpc.go b/common/telemetry/grpc.go index bae094a8397..2d1bcc7d064 100644 --- a/common/telemetry/grpc.go +++ b/common/telemetry/grpc.go @@ -1,3 +1,27 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + package telemetry import ( diff --git a/common/telemetry/util.go b/common/telemetry/util.go index bf1f162b6ff..e8b7fbb2290 100644 --- a/common/telemetry/util.go +++ b/common/telemetry/util.go @@ -1,3 +1,27 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + package telemetry import ( From aabe9a81fdd2371957cc04020112523714be0821 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 3 Jun 2022 20:18:49 -0400 Subject: [PATCH 12/27] Comments! --- common/telemetry/config.go | 8 +++++ common/telemetry/grpc.go | 11 +++++++ common/telemetry/util.go | 67 -------------------------------------- temporal/fx.go | 31 ++++++++++++++++-- 4 files changed, 48 insertions(+), 69 deletions(-) delete mode 100644 common/telemetry/util.go diff --git a/common/telemetry/config.go b/common/telemetry/config.go index 6597ce080e1..37949143afb 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -277,3 +277,11 @@ func (e *exporter) UnmarshalYAML(n *yaml.Node) error { } return obj.Spec.Decode(e.Spec) } + +func valueOrDefault[T comparable](v, defval T) T { + var zero T + if v == zero { + return defval + } + return v +} diff --git a/common/telemetry/grpc.go b/common/telemetry/grpc.go index 2d1bcc7d064..c0456c074b1 100644 --- a/common/telemetry/grpc.go +++ b/common/telemetry/grpc.go @@ -32,10 +32,18 @@ import ( ) type ( + // ServerTraceInterceptor gives a named type to the + // grpc.UnaryServerInterceptor implementation provided by otelgrpc ServerTraceInterceptor grpc.UnaryServerInterceptor + + // ClientTraceInterceptor gives a named type to the + // grpc.UnaryClientInterceptor implementation provided by otelgrpc ClientTraceInterceptor grpc.UnaryClientInterceptor ) +// NewServerTraceInterceptor creates a new gRPC server interceptor that tracks +// each request with an encapsulating span using the provided TracerProvider and +// TextMapPropagator. func NewServerTraceInterceptor( tp trace.TracerProvider, tmp propagation.TextMapPropagator, @@ -48,6 +56,9 @@ func NewServerTraceInterceptor( ) } +// NewClientTraceInterceptor creates a new gRPC client interceptor that tracks +// each request with an encapsulating span using the provided TracerProvider and +// TextMapPropagator. func NewClientTraceInterceptor( tp trace.TracerProvider, tmp propagation.TextMapPropagator, diff --git a/common/telemetry/util.go b/common/telemetry/util.go deleted file mode 100644 index e8b7fbb2290..00000000000 --- a/common/telemetry/util.go +++ /dev/null @@ -1,67 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -package telemetry - -import ( - "context" - - sdktrace "go.opentelemetry.io/otel/sdk/trace" -) - -type starter interface{ Start(context.Context) error } - -func Starter(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { - return func(ctx context.Context) error { - for _, e := range exporters { - if starter, ok := e.(starter); ok { - err := starter.Start(ctx) - if err != nil { - return err - } - } - } - return nil - } -} - -func Stopper(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { - return func(ctx context.Context) error { - for _, e := range exporters { - err := e.Shutdown(ctx) - if err != nil { - return err - } - } - return nil - } -} - -func valueOrDefault[T comparable](v, defval T) T { - var zero T - if v == zero { - return defval - } - return v -} diff --git a/temporal/fx.go b/temporal/fx.go index 7524f7210f0..f048bd9205f 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -804,8 +804,8 @@ var TraceExportModule = fx.Options( return nil, err } lc.Append(fx.Hook{ - OnStart: telemetry.Starter(exporters), - OnStop: telemetry.Stopper(exporters), + OnStart: startAll(exporters), + OnStop: shutdownAll(exporters), }) return exporters, nil }), @@ -880,3 +880,30 @@ var ServiceTracingModule = fx.Options( fx.Provide(telemetry.NewServerTraceInterceptor), fx.Provide(telemetry.NewClientTraceInterceptor), ) + +func startAll(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { + type starter interface{ Start(context.Context) error } + return func(ctx context.Context) error { + for _, e := range exporters { + if starter, ok := e.(starter); ok { + err := starter.Start(ctx) + if err != nil { + return err + } + } + } + return nil + } +} + +func shutdownAll(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { + return func(ctx context.Context) error { + for _, e := range exporters { + err := e.Shutdown(ctx) + if err != nil { + return err + } + } + return nil + } +} From 341112ae393f07973705c88d15bbc8385ad0c615 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 3 Jun 2022 20:26:31 -0400 Subject: [PATCH 13/27] License header --- common/telemetry/attribute.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/common/telemetry/attribute.go b/common/telemetry/attribute.go index 1270943e2d3..0f89a31bd4b 100644 --- a/common/telemetry/attribute.go +++ b/common/telemetry/attribute.go @@ -1,3 +1,27 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + package telemetry import ( From 1d87471cff98181953fde91da9fc5ec28052638e Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Fri, 3 Jun 2022 23:55:11 -0400 Subject: [PATCH 14/27] Unit tests --- common/telemetry/config.go | 48 +++++++++++----------- common/telemetry/config_test.go | 70 +++++++++++++++++++++++++++++++++ common/telemetry/export_test.go | 6 +++ 3 files changed, 100 insertions(+), 24 deletions(-) create mode 100644 common/telemetry/config_test.go create mode 100644 common/telemetry/export_test.go diff --git a/common/telemetry/config.go b/common/telemetry/config.go index 37949143afb..7ba37c14211 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -49,63 +49,63 @@ const ( type ( metadata struct { - Name string `yaml:"name"` - Labels map[string]string `yaml:"labels"` + Name string + Labels map[string]string } connection struct { - Kind string `yaml:"kind"` - Metadata metadata `yaml:"metadata"` + Kind string + Metadata metadata Spec interface{} `yaml:"-"` } grpcconn struct { - Endpoint string `yaml:"endpoint"` - Block bool `yaml:"block"` + Endpoint string + Block bool ConnectParams struct { MinConnectTimeout time.Duration `yaml:"min_connect_timeout"` Backoff struct { BaseDelay time.Duration `yaml:"base_delay"` - Multiplier float64 `yaml:"multiplier"` - Jitter float64 `yaml:"jitter"` + Multiplier float64 + Jitter float64 MaxDelay time.Duration `yaml:"max_delay"` - } `yaml:"backoff"` + } } `yaml:"connect_params"` UserAgent string `yaml:"user_agent"` ReadBufferSize int `yaml:"read_buffer_size"` WriteBufferSize int `yaml:"write_buffer_size"` - Authority string `yaml:"authority"` - Insecure bool `yaml:"insecure"` + Authority string + Insecure bool cc *grpc.ClientConn } exporter struct { Kind struct { - Signal string `yaml:"signal"` - Model string `yaml:"model"` - Proto string `yaml:"proto"` - } `yaml:"kind"` - Metadata metadata `yaml:"metadata"` + Signal string + Model string + Proto string + } + Metadata metadata Spec interface{} `yaml:"-"` } otlpGrpcSpanExporter struct { - ConnectionName string `yaml:"connection_name"` - Connection grpcconn `yaml:"connection"` - Headers map[string]string `yaml:"headers"` - Timeout time.Duration `yaml:"timeout"` + ConnectionName string `yaml:"connection_name"` + Connection grpcconn + Headers map[string]string + Timeout time.Duration Retry struct { - Enabled bool `yaml:"enabled"` + Enabled bool InitialInterval time.Duration `yaml:"initial_interval"` MaxInterval time.Duration `yaml:"max_interval"` MaxElapsedTime time.Duration `yaml:"max_elapsed_time"` - } `yaml:"retry"` + } } exportConfig struct { - Connections []connection `yaml:"connections"` - Exporters []exporter `yaml:"exporters"` + Connections []connection + Exporters []exporter } // ExportConfig represents YAML structured configuration for a set of OTEL diff --git a/common/telemetry/config_test.go b/common/telemetry/config_test.go new file mode 100644 index 00000000000..2a9428f0d38 --- /dev/null +++ b/common/telemetry/config_test.go @@ -0,0 +1,70 @@ +package telemetry_test + +import ( + "context" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/require" + "go.temporal.io/server/common/telemetry" + "gopkg.in/yaml.v3" +) + +var basicOTLPTraceOnlyConfig = ` +exporters: + - kind: + signal: traces + model: otlp + proto: grpc + spec: + headers: + a: b + c: d + timeout: 10s + retry: + enabled: true + initial_interval: 1s + max_interval: 1s + max_elapsed_time: 1s + connection: + block: false + insecure: true + endpoint: localhost:4317 +` + +func TestEmptyConfig(t *testing.T) { + cfg := telemetry.ExportConfig{} + exporters, err := cfg.SpanExporters(context.TODO()) + require.NoError(t, err) + require.Len(t, exporters, 0) +} + +func TestOTLPTraceGRPC(t *testing.T) { + cfg := telemetry.PrivateExportConfig{} + d := yaml.NewDecoder(strings.NewReader(basicOTLPTraceOnlyConfig)) + err := d.Decode(&cfg) + require.NoError(t, err) + require.Len(t, cfg.Connections, 0) + require.Len(t, cfg.Exporters, 1) + + exp := cfg.Exporters[0] + require.Equal(t, exp.Kind.Signal, "traces") + require.Equal(t, exp.Kind.Model, "otlp") + require.Equal(t, exp.Kind.Proto, "grpc") + require.NotNil(t, exp.Spec) + + spec, ok := exp.Spec.(*telemetry.OTLPGRPCSpanExporter) + require.True(t, ok) + require.Equal(t, map[string]string{"a": "b", "c": "d"}, spec.Headers) + require.Equal(t, 10*time.Second, spec.Timeout) + require.True(t, spec.Retry.Enabled) + require.Equal(t, time.Second, spec.Retry.InitialInterval) + require.Equal(t, time.Second, spec.Retry.MaxInterval) + require.Equal(t, time.Second, spec.Retry.MaxElapsedTime) + + conn := spec.Connection + require.True(t, conn.Insecure) + require.Equal(t, "localhost:4317", conn.Endpoint) + require.False(t, conn.Block) +} diff --git a/common/telemetry/export_test.go b/common/telemetry/export_test.go new file mode 100644 index 00000000000..72dc05769ab --- /dev/null +++ b/common/telemetry/export_test.go @@ -0,0 +1,6 @@ +package telemetry + +type ( + OTLPGRPCSpanExporter = otlpGrpcSpanExporter + PrivateExportConfig = exportConfig +) From f858b3421c033bf5eed666788e9cb9af8e034cca Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Sat, 4 Jun 2022 13:31:23 -0400 Subject: [PATCH 15/27] Comments and licenses for linters --- common/telemetry/config.go | 19 +++++++++++-------- common/telemetry/config_test.go | 28 +++++++++++++++++++++++++--- common/telemetry/export_test.go | 24 ++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 11 deletions(-) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index 7ba37c14211..cfb6c09be8f 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -115,6 +115,7 @@ type ( } ) +// UnmarshalYAML loads the state of an ExportConfig from parsed YAML func (ec *ExportConfig) UnmarshalYAML(n *yaml.Node) error { return n.Decode(&ec.inner) } @@ -161,15 +162,15 @@ func (g *grpcconn) dialOpts() []grpc.DialOption { // SpanExporters builds the set of OTEL SpanExporter objects defined by the YAML // unmarshaled into this ExportConfig object. The returned SpanExporters have // not been started. -func (c *ExportConfig) SpanExporters(ctx context.Context) ([]sdktrace.SpanExporter, error) { - out := make([]sdktrace.SpanExporter, 0, len(c.inner.Exporters)) - for _, expcfg := range c.inner.Exporters { +func (ec *ExportConfig) SpanExporters(ctx context.Context) ([]sdktrace.SpanExporter, error) { + out := make([]sdktrace.SpanExporter, 0, len(ec.inner.Exporters)) + for _, expcfg := range ec.inner.Exporters { if !strings.HasPrefix(expcfg.Kind.Signal, "trace") { continue } switch spec := expcfg.Spec.(type) { case *otlpGrpcSpanExporter: - spanexp, err := c.buildOtlpGrpcSpanExporter(ctx, spec) + spanexp, err := ec.buildOtlpGrpcSpanExporter(ctx, spec) if err != nil { return nil, err } @@ -181,7 +182,7 @@ func (c *ExportConfig) SpanExporters(ctx context.Context) ([]sdktrace.SpanExport return out, nil } -func (c *ExportConfig) buildOtlpGrpcSpanExporter( +func (ec *ExportConfig) buildOtlpGrpcSpanExporter( ctx context.Context, cfg *otlpGrpcSpanExporter, ) (sdktrace.SpanExporter, error) { @@ -208,7 +209,7 @@ func (c *ExportConfig) buildOtlpGrpcSpanExporter( return otlptracegrpc.NewUnstarted(opts...), nil } - conncfg, ok := c.findNamedGrpcConnCfg(cfg.ConnectionName) + conncfg, ok := ec.findNamedGrpcConnCfg(cfg.ConnectionName) if !ok { return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName) } @@ -220,11 +221,11 @@ func (c *ExportConfig) buildOtlpGrpcSpanExporter( return otlptracegrpc.NewUnstarted(opts...), nil } -func (c *ExportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) { +func (ec *ExportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) { if name == "" { return nil, false } - for _, conn := range c.inner.Connections { + for _, conn := range ec.inner.Connections { if gconn, ok := conn.Spec.(*grpcconn); ok && conn.Metadata.Name == name { return gconn, true } @@ -232,6 +233,7 @@ func (c *ExportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) { return nil, false } +// UnmarshalYAML loads the state of a generic connection from parsed YAML func (c *connection) UnmarshalYAML(n *yaml.Node) error { type conn connection type overlay struct { @@ -252,6 +254,7 @@ func (c *connection) UnmarshalYAML(n *yaml.Node) error { return obj.Spec.Decode(c.Spec) } +// UnmarshalYAML loads the state of a generic exporter from parsed YAML func (e *exporter) UnmarshalYAML(n *yaml.Node) error { type exp exporter type overlay struct { diff --git a/common/telemetry/config_test.go b/common/telemetry/config_test.go index 2a9428f0d38..3a7f99716b0 100644 --- a/common/telemetry/config_test.go +++ b/common/telemetry/config_test.go @@ -1,8 +1,31 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + package telemetry_test import ( "context" - "strings" "testing" "time" @@ -42,8 +65,7 @@ func TestEmptyConfig(t *testing.T) { func TestOTLPTraceGRPC(t *testing.T) { cfg := telemetry.PrivateExportConfig{} - d := yaml.NewDecoder(strings.NewReader(basicOTLPTraceOnlyConfig)) - err := d.Decode(&cfg) + err := yaml.Unmarshal([]byte(basicOTLPTraceOnlyConfig), &cfg) require.NoError(t, err) require.Len(t, cfg.Connections, 0) require.Len(t, cfg.Exporters, 1) diff --git a/common/telemetry/export_test.go b/common/telemetry/export_test.go index 72dc05769ab..c011d48c025 100644 --- a/common/telemetry/export_test.go +++ b/common/telemetry/export_test.go @@ -1,3 +1,27 @@ +// The MIT License +// +// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. +// +// Copyright (c) 2020 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + package telemetry type ( From 7148c7826e1063f4d823a7caced42cf35133d814 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Sat, 4 Jun 2022 23:05:18 -0400 Subject: [PATCH 16/27] Wrap exporters referencing shared grpc.ClientConns Do this so that we can make the ClientConn dialing lazy (i.e. occurs at when Start() is called on the exporter) because (a) that's preferrable anyway and (b) easier to unit test when construction and execution phases are separate. --- common/telemetry/config.go | 157 +++++++++++++++++++++++++++++--- common/telemetry/config_test.go | 68 +++++++++++++- common/telemetry/export_test.go | 5 +- go.mod | 2 + go.sum | 4 + temporal/fx.go | 2 +- 6 files changed, 218 insertions(+), 20 deletions(-) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index cfb6c09be8f..dccad5cfe2b 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -28,9 +28,12 @@ import ( "context" "fmt" "strings" + "sync" "time" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + sdkmetricexp "go.opentelemetry.io/otel/sdk/metric/export" sdktrace "go.opentelemetry.io/otel/sdk/trace" "google.golang.org/grpc" "google.golang.org/grpc/backoff" @@ -90,7 +93,7 @@ type ( Spec interface{} `yaml:"-"` } - otlpGrpcSpanExporter struct { + otlpGrpcExporter struct { ConnectionName string `yaml:"connection_name"` Connection grpcconn Headers map[string]string @@ -103,6 +106,13 @@ type ( } } + otlpGrpcSpanExporter struct { + otlpGrpcExporter `yaml:",inline"` + } + otlpGrpcMetricExporter struct { + otlpGrpcExporter `yaml:",inline"` + } + exportConfig struct { Connections []connection Exporters []exporter @@ -120,7 +130,15 @@ func (ec *ExportConfig) UnmarshalYAML(n *yaml.Node) error { return n.Decode(&ec.inner) } -func (g *grpcconn) dial(ctx context.Context) (*grpc.ClientConn, error) { +func (ec *ExportConfig) SpanExporters() ([]sdktrace.SpanExporter, error) { + return ec.inner.SpanExporters() +} + +func (ec *ExportConfig) MetricExporters() ([]sdkmetricexp.Exporter, error) { + return ec.inner.MetricExporters() +} + +func (g *grpcconn) Dial(ctx context.Context) (*grpc.ClientConn, error) { if g.cc != nil { return g.cc, nil } @@ -162,15 +180,15 @@ func (g *grpcconn) dialOpts() []grpc.DialOption { // SpanExporters builds the set of OTEL SpanExporter objects defined by the YAML // unmarshaled into this ExportConfig object. The returned SpanExporters have // not been started. -func (ec *ExportConfig) SpanExporters(ctx context.Context) ([]sdktrace.SpanExporter, error) { - out := make([]sdktrace.SpanExporter, 0, len(ec.inner.Exporters)) - for _, expcfg := range ec.inner.Exporters { +func (ec *exportConfig) SpanExporters() ([]sdktrace.SpanExporter, error) { + out := make([]sdktrace.SpanExporter, 0, len(ec.Exporters)) + for _, expcfg := range ec.Exporters { if !strings.HasPrefix(expcfg.Kind.Signal, "trace") { continue } switch spec := expcfg.Spec.(type) { case *otlpGrpcSpanExporter: - spanexp, err := ec.buildOtlpGrpcSpanExporter(ctx, spec) + spanexp, err := ec.buildOtlpGrpcSpanExporter(spec) if err != nil { return nil, err } @@ -182,8 +200,64 @@ func (ec *ExportConfig) SpanExporters(ctx context.Context) ([]sdktrace.SpanExpor return out, nil } -func (ec *ExportConfig) buildOtlpGrpcSpanExporter( - ctx context.Context, +func (ec *exportConfig) MetricExporters() ([]sdkmetricexp.Exporter, error) { + out := make([]sdkmetricexp.Exporter, 0, len(ec.Exporters)) + for _, expcfg := range ec.Exporters { + if !strings.HasPrefix(expcfg.Kind.Signal, "metric") { + continue + } + switch spec := expcfg.Spec.(type) { + case *otlpGrpcMetricExporter: + metricexp, err := ec.buildOtlpGrpcMetricExporter(spec) + if err != nil { + return nil, err + } + out = append(out, metricexp) + default: + return nil, fmt.Errorf("unsupported metric exporter type: %T", spec) + } + } + return out, nil + +} + +func (ec *exportConfig) buildOtlpGrpcMetricExporter( + cfg *otlpGrpcMetricExporter, +) (sdkmetricexp.Exporter, error) { + dopts := cfg.Connection.dialOpts() + opts := []otlpmetricgrpc.Option{ + otlpmetricgrpc.WithEndpoint(cfg.Connection.Endpoint), + otlpmetricgrpc.WithHeaders(cfg.Headers), + otlpmetricgrpc.WithTimeout(valueOrDefault(cfg.Timeout, 10*time.Second)), + otlpmetricgrpc.WithDialOption(dopts...), + otlpmetricgrpc.WithRetry(otlpmetricgrpc.RetryConfig{ + Enabled: valueOrDefault(cfg.Retry.Enabled, true), + InitialInterval: valueOrDefault(cfg.Retry.InitialInterval, 5*time.Second), + MaxInterval: valueOrDefault(cfg.Retry.MaxInterval, 30*time.Second), + MaxElapsedTime: valueOrDefault(cfg.Retry.MaxElapsedTime, 1*time.Minute), + }), + } + + // work around https://github.com/open-telemetry/opentelemetry-go/issues/2940 + if cfg.Connection.Insecure { + opts = append(opts, otlpmetricgrpc.WithInsecure()) + } + + if cfg.ConnectionName == "" { + return otlpmetricgrpc.NewUnstarted(opts...), nil + } + + conncfg, ok := ec.findNamedGrpcConnCfg(cfg.ConnectionName) + if !ok { + return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName) + } + return &sharedConnMetricExporter{ + baseOpts: opts, + dialer: conncfg, + }, nil +} + +func (ec *exportConfig) buildOtlpGrpcSpanExporter( cfg *otlpGrpcSpanExporter, ) (sdktrace.SpanExporter, error) { dopts := cfg.Connection.dialOpts() @@ -213,19 +287,70 @@ func (ec *ExportConfig) buildOtlpGrpcSpanExporter( if !ok { return nil, fmt.Errorf("OTEL exporter connection %q not found", cfg.ConnectionName) } - cc, err := conncfg.dial(ctx) - if err != nil { - return nil, err + return &sharedConnSpanExporter{ + baseOpts: opts, + dialer: conncfg, + }, nil +} + +// sharedConnSpanExporter exists to wrap a span exporter that uses a shared +// *grpc.ClientConn so that the grpc.Dial call doesn't happen until Start() is +// called. Without this wrapper the grpc.ClientConn (which can only be created +// via grpc.Dial or grpc.DialContext) would need to exist at exporter +// _construction_ time, meaning that we would need to dial at construction +// rather then during the start phase. +type sharedConnSpanExporter struct { + baseOpts []otlptracegrpc.Option + dialer interface { + Dial(context.Context) (*grpc.ClientConn, error) + } + startOnce sync.Once + sdktrace.SpanExporter +} + +// Start +func (scse *sharedConnSpanExporter) Start(ctx context.Context) error { + var err error + scse.startOnce.Do(func() { + var cc *grpc.ClientConn + cc, err = scse.dialer.Dial(ctx) + if err != nil { + return + } + opts := append(scse.baseOpts, otlptracegrpc.WithGRPCConn(cc)) + scse.SpanExporter, err = otlptracegrpc.New(ctx, opts...) + }) + return err +} + +type sharedConnMetricExporter struct { + baseOpts []otlpmetricgrpc.Option + dialer interface { + Dial(context.Context) (*grpc.ClientConn, error) } - opts = append(opts, otlptracegrpc.WithGRPCConn(cc)) - return otlptracegrpc.NewUnstarted(opts...), nil + startOnce sync.Once + sdkmetricexp.Exporter +} + +func (scme *sharedConnMetricExporter) Start(ctx context.Context) error { + var err error + scme.startOnce.Do(func() { + var cc *grpc.ClientConn + cc, err = scme.dialer.Dial(ctx) + if err != nil { + return + } + opts := append(scme.baseOpts, otlpmetricgrpc.WithGRPCConn(cc)) + scme.Exporter, err = otlpmetricgrpc.New(ctx, opts...) + }) + return err } -func (ec *ExportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) { +func (ec *exportConfig) findNamedGrpcConnCfg(name string) (*grpcconn, bool) { if name == "" { return nil, false } - for _, conn := range ec.inner.Connections { + for _, conn := range ec.Connections { if gconn, ok := conn.Spec.(*grpcconn); ok && conn.Metadata.Name == name { return gconn, true } @@ -270,6 +395,8 @@ func (e *exporter) UnmarshalYAML(n *yaml.Node) error { switch descriptor { case "traces+otlp+grpc", "trace+otlp+grpc": e.Spec = new(otlpGrpcSpanExporter) + case "metrics+otlp+grpc", "metric+otlp+grpc": + e.Spec = new(otlpGrpcMetricExporter) default: return fmt.Errorf( "unsupported exporter kind: signal=%q; model=%q; proto=%q", diff --git a/common/telemetry/config_test.go b/common/telemetry/config_test.go index 3a7f99716b0..cab57fd12fe 100644 --- a/common/telemetry/config_test.go +++ b/common/telemetry/config_test.go @@ -25,7 +25,6 @@ package telemetry_test import ( - "context" "testing" "time" @@ -56,13 +55,78 @@ exporters: endpoint: localhost:4317 ` +var sharedConnOTLPConfig = ` +otel: + connections: + - kind: grpc + metadata: + name: conn1 + spec: + endpoint: localhost:4317 + exporters: + - kind: + signal: traces + model: otlp + proto: grpc + spec: + connection_name: conn1 + - kind: + signal: metrics + model: otlp + proto: grpc + spec: + connection_name: conn1 +` + func TestEmptyConfig(t *testing.T) { cfg := telemetry.ExportConfig{} - exporters, err := cfg.SpanExporters(context.TODO()) + exporters, err := cfg.SpanExporters() require.NoError(t, err) require.Len(t, exporters, 0) } +func TestExportersWithSharedConn(t *testing.T) { + root := struct{ Otel telemetry.PrivateExportConfig }{} + err := yaml.Unmarshal([]byte(sharedConnOTLPConfig), &root) + require.NoError(t, err) + cfg := &root.Otel + + spanExporters, err := cfg.SpanExporters() + require.NoError(t, err) + require.Len(t, spanExporters, 1) + + metricExporters, err := cfg.MetricExporters() + require.NoError(t, err) + require.Len(t, metricExporters, 1) +} + +func TestSharedConn(t *testing.T) { + root := struct{ Otel telemetry.PrivateExportConfig }{} + err := yaml.Unmarshal([]byte(sharedConnOTLPConfig), &root) + require.NoError(t, err) + cfg := &root.Otel + require.Len(t, cfg.Connections, 1) + require.Len(t, cfg.Exporters, 2) + + exp := cfg.Exporters[0] + require.Equal(t, exp.Kind.Signal, "traces") + require.Equal(t, exp.Kind.Model, "otlp") + require.Equal(t, exp.Kind.Proto, "grpc") + require.NotNil(t, exp.Spec) + sspec, ok := exp.Spec.(*telemetry.OTLPGRPCSpanExporter) + require.True(t, ok) + require.Equal(t, "conn1", sspec.ConnectionName) + + exp = cfg.Exporters[1] + require.Equal(t, exp.Kind.Signal, "metrics") + require.Equal(t, exp.Kind.Model, "otlp") + require.Equal(t, exp.Kind.Proto, "grpc") + require.NotNil(t, exp.Spec) + mspec, ok := exp.Spec.(*telemetry.OTLPGRPCMetricExporter) + require.True(t, ok) + require.Equal(t, "conn1", mspec.ConnectionName) +} + func TestOTLPTraceGRPC(t *testing.T) { cfg := telemetry.PrivateExportConfig{} err := yaml.Unmarshal([]byte(basicOTLPTraceOnlyConfig), &cfg) diff --git a/common/telemetry/export_test.go b/common/telemetry/export_test.go index c011d48c025..64c3bc80946 100644 --- a/common/telemetry/export_test.go +++ b/common/telemetry/export_test.go @@ -25,6 +25,7 @@ package telemetry type ( - OTLPGRPCSpanExporter = otlpGrpcSpanExporter - PrivateExportConfig = exportConfig + OTLPGRPCSpanExporter = otlpGrpcSpanExporter + OTLPGRPCMetricExporter = otlpGrpcMetricExporter + PrivateExportConfig = exportConfig ) diff --git a/go.mod b/go.mod index c11d8dd6d60..7f613503b49 100644 --- a/go.mod +++ b/go.mod @@ -80,6 +80,8 @@ require ( github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.30.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.30.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 // indirect go.opentelemetry.io/proto/otlp v0.16.0 // indirect ) diff --git a/go.sum b/go.sum index a29f15e231c..451be4dcf16 100644 --- a/go.sum +++ b/go.sum @@ -478,6 +478,10 @@ go.opentelemetry.io/otel v1.7.0 h1:Z2lA3Tdch0iDcrhJXDIlC94XE+bxok1F9B+4Lz/lGsM= go.opentelemetry.io/otel v1.7.0/go.mod h1:5BdUoMIz5WEs0vt0CUEMtSSaTSHBBVwrhnz7+nrD5xk= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 h1:7Yxsak1q4XrJ5y7XBnNwqWx9amMZvoidCctv62XOQ6Y= go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0/go.mod h1:M1hVZHNxcbkAlcvrOMlpQ4YOO3Awf+4N2dxkZL3xm04= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.30.0 h1:Os0ds8fJp2AUa9DNraFWIycgUzevz47i6UvnSh+8LQ0= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.30.0/go.mod h1:8Lz1GGcrx1kPGE3zqDrK7ZcPzABEfIQqBjq7roQa5ZA= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.30.0 h1:7E8znQuiqnaFDDl1zJYUpoqHteZI6u2rrcxH3Gwoiis= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.30.0/go.mod h1:RejW0QAFotPIixlFZKZka4/70S5UaFOqDO9DYOgScIs= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 h1:cMDtmgJ5FpRvqx9x2Aq+Mm0O6K/zcUkH73SFz20TuBw= go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0/go.mod h1:ceUgdyfNv4h4gLxHR0WNfDiiVmZFodZhZSbOLhpxqXE= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 h1:MFAyzUPrTwLOwCi+cltN0ZVyy4phU41lwH+lyMyQTS4= diff --git a/temporal/fx.go b/temporal/fx.go index f048bd9205f..0d1986a20ac 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -799,7 +799,7 @@ var TraceExportModule = fx.Options( }), fx.Provide(func(lc fx.Lifecycle, c *config.Config) ([]sdktrace.SpanExporter, error) { - exporters, err := c.ExporterConfig.SpanExporters(context.Background()) + exporters, err := c.ExporterConfig.SpanExporters() if err != nil { return nil, err } From 7784a8dec440b95e297c3dbfeb7c282e07192aa2 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Sun, 5 Jun 2022 11:07:53 -0400 Subject: [PATCH 17/27] Get onebox fx working again --- go.mod | 2 +- host/onebox.go | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 7f613503b49..42d88e90f59 100644 --- a/go.mod +++ b/go.mod @@ -39,6 +39,7 @@ require ( github.com/xwb1989/sqlparser v0.0.0-20180606152119-120387863bf2 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.32.0 go.opentelemetry.io/otel v1.7.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.30.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.7.0 go.opentelemetry.io/otel/exporters/prometheus v0.30.0 go.opentelemetry.io/otel/metric v0.30.0 @@ -81,7 +82,6 @@ require ( github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.30.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.30.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 // indirect go.opentelemetry.io/proto/otlp v0.16.0 // indirect ) diff --git a/host/onebox.go b/host/onebox.go index e5fe943b6b7..3333af3a1ba 100644 --- a/host/onebox.go +++ b/host/onebox.go @@ -39,6 +39,7 @@ import ( "go.temporal.io/api/operatorservice/v1" "go.temporal.io/api/workflowservice/v1" + sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.temporal.io/server/api/adminservice/v1" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/client" @@ -71,6 +72,7 @@ import ( "go.temporal.io/server/service/worker" "go.temporal.io/server/service/worker/archiver" "go.temporal.io/server/service/worker/replicator" + "go.temporal.io/server/temporal" ) // Temporal hosts all of temporal services in one process @@ -127,6 +129,7 @@ type ( workerConfig *WorkerConfig mockAdminClient map[string]adminservice.AdminServiceClient namespaceReplicationTaskExecutor namespace.ReplicationTaskExecutor + spanExporters []sdktrace.SpanExporter } // HistoryConfig contains configs for history service @@ -158,6 +161,7 @@ type ( WorkerConfig *WorkerConfig MockAdminClient map[string]adminservice.AdminServiceClient NamespaceReplicationTaskExecutor namespace.ReplicationTaskExecutor + SpanExporters []sdktrace.SpanExporter } ) @@ -183,6 +187,7 @@ func NewTemporal(params *TemporalParams) *temporalImpl { workerConfig: params.WorkerConfig, mockAdminClient: params.MockAdminClient, namespaceReplicationTaskExecutor: params.NamespaceReplicationTaskExecutor, + spanExporters: params.SpanExporters, } } @@ -432,6 +437,8 @@ func (c *temporalImpl) startFrontend(hosts map[string][]string, startWG *sync.Wa fx.Provide(func() log.Logger { return c.logger }), fx.Provide(func() *esclient.Config { return c.esConfig }), fx.Provide(func() esclient.Client { return c.esClient }), + fx.Supply(c.spanExporters), + temporal.ServiceTracingModule, frontend.Module, fx.Populate(&frontendService, &clientBean, &namespaceRegistry), fx.NopLogger, @@ -528,6 +535,8 @@ func (c *temporalImpl) startHistory( fx.Provide(func() *esclient.Config { return c.esConfig }), fx.Provide(func() esclient.Client { return c.esClient }), fx.Provide(workflow.NewTaskGeneratorProvider), + fx.Supply(c.spanExporters), + temporal.ServiceTracingModule, history.QueueProcessorModule, history.Module, replication.Module, @@ -603,6 +612,8 @@ func (c *temporalImpl) startMatching(hosts map[string][]string, startWG *sync.Wa fx.Provide(func() persistenceClient.AbstractDataStoreFactory { return nil }), fx.Provide(func() dynamicconfig.Client { return newIntegrationConfigClient(dynamicconfig.NewNoopClient()) }), fx.Provide(func() log.Logger { return c.logger }), + fx.Supply(c.spanExporters), + temporal.ServiceTracingModule, matching.Module, fx.Populate(&matchingService, &clientBean, &namespaceRegistry), fx.NopLogger, @@ -697,7 +708,8 @@ func (c *temporalImpl) startWorker(hosts map[string][]string, startWG *sync.Wait fx.Provide(func() log.Logger { return c.logger }), fx.Provide(func() esclient.Client { return c.esClient }), fx.Provide(func() *esclient.Config { return c.esConfig }), - + fx.Supply(c.spanExporters), + temporal.ServiceTracingModule, worker.Module, fx.Populate(&workerService, &clientBean, &namespaceRegistry), fx.NopLogger, From 7a86512cc6c49ac780c2232a939987328354f7cb Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Sun, 5 Jun 2022 11:54:23 -0400 Subject: [PATCH 18/27] Mark span exporters as an optional dependency --- temporal/fx.go | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/temporal/fx.go b/temporal/fx.go index 0d1986a20ac..976f48c61a1 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -830,13 +830,18 @@ var TraceExportModule = fx.Options( // - telemetry.ClientTraceInterceptor var ServiceTracingModule = fx.Options( fx.Supply([]sdktrace.BatchSpanProcessorOption{}), - fx.Provide(func(exps []sdktrace.SpanExporter, opts []sdktrace.BatchSpanProcessorOption) []sdktrace.SpanProcessor { - sps := make([]sdktrace.SpanProcessor, 0, len(exps)) - for _, exp := range exps { - sps = append(sps, sdktrace.NewBatchSpanProcessor(exp, opts...)) - } - return sps - }), + fx.Provide( + fx.Annotate( + func(exps []sdktrace.SpanExporter, opts []sdktrace.BatchSpanProcessorOption) []sdktrace.SpanProcessor { + sps := make([]sdktrace.SpanProcessor, 0, len(exps)) + for _, exp := range exps { + sps = append(sps, sdktrace.NewBatchSpanProcessor(exp, opts...)) + } + return sps + }, + fx.ParamTags(`optional:"true"`, ``), + ), + ), fx.Provide( fx.Annotate( func(rsn resource.ServiceName, rsi resource.InstanceID) (*otelresource.Resource, error) { @@ -862,14 +867,16 @@ var ServiceTracingModule = fx.Options( fx.ParamTags(``, `optional:"true"`), ), ), - fx.Provide(func(r *otelresource.Resource, sps []sdktrace.SpanProcessor) []sdktrace.TracerProviderOption { - opts := make([]sdktrace.TracerProviderOption, 0, len(sps)+1) - opts = append(opts, sdktrace.WithResource(r)) - for _, sp := range sps { - opts = append(opts, sdktrace.WithSpanProcessor(sp)) - } - return opts - }), + fx.Provide( + func(r *otelresource.Resource, sps []sdktrace.SpanProcessor) []sdktrace.TracerProviderOption { + opts := make([]sdktrace.TracerProviderOption, 0, len(sps)+1) + opts = append(opts, sdktrace.WithResource(r)) + for _, sp := range sps { + opts = append(opts, sdktrace.WithSpanProcessor(sp)) + } + return opts + }, + ), fx.Provide(func(lc fx.Lifecycle, opts []sdktrace.TracerProviderOption) trace.TracerProvider { tp := sdktrace.NewTracerProvider(opts...) lc.Append(fx.Hook{OnStop: tp.Shutdown}) From db89208237b0109f5be983f7a61b39ccf7e00831 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Sun, 5 Jun 2022 21:24:11 -0400 Subject: [PATCH 19/27] Put type declarations where they belong Made valueOrDefault more general and renamed to coalesce --- common/telemetry/config.go | 114 +++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index dccad5cfe2b..dd44857309e 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -118,6 +118,32 @@ type ( Exporters []exporter } + // sharedConnSpanExporter and sharedConnMetricExporter exist to wrap a span + // exporter that uses a shared *grpc.ClientConn so that the grpc.Dial call + // doesn't happen until Start() is called. Without this wrapper the + // grpc.ClientConn (which can only be created via grpc.Dial or + // grpc.DialContext) would need to exist at _construction_ time, meaning + // that we would need to dial at construction rather then during the start + // phase. + + sharedConnSpanExporter struct { + baseOpts []otlptracegrpc.Option + dialer interface { + Dial(context.Context) (*grpc.ClientConn, error) + } + startOnce sync.Once + sdktrace.SpanExporter + } + + sharedConnMetricExporter struct { + baseOpts []otlpmetricgrpc.Option + dialer interface { + Dial(context.Context) (*grpc.ClientConn, error) + } + startOnce sync.Once + sdkmetricexp.Exporter + } + // ExportConfig represents YAML structured configuration for a set of OTEL // trace/span/log exporters. ExportConfig struct { @@ -138,30 +164,28 @@ func (ec *ExportConfig) MetricExporters() ([]sdkmetricexp.Exporter, error) { return ec.inner.MetricExporters() } +// Dial returns the cached *grpc.ClientConn instance or creates a new one, +// caches and then returns it. This function is not threadsafe. func (g *grpcconn) Dial(ctx context.Context) (*grpc.ClientConn, error) { - if g.cc != nil { - return g.cc, nil - } - cc, err := grpc.DialContext(ctx, g.Endpoint, g.dialOpts()...) - if err != nil { - return nil, err + var err error + if g.cc == nil { + g.cc, err = grpc.DialContext(ctx, g.Endpoint, g.dialOpts()...) } - g.cc = cc - return cc, nil + return g.cc, err } func (g *grpcconn) dialOpts() []grpc.DialOption { out := []grpc.DialOption{ - grpc.WithReadBufferSize(valueOrDefault(g.ReadBufferSize, defaultReadBufferSize)), - grpc.WithWriteBufferSize(valueOrDefault(g.WriteBufferSize, defaultWriteBufferSize)), + grpc.WithReadBufferSize(coalesce(g.ReadBufferSize, defaultReadBufferSize)), + grpc.WithWriteBufferSize(coalesce(g.WriteBufferSize, defaultWriteBufferSize)), grpc.WithUserAgent(g.UserAgent), grpc.WithConnectParams(grpc.ConnectParams{ - MinConnectTimeout: valueOrDefault(g.ConnectParams.MinConnectTimeout, defaultMinConnectTimeout), + MinConnectTimeout: coalesce(g.ConnectParams.MinConnectTimeout, defaultMinConnectTimeout), Backoff: backoff.Config{ - BaseDelay: valueOrDefault(g.ConnectParams.Backoff.BaseDelay, backoff.DefaultConfig.BaseDelay), - MaxDelay: valueOrDefault(g.ConnectParams.Backoff.MaxDelay, backoff.DefaultConfig.MaxDelay), - Jitter: valueOrDefault(g.ConnectParams.Backoff.Jitter, backoff.DefaultConfig.Jitter), - Multiplier: valueOrDefault(g.ConnectParams.Backoff.Multiplier, backoff.DefaultConfig.Multiplier), + BaseDelay: coalesce(g.ConnectParams.Backoff.BaseDelay, backoff.DefaultConfig.BaseDelay), + MaxDelay: coalesce(g.ConnectParams.Backoff.MaxDelay, backoff.DefaultConfig.MaxDelay), + Jitter: coalesce(g.ConnectParams.Backoff.Jitter, backoff.DefaultConfig.Jitter), + Multiplier: coalesce(g.ConnectParams.Backoff.Multiplier, backoff.DefaultConfig.Multiplier), }, }), } @@ -228,13 +252,13 @@ func (ec *exportConfig) buildOtlpGrpcMetricExporter( opts := []otlpmetricgrpc.Option{ otlpmetricgrpc.WithEndpoint(cfg.Connection.Endpoint), otlpmetricgrpc.WithHeaders(cfg.Headers), - otlpmetricgrpc.WithTimeout(valueOrDefault(cfg.Timeout, 10*time.Second)), + otlpmetricgrpc.WithTimeout(coalesce(cfg.Timeout, 10*time.Second)), otlpmetricgrpc.WithDialOption(dopts...), otlpmetricgrpc.WithRetry(otlpmetricgrpc.RetryConfig{ - Enabled: valueOrDefault(cfg.Retry.Enabled, true), - InitialInterval: valueOrDefault(cfg.Retry.InitialInterval, 5*time.Second), - MaxInterval: valueOrDefault(cfg.Retry.MaxInterval, 30*time.Second), - MaxElapsedTime: valueOrDefault(cfg.Retry.MaxElapsedTime, 1*time.Minute), + Enabled: coalesce(cfg.Retry.Enabled, true), + InitialInterval: coalesce(cfg.Retry.InitialInterval, 5*time.Second), + MaxInterval: coalesce(cfg.Retry.MaxInterval, 30*time.Second), + MaxElapsedTime: coalesce(cfg.Retry.MaxElapsedTime, 1*time.Minute), }), } @@ -260,17 +284,16 @@ func (ec *exportConfig) buildOtlpGrpcMetricExporter( func (ec *exportConfig) buildOtlpGrpcSpanExporter( cfg *otlpGrpcSpanExporter, ) (sdktrace.SpanExporter, error) { - dopts := cfg.Connection.dialOpts() opts := []otlptracegrpc.Option{ otlptracegrpc.WithEndpoint(cfg.Connection.Endpoint), otlptracegrpc.WithHeaders(cfg.Headers), - otlptracegrpc.WithTimeout(valueOrDefault(cfg.Timeout, 10*time.Second)), - otlptracegrpc.WithDialOption(dopts...), + otlptracegrpc.WithTimeout(coalesce(cfg.Timeout, 10*time.Second)), + otlptracegrpc.WithDialOption(cfg.Connection.dialOpts()...), otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{ - Enabled: valueOrDefault(cfg.Retry.Enabled, true), - InitialInterval: valueOrDefault(cfg.Retry.InitialInterval, 5*time.Second), - MaxInterval: valueOrDefault(cfg.Retry.MaxInterval, 30*time.Second), - MaxElapsedTime: valueOrDefault(cfg.Retry.MaxElapsedTime, 1*time.Minute), + Enabled: coalesce(cfg.Retry.Enabled, true), + InitialInterval: coalesce(cfg.Retry.InitialInterval, 5*time.Second), + MaxInterval: coalesce(cfg.Retry.MaxInterval, 30*time.Second), + MaxElapsedTime: coalesce(cfg.Retry.MaxElapsedTime, 1*time.Minute), }), } @@ -293,22 +316,7 @@ func (ec *exportConfig) buildOtlpGrpcSpanExporter( }, nil } -// sharedConnSpanExporter exists to wrap a span exporter that uses a shared -// *grpc.ClientConn so that the grpc.Dial call doesn't happen until Start() is -// called. Without this wrapper the grpc.ClientConn (which can only be created -// via grpc.Dial or grpc.DialContext) would need to exist at exporter -// _construction_ time, meaning that we would need to dial at construction -// rather then during the start phase. -type sharedConnSpanExporter struct { - baseOpts []otlptracegrpc.Option - dialer interface { - Dial(context.Context) (*grpc.ClientConn, error) - } - startOnce sync.Once - sdktrace.SpanExporter -} - -// Start +// Start initiates the connection to an upstream grpc OTLP server func (scse *sharedConnSpanExporter) Start(ctx context.Context) error { var err error scse.startOnce.Do(func() { @@ -323,15 +331,7 @@ func (scse *sharedConnSpanExporter) Start(ctx context.Context) error { return err } -type sharedConnMetricExporter struct { - baseOpts []otlpmetricgrpc.Option - dialer interface { - Dial(context.Context) (*grpc.ClientConn, error) - } - startOnce sync.Once - sdkmetricexp.Exporter -} - +// Start initiates the connection to an upstream grpc OTLP server func (scme *sharedConnMetricExporter) Start(ctx context.Context) error { var err error scme.startOnce.Do(func() { @@ -408,10 +408,12 @@ func (e *exporter) UnmarshalYAML(n *yaml.Node) error { return obj.Spec.Decode(e.Spec) } -func valueOrDefault[T comparable](v, defval T) T { +func coalesce[T comparable](vals ...T) T { var zero T - if v == zero { - return defval + for _, v := range vals { + if v != zero { + return v + } } - return v + return zero } From 4cd4aed4561d4264fa356901ecd5c147dcc9e457 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Mon, 6 Jun 2022 07:23:21 -0400 Subject: [PATCH 20/27] Shrink PR --- common/telemetry/attribute.go | 93 ----------------------------------- 1 file changed, 93 deletions(-) delete mode 100644 common/telemetry/attribute.go diff --git a/common/telemetry/attribute.go b/common/telemetry/attribute.go deleted file mode 100644 index 0f89a31bd4b..00000000000 --- a/common/telemetry/attribute.go +++ /dev/null @@ -1,93 +0,0 @@ -// The MIT License -// -// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved. -// -// Copyright (c) 2020 Uber Technologies, Inc. -// -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to deal -// in the Software without restriction, including without limitation the rights -// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -// THE SOFTWARE. - -package telemetry - -import ( - "fmt" - "strings" - - "go.opentelemetry.io/otel/attribute" - logtag "go.temporal.io/server/common/log/tag" - "go.temporal.io/server/common/metrics" -) - -type ( - // LogKey is a type wrapper for the key part of a common/log/tag.Tag that - // can be used to create full common/log/tag.Tag instances. It is intended - // to be used as a const global and shared throughout the codebase. - LogKey string - - // MetricsKey is a type wrapper for the key part of a common/metrics.Tag that - // can be used to create full common/metrics.Tag instances. It is intended - // to be used as a const global and shared throughout the codebase. - MetricsKey string - - // Key is a type that brings some consistency to the server's use of log, - // trace, and metrics annotations. The three tagging systems currently in - // use are not compatible so we provide this type to at least normalize - // naming. Consumers are expected to create package-global Key instances and - // then use those instances to create values as appropriate to the context. - Key struct { - OTEL attribute.Key - Log LogKey - Metrics MetricsKey - } - - mtag struct { - k, v string - } -) - -func (t mtag) Key() string { return t.k } -func (t mtag) Value() string { return t.v } - -// NewKey creates a new Key instance, inserting the "io.temporal." prefix for -// OTEL attributes if it is absent. -func NewKey(str string) Key { - fqstr := str - if !strings.HasPrefix(str, "io.temporal") { - fqstr = fmt.Sprintf("io.temporal.%s", str) - } - return Key{ - OTEL: attribute.Key(fqstr), - Log: LogKey(str), - Metrics: MetricsKey(str), - } -} - -// String creates a new string-valued logging tag -func (k LogKey) String(v string) logtag.Tag { - return logtag.NewStringTag(string(k), v) -} - -// Int creates a new int-valued logging tag -func (k LogKey) Int(v int) logtag.Tag { - return logtag.NewInt(string(k), v) -} - -// String creates a new string-valued metrics tag -func (k MetricsKey) String(v string) metrics.Tag { - return mtag{k: string(k), v: v} -} From 255136d4614fe10571f3c8a7f2a54bdcfa51f2c1 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Mon, 6 Jun 2022 07:59:56 -0400 Subject: [PATCH 21/27] Plumb through the client interceptors --- common/resource/fx.go | 15 +++++++++++- common/rpc/grpc.go | 11 +++++---- common/rpc/rpc.go | 21 +++++++++-------- common/rpc/test/rpc_localstore_tls_test.go | 27 ++++++++++++---------- 4 files changed, 48 insertions(+), 26 deletions(-) diff --git a/common/resource/fx.go b/common/resource/fx.go index 645e59640a3..ffdc778542f 100644 --- a/common/resource/fx.go +++ b/common/resource/fx.go @@ -32,6 +32,7 @@ import ( "time" "go.uber.org/fx" + "google.golang.org/grpc" "go.temporal.io/api/workflowservice/v1" @@ -62,6 +63,7 @@ import ( "go.temporal.io/server/common/rpc/encryption" "go.temporal.io/server/common/sdk" "go.temporal.io/server/common/searchattribute" + "go.temporal.io/server/common/telemetry" ) type ( @@ -416,7 +418,18 @@ func RPCFactoryProvider( tlsConfigProvider encryption.TLSConfigProvider, dc *dynamicconfig.Collection, clusterMetadata *cluster.Config, + traceInterceptor telemetry.ClientTraceInterceptor, ) common.RPCFactory { svcCfg := cfg.Services[string(svcName)] - return rpc.NewFactory(&svcCfg.RPC, string(svcName), logger, tlsConfigProvider, dc, clusterMetadata) + return rpc.NewFactory( + &svcCfg.RPC, + string(svcName), + logger, + tlsConfigProvider, + dc, + clusterMetadata, + []grpc.UnaryClientInterceptor{ + grpc.UnaryClientInterceptor(traceInterceptor), + }, + ) } diff --git a/common/rpc/grpc.go b/common/rpc/grpc.go index 01cab7890a8..fabdbc5c3c6 100644 --- a/common/rpc/grpc.go +++ b/common/rpc/grpc.go @@ -61,7 +61,7 @@ const ( // The hostName syntax is defined in // https://github.com/grpc/grpc/blob/master/doc/naming.md. // e.g. to use dns resolver, a "dns:///" prefix should be applied to the target. -func Dial(hostName string, tlsConfig *tls.Config, logger log.Logger) (*grpc.ClientConn, error) { +func Dial(hostName string, tlsConfig *tls.Config, logger log.Logger, interceptors ...grpc.UnaryClientInterceptor) (*grpc.ClientConn, error) { // Default to insecure grpcSecureOpt := grpc.WithInsecure() if tlsConfig != nil { @@ -83,9 +83,12 @@ func Dial(hostName string, tlsConfig *tls.Config, logger log.Logger) (*grpc.Clie grpcSecureOpt, grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(maxInternodeRecvPayloadSize)), grpc.WithChainUnaryInterceptor( - versionHeadersInterceptor, - metrics.NewClientMetricsTrailerPropagatorInterceptor(logger), - errorInterceptor, + append( + interceptors, + versionHeadersInterceptor, + metrics.NewClientMetricsTrailerPropagatorInterceptor(logger), + errorInterceptor, + )..., ), grpc.WithDefaultServiceConfig(DefaultServiceConfig), grpc.WithDisableServiceConfig(), diff --git a/common/rpc/rpc.go b/common/rpc/rpc.go index 868bf5e4411..38ac6b677d1 100644 --- a/common/rpc/rpc.go +++ b/common/rpc/rpc.go @@ -50,8 +50,9 @@ type RPCFactory struct { clusterMetadata *cluster.Config sync.Mutex - grpcListener net.Listener - tlsFactory encryption.TLSConfigProvider + grpcListener net.Listener + tlsFactory encryption.TLSConfigProvider + clientInterceptors []grpc.UnaryClientInterceptor } // NewFactory builds a new RPCFactory @@ -63,14 +64,16 @@ func NewFactory( tlsProvider encryption.TLSConfigProvider, dc *dynamicconfig.Collection, clusterMetadata *cluster.Config, + clientInterceptors []grpc.UnaryClientInterceptor, ) *RPCFactory { return &RPCFactory{ - config: cfg, - serviceName: sName, - logger: logger, - dc: dc, - tlsFactory: tlsProvider, - clusterMetadata: clusterMetadata, + config: cfg, + serviceName: sName, + logger: logger, + dc: dc, + tlsFactory: tlsProvider, + clusterMetadata: clusterMetadata, + clientInterceptors: clientInterceptors, } } @@ -225,7 +228,7 @@ func (d *RPCFactory) CreateInternodeGRPCConnection(hostName string) *grpc.Client } func (d *RPCFactory) dial(hostName string, tlsClientConfig *tls.Config) *grpc.ClientConn { - connection, err := Dial(hostName, tlsClientConfig, d.logger) + connection, err := Dial(hostName, tlsClientConfig, d.logger, d.clientInterceptors...) if err != nil { d.logger.Fatal("Failed to create gRPC connection", tag.Error(err)) return nil diff --git a/common/rpc/test/rpc_localstore_tls_test.go b/common/rpc/test/rpc_localstore_tls_test.go index 355a6754987..e913c8d99d3 100644 --- a/common/rpc/test/rpc_localstore_tls_test.go +++ b/common/rpc/test/rpc_localstore_tls_test.go @@ -34,6 +34,7 @@ import ( "github.com/golang/mock/gomock" "github.com/stretchr/testify/require" "github.com/stretchr/testify/suite" + "google.golang.org/grpc" "google.golang.org/grpc/credentials" "go.temporal.io/server/common/config" @@ -50,6 +51,8 @@ const ( frontendServerCertSerialNumber = 150 ) +var noExtraInterceptors = []grpc.UnaryClientInterceptor{} + type localStoreRPCSuite struct { *require.Assertions suite.Suite @@ -128,7 +131,7 @@ func (s *localStoreRPCSuite) SetupSuite() { provider, err := encryption.NewTLSConfigProviderFromConfig(serverCfgInsecure.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - insecureFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + insecureFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(insecureFactory) s.insecureRPCFactory = i(insecureFactory) @@ -336,22 +339,22 @@ func (s *localStoreRPCSuite) setupFrontend() { provider, err := encryption.NewTLSConfigProviderFromConfig(localStoreMutualTLS.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - frontendMutualTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + frontendMutualTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(frontendMutualTLSFactory) provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreServerTLS.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - frontendServerTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + frontendServerTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(frontendServerTLSFactory) provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreMutualTLSSystemWorker.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - frontendSystemWorkerMutualTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + frontendSystemWorkerMutualTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(frontendSystemWorkerMutualTLSFactory) provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreMutualTLSWithRefresh.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - frontendMutualTLSRefreshFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + frontendMutualTLSRefreshFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(frontendMutualTLSRefreshFactory) s.frontendMutualTLSRPCFactory = f(frontendMutualTLSFactory) @@ -365,7 +368,7 @@ func (s *localStoreRPCSuite) setupFrontend() { s.frontendRollingCerts, s.dynamicCACertPool, s.wrongCACertPool) - dynamicServerTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, s.dynamicConfigProvider, dynamicconfig.NewNoopCollection(), clusterMetadata) + dynamicServerTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, s.dynamicConfigProvider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.frontendDynamicTLSFactory = f(dynamicServerTLSFactory) s.internodeDynamicTLSFactory = i(dynamicServerTLSFactory) @@ -373,13 +376,13 @@ func (s *localStoreRPCSuite) setupFrontend() { provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreRootCAForceTLS.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - frontendRootCAForceTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + frontendRootCAForceTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(frontendServerTLSFactory) s.frontendConfigRootCAForceTLSFactory = f(frontendRootCAForceTLSFactory) provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreMutualTLSRemoteCluster.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - remoteClusterMutualTLSRPCFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + remoteClusterMutualTLSRPCFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(remoteClusterMutualTLSRPCFactory) s.remoteClusterMutualTLSRPCFactory = r(remoteClusterMutualTLSRPCFactory) } @@ -415,22 +418,22 @@ func (s *localStoreRPCSuite) setupInternode() { provider, err := encryption.NewTLSConfigProviderFromConfig(localStoreMutualTLS.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - internodeMutualTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + internodeMutualTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(internodeMutualTLSFactory) provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreServerTLS.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - internodeServerTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + internodeServerTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(internodeServerTLSFactory) provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreAltMutualTLS.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - internodeMutualAltTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + internodeMutualAltTLSFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(internodeMutualAltTLSFactory) provider, err = encryption.NewTLSConfigProviderFromConfig(localStoreMutualTLSWithRefresh.TLS, metrics.NoopClient, s.logger, nil) s.NoError(err) - internodeMutualTLSRefreshFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata) + internodeMutualTLSRefreshFactory := rpc.NewFactory(rpcTestCfgDefault, "tester", s.logger, provider, dynamicconfig.NewNoopCollection(), clusterMetadata, noExtraInterceptors) s.NotNil(internodeMutualTLSRefreshFactory) s.internodeMutualTLSRPCFactory = i(internodeMutualTLSFactory) From e685282ee7038d2ec025b44047476daed17b81e7 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Mon, 6 Jun 2022 13:07:23 -0400 Subject: [PATCH 22/27] Lift more constants out from inline --- common/telemetry/config.go | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index dd44857309e..a3f5f1e020f 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -42,12 +42,20 @@ import ( ) const ( - // these defaults were taken from the grpc docs as of grpc v1.46. + // the following defaults were taken from the grpc docs as of grpc v1.46. // they are not available programatically defaultReadBufferSize = 32 * 1024 defaultWriteBufferSize = 32 * 1024 defaultMinConnectTimeout = 10 * time.Second + + // the following defaults were taken from the otel library as of v1.7. + // they are not available programatically + + retryDefaultEnabled = true + retryDefaultInitialInterval = 5 * time.Second + retryDefaultMaxInterval = 30 * time.Second + retryDefaultMaxElapsedTime = 1 * time.Minute ) type ( @@ -255,10 +263,10 @@ func (ec *exportConfig) buildOtlpGrpcMetricExporter( otlpmetricgrpc.WithTimeout(coalesce(cfg.Timeout, 10*time.Second)), otlpmetricgrpc.WithDialOption(dopts...), otlpmetricgrpc.WithRetry(otlpmetricgrpc.RetryConfig{ - Enabled: coalesce(cfg.Retry.Enabled, true), - InitialInterval: coalesce(cfg.Retry.InitialInterval, 5*time.Second), - MaxInterval: coalesce(cfg.Retry.MaxInterval, 30*time.Second), - MaxElapsedTime: coalesce(cfg.Retry.MaxElapsedTime, 1*time.Minute), + Enabled: coalesce(cfg.Retry.Enabled, retryDefaultEnabled), + InitialInterval: coalesce(cfg.Retry.InitialInterval, retryDefaultInitialInterval), + MaxInterval: coalesce(cfg.Retry.MaxInterval, retryDefaultMaxInterval), + MaxElapsedTime: coalesce(cfg.Retry.MaxElapsedTime, retryDefaultMaxElapsedTime), }), } @@ -290,10 +298,10 @@ func (ec *exportConfig) buildOtlpGrpcSpanExporter( otlptracegrpc.WithTimeout(coalesce(cfg.Timeout, 10*time.Second)), otlptracegrpc.WithDialOption(cfg.Connection.dialOpts()...), otlptracegrpc.WithRetry(otlptracegrpc.RetryConfig{ - Enabled: coalesce(cfg.Retry.Enabled, true), - InitialInterval: coalesce(cfg.Retry.InitialInterval, 5*time.Second), - MaxInterval: coalesce(cfg.Retry.MaxInterval, 30*time.Second), - MaxElapsedTime: coalesce(cfg.Retry.MaxElapsedTime, 1*time.Minute), + Enabled: coalesce(cfg.Retry.Enabled, retryDefaultEnabled), + InitialInterval: coalesce(cfg.Retry.InitialInterval, retryDefaultInitialInterval), + MaxInterval: coalesce(cfg.Retry.MaxInterval, retryDefaultMaxInterval), + MaxElapsedTime: coalesce(cfg.Retry.MaxElapsedTime, retryDefaultMaxElapsedTime), }), } From 8d8f54b7c5fa60d717bf2215fd02d5d487818ec2 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Tue, 7 Jun 2022 08:12:07 -0400 Subject: [PATCH 23/27] Rename 'proto' -> 'protocol' in OTEL config --- common/telemetry/config.go | 12 ++++++------ common/telemetry/config_test.go | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index a3f5f1e020f..23edc1e105b 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -93,9 +93,9 @@ type ( exporter struct { Kind struct { - Signal string - Model string - Proto string + Signal string + Model string + Protocol string } Metadata metadata Spec interface{} `yaml:"-"` @@ -399,7 +399,7 @@ func (e *exporter) UnmarshalYAML(n *yaml.Node) error { if err != nil { return err } - descriptor := fmt.Sprintf("%v+%v+%v", e.Kind.Signal, e.Kind.Model, e.Kind.Proto) + descriptor := fmt.Sprintf("%v+%v+%v", e.Kind.Signal, e.Kind.Model, e.Kind.Protocol) switch descriptor { case "traces+otlp+grpc", "trace+otlp+grpc": e.Spec = new(otlpGrpcSpanExporter) @@ -407,10 +407,10 @@ func (e *exporter) UnmarshalYAML(n *yaml.Node) error { e.Spec = new(otlpGrpcMetricExporter) default: return fmt.Errorf( - "unsupported exporter kind: signal=%q; model=%q; proto=%q", + "unsupported exporter kind: signal=%q; model=%q; protocol=%q", e.Kind.Signal, e.Kind.Model, - e.Kind.Proto, + e.Kind.Protocol, ) } return obj.Spec.Decode(e.Spec) diff --git a/common/telemetry/config_test.go b/common/telemetry/config_test.go index cab57fd12fe..eb5a45a114f 100644 --- a/common/telemetry/config_test.go +++ b/common/telemetry/config_test.go @@ -38,7 +38,7 @@ exporters: - kind: signal: traces model: otlp - proto: grpc + protocol: grpc spec: headers: a: b @@ -67,13 +67,13 @@ otel: - kind: signal: traces model: otlp - proto: grpc + protocol: grpc spec: connection_name: conn1 - kind: signal: metrics model: otlp - proto: grpc + protocol: grpc spec: connection_name: conn1 ` @@ -111,7 +111,7 @@ func TestSharedConn(t *testing.T) { exp := cfg.Exporters[0] require.Equal(t, exp.Kind.Signal, "traces") require.Equal(t, exp.Kind.Model, "otlp") - require.Equal(t, exp.Kind.Proto, "grpc") + require.Equal(t, exp.Kind.Protocol, "grpc") require.NotNil(t, exp.Spec) sspec, ok := exp.Spec.(*telemetry.OTLPGRPCSpanExporter) require.True(t, ok) @@ -120,7 +120,7 @@ func TestSharedConn(t *testing.T) { exp = cfg.Exporters[1] require.Equal(t, exp.Kind.Signal, "metrics") require.Equal(t, exp.Kind.Model, "otlp") - require.Equal(t, exp.Kind.Proto, "grpc") + require.Equal(t, exp.Kind.Protocol, "grpc") require.NotNil(t, exp.Spec) mspec, ok := exp.Spec.(*telemetry.OTLPGRPCMetricExporter) require.True(t, ok) @@ -137,7 +137,7 @@ func TestOTLPTraceGRPC(t *testing.T) { exp := cfg.Exporters[0] require.Equal(t, exp.Kind.Signal, "traces") require.Equal(t, exp.Kind.Model, "otlp") - require.Equal(t, exp.Kind.Proto, "grpc") + require.Equal(t, exp.Kind.Protocol, "grpc") require.NotNil(t, exp.Spec) spec, ok := exp.Spec.(*telemetry.OTLPGRPCSpanExporter) From e823655bdfdcaddb6a05cd7fa078420879e0a3e3 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Tue, 7 Jun 2022 08:13:34 -0400 Subject: [PATCH 24/27] Disable tracing by default --- config/development-cass.yaml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/config/development-cass.yaml b/config/development-cass.yaml index 9c25a67b89e..43710853b5d 100644 --- a/config/development-cass.yaml +++ b/config/development-cass.yaml @@ -1,14 +1,3 @@ -otel: - exporters: - - kind: - signal: traces - model: otlp - proto: grpc - spec: - connection: - insecure: true - endpoint: localhost:4317 - log: stdout: true level: info From c2f26d08b62542cfa48c6aec5da9994a94753e83 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Tue, 7 Jun 2022 08:17:03 -0400 Subject: [PATCH 25/27] Rename sdktrace -> otelsdktrace Avoid possible confusion with temporal sdk --- common/telemetry/config.go | 24 ++++++++++++------------ temporal/fx.go | 30 +++++++++++++++--------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/common/telemetry/config.go b/common/telemetry/config.go index 23edc1e105b..36bee9ad1ea 100644 --- a/common/telemetry/config.go +++ b/common/telemetry/config.go @@ -33,8 +33,8 @@ import ( "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" - sdkmetricexp "go.opentelemetry.io/otel/sdk/metric/export" - sdktrace "go.opentelemetry.io/otel/sdk/trace" + otelsdkmetricexp "go.opentelemetry.io/otel/sdk/metric/export" + otelsdktrace "go.opentelemetry.io/otel/sdk/trace" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/credentials/insecure" @@ -140,7 +140,7 @@ type ( Dial(context.Context) (*grpc.ClientConn, error) } startOnce sync.Once - sdktrace.SpanExporter + otelsdktrace.SpanExporter } sharedConnMetricExporter struct { @@ -149,7 +149,7 @@ type ( Dial(context.Context) (*grpc.ClientConn, error) } startOnce sync.Once - sdkmetricexp.Exporter + otelsdkmetricexp.Exporter } // ExportConfig represents YAML structured configuration for a set of OTEL @@ -164,11 +164,11 @@ func (ec *ExportConfig) UnmarshalYAML(n *yaml.Node) error { return n.Decode(&ec.inner) } -func (ec *ExportConfig) SpanExporters() ([]sdktrace.SpanExporter, error) { +func (ec *ExportConfig) SpanExporters() ([]otelsdktrace.SpanExporter, error) { return ec.inner.SpanExporters() } -func (ec *ExportConfig) MetricExporters() ([]sdkmetricexp.Exporter, error) { +func (ec *ExportConfig) MetricExporters() ([]otelsdkmetricexp.Exporter, error) { return ec.inner.MetricExporters() } @@ -212,8 +212,8 @@ func (g *grpcconn) dialOpts() []grpc.DialOption { // SpanExporters builds the set of OTEL SpanExporter objects defined by the YAML // unmarshaled into this ExportConfig object. The returned SpanExporters have // not been started. -func (ec *exportConfig) SpanExporters() ([]sdktrace.SpanExporter, error) { - out := make([]sdktrace.SpanExporter, 0, len(ec.Exporters)) +func (ec *exportConfig) SpanExporters() ([]otelsdktrace.SpanExporter, error) { + out := make([]otelsdktrace.SpanExporter, 0, len(ec.Exporters)) for _, expcfg := range ec.Exporters { if !strings.HasPrefix(expcfg.Kind.Signal, "trace") { continue @@ -232,8 +232,8 @@ func (ec *exportConfig) SpanExporters() ([]sdktrace.SpanExporter, error) { return out, nil } -func (ec *exportConfig) MetricExporters() ([]sdkmetricexp.Exporter, error) { - out := make([]sdkmetricexp.Exporter, 0, len(ec.Exporters)) +func (ec *exportConfig) MetricExporters() ([]otelsdkmetricexp.Exporter, error) { + out := make([]otelsdkmetricexp.Exporter, 0, len(ec.Exporters)) for _, expcfg := range ec.Exporters { if !strings.HasPrefix(expcfg.Kind.Signal, "metric") { continue @@ -255,7 +255,7 @@ func (ec *exportConfig) MetricExporters() ([]sdkmetricexp.Exporter, error) { func (ec *exportConfig) buildOtlpGrpcMetricExporter( cfg *otlpGrpcMetricExporter, -) (sdkmetricexp.Exporter, error) { +) (otelsdkmetricexp.Exporter, error) { dopts := cfg.Connection.dialOpts() opts := []otlpmetricgrpc.Option{ otlpmetricgrpc.WithEndpoint(cfg.Connection.Endpoint), @@ -291,7 +291,7 @@ func (ec *exportConfig) buildOtlpGrpcMetricExporter( func (ec *exportConfig) buildOtlpGrpcSpanExporter( cfg *otlpGrpcSpanExporter, -) (sdktrace.SpanExporter, error) { +) (otelsdktrace.SpanExporter, error) { opts := []otlptracegrpc.Option{ otlptracegrpc.WithEndpoint(cfg.Connection.Endpoint), otlptracegrpc.WithHeaders(cfg.Headers), diff --git a/temporal/fx.go b/temporal/fx.go index 976f48c61a1..8b0f9b9cc5d 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -36,7 +36,7 @@ import ( "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/propagation" otelresource "go.opentelemetry.io/otel/sdk/resource" - sdktrace "go.opentelemetry.io/otel/sdk/trace" + otelsdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.10.0" "go.opentelemetry.io/otel/trace" "google.golang.org/grpc" @@ -339,7 +339,7 @@ type ( Authorizer authorization.Authorizer ClaimMapper authorization.ClaimMapper DataStoreFactory persistenceClient.AbstractDataStoreFactory - SpanExporters []sdktrace.SpanExporter + SpanExporters []otelsdktrace.SpanExporter InstanceID resource.InstanceID `optional:"true"` } ) @@ -798,7 +798,7 @@ var TraceExportModule = fx.Options( ) }), - fx.Provide(func(lc fx.Lifecycle, c *config.Config) ([]sdktrace.SpanExporter, error) { + fx.Provide(func(lc fx.Lifecycle, c *config.Config) ([]otelsdktrace.SpanExporter, error) { exporters, err := c.ExporterConfig.SpanExporters() if err != nil { return nil, err @@ -829,13 +829,13 @@ var TraceExportModule = fx.Options( // - telemetry.ServerTraceInterceptor // - telemetry.ClientTraceInterceptor var ServiceTracingModule = fx.Options( - fx.Supply([]sdktrace.BatchSpanProcessorOption{}), + fx.Supply([]otelsdktrace.BatchSpanProcessorOption{}), fx.Provide( fx.Annotate( - func(exps []sdktrace.SpanExporter, opts []sdktrace.BatchSpanProcessorOption) []sdktrace.SpanProcessor { - sps := make([]sdktrace.SpanProcessor, 0, len(exps)) + func(exps []otelsdktrace.SpanExporter, opts []otelsdktrace.BatchSpanProcessorOption) []otelsdktrace.SpanProcessor { + sps := make([]otelsdktrace.SpanProcessor, 0, len(exps)) for _, exp := range exps { - sps = append(sps, sdktrace.NewBatchSpanProcessor(exp, opts...)) + sps = append(sps, otelsdktrace.NewBatchSpanProcessor(exp, opts...)) } return sps }, @@ -868,17 +868,17 @@ var ServiceTracingModule = fx.Options( ), ), fx.Provide( - func(r *otelresource.Resource, sps []sdktrace.SpanProcessor) []sdktrace.TracerProviderOption { - opts := make([]sdktrace.TracerProviderOption, 0, len(sps)+1) - opts = append(opts, sdktrace.WithResource(r)) + func(r *otelresource.Resource, sps []otelsdktrace.SpanProcessor) []otelsdktrace.TracerProviderOption { + opts := make([]otelsdktrace.TracerProviderOption, 0, len(sps)+1) + opts = append(opts, otelsdktrace.WithResource(r)) for _, sp := range sps { - opts = append(opts, sdktrace.WithSpanProcessor(sp)) + opts = append(opts, otelsdktrace.WithSpanProcessor(sp)) } return opts }, ), - fx.Provide(func(lc fx.Lifecycle, opts []sdktrace.TracerProviderOption) trace.TracerProvider { - tp := sdktrace.NewTracerProvider(opts...) + fx.Provide(func(lc fx.Lifecycle, opts []otelsdktrace.TracerProviderOption) trace.TracerProvider { + tp := otelsdktrace.NewTracerProvider(opts...) lc.Append(fx.Hook{OnStop: tp.Shutdown}) return tp }), @@ -888,7 +888,7 @@ var ServiceTracingModule = fx.Options( fx.Provide(telemetry.NewClientTraceInterceptor), ) -func startAll(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { +func startAll(exporters []otelsdktrace.SpanExporter) func(ctx context.Context) error { type starter interface{ Start(context.Context) error } return func(ctx context.Context) error { for _, e := range exporters { @@ -903,7 +903,7 @@ func startAll(exporters []sdktrace.SpanExporter) func(ctx context.Context) error } } -func shutdownAll(exporters []sdktrace.SpanExporter) func(ctx context.Context) error { +func shutdownAll(exporters []otelsdktrace.SpanExporter) func(ctx context.Context) error { return func(ctx context.Context) error { for _, e := range exporters { err := e.Shutdown(ctx) From fde3ea7dbdd50b4d5e266012293380f38398c9d6 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Wed, 22 Jun 2022 11:58:10 -0700 Subject: [PATCH 26/27] Coalesce go.mod require sections --- go.mod | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index 42d88e90f59..9361c4c95a1 100644 --- a/go.mod +++ b/go.mod @@ -78,17 +78,9 @@ require ( github.com/aws/aws-sdk-go-v2/service/sso v1.11.4 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.16.4 // indirect github.com/aws/smithy-go v1.11.2 // indirect - github.com/cenkalti/backoff/v4 v4.1.3 // indirect - github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.30.0 // indirect - go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 // indirect - go.opentelemetry.io/proto/otlp v0.16.0 // indirect -) - -require ( github.com/benbjohnson/clock v1.3.0 // indirect github.com/beorn7/perks v1.0.1 // indirect + github.com/cenkalti/backoff/v4 v4.1.3 // indirect github.com/cespare/xxhash/v2 v2.1.2 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect @@ -102,6 +94,7 @@ require ( github.com/googleapis/gax-go/v2 v2.4.0 // indirect github.com/googleapis/go-type-adapters v1.0.0 // indirect github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -124,7 +117,11 @@ require ( github.com/twmb/murmur3 v1.1.6 // indirect github.com/uber-common/bark v1.3.0 // indirect go.opencensus.io v0.23.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.7.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.30.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.7.0 // indirect go.opentelemetry.io/otel/trace v1.7.0 + go.opentelemetry.io/proto/otlp v0.16.0 // indirect go.uber.org/dig v1.14.1 // indirect golang.org/x/crypto v0.0.0-20220411220226-7b82a4e95df4 // indirect golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect From 28031888290469adaff90ff60a290f40b9f139e3 Mon Sep 17 00:00:00 2001 From: Matt McShane Date: Wed, 22 Jun 2022 12:01:47 -0700 Subject: [PATCH 27/27] Prefer otelsdktrace to sdktrace Avoids potential naming confusion with Temporal SDK related packages. --- host/onebox.go | 6 +++--- temporal/fx.go | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/host/onebox.go b/host/onebox.go index 3333af3a1ba..d53d94842cf 100644 --- a/host/onebox.go +++ b/host/onebox.go @@ -39,7 +39,7 @@ import ( "go.temporal.io/api/operatorservice/v1" "go.temporal.io/api/workflowservice/v1" - sdktrace "go.opentelemetry.io/otel/sdk/trace" + otelsdktrace "go.opentelemetry.io/otel/sdk/trace" "go.temporal.io/server/api/adminservice/v1" "go.temporal.io/server/api/historyservice/v1" "go.temporal.io/server/client" @@ -129,7 +129,7 @@ type ( workerConfig *WorkerConfig mockAdminClient map[string]adminservice.AdminServiceClient namespaceReplicationTaskExecutor namespace.ReplicationTaskExecutor - spanExporters []sdktrace.SpanExporter + spanExporters []otelsdktrace.SpanExporter } // HistoryConfig contains configs for history service @@ -161,7 +161,7 @@ type ( WorkerConfig *WorkerConfig MockAdminClient map[string]adminservice.AdminServiceClient NamespaceReplicationTaskExecutor namespace.ReplicationTaskExecutor - SpanExporters []sdktrace.SpanExporter + SpanExporters []otelsdktrace.SpanExporter } ) diff --git a/temporal/fx.go b/temporal/fx.go index 8b0f9b9cc5d..cc2e72a2445 100644 --- a/temporal/fx.go +++ b/temporal/fx.go @@ -817,13 +817,13 @@ var TraceExportModule = fx.Options( // - []go.opentelemetry.io/otel/sdk/trace.BatchSpanProcessorOption // default: empty slice // - []go.opentelemetry.io/otel/sdk/trace.SpanProcessor -// default: wrap each sdktrace.SpanExporter with sdktrace.NewBatchSpanProcessor +// default: wrap each otelsdktrace.SpanExporter with otelsdktrace.NewBatchSpanProcessor // - *go.opentelemetry.io/otel/sdk/resource.Resource // default: resource.Default() augmented with the supplied serviceName // - []go.opentelemetry.io/otel/sdk/trace.TracerProviderOption -// default: the provided resource.Resource and each of the sdktrace.SpanExporter +// default: the provided resource.Resource and each of the otelsdktrace.SpanExporter // - go.opentelemetry.io/otel/trace.TracerProvider -// default: sdktrace.NewTracerProvider with each of the sdktrace.TracerProviderOption +// default: otelsdktrace.NewTracerProvider with each of the otelsdktrace.TracerProviderOption // - go.opentelemetry.io/otel/ppropagation.TextMapPropagator // default: propagation.TraceContext{} // - telemetry.ServerTraceInterceptor