Skip to content

Commit

Permalink
feat(tracing): Misc. OTel tracing improvements
Browse files Browse the repository at this point in the history
Signed-off-by: Dave Henderson <dhenderson@gmail.com>
  • Loading branch information
hairyhenderson committed Oct 9, 2024
1 parent 453bdb1 commit bd6c73c
Show file tree
Hide file tree
Showing 9 changed files with 407 additions and 48 deletions.
169 changes: 144 additions & 25 deletions cmd/relayproxy/api/opentelemetry/otel.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,68 +2,187 @@ package opentelemetry

import (
"context"
"net/url"
"fmt"
"os"
"strconv"
"time"

"github.com/thomaspoignant/go-feature-flag/cmd/relayproxy/config"
"go.opentelemetry.io/contrib/exporters/autoexport"
"go.opentelemetry.io/contrib/samplers/jaegerremote"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"go.opentelemetry.io/otel/trace/noop"
"go.uber.org/zap"
)

// OtelService holds the OpenTelemetry tracing components created by Init so
// that they can be shut down again by Stop.
type OtelService struct {
	// otelTraceProvider is the SDK tracer provider that Init installs as the
	// global tracer provider. It stays nil until Init builds one.
	otelTraceProvider *sdktrace.TracerProvider
	// otelExporter is the span exporter batched by otelTraceProvider. It is
	// typed as the SpanExporter interface because Init obtains it from
	// autoexport rather than constructing a specific OTLP exporter.
	otelExporter sdktrace.SpanExporter
}

// NewOtelService returns a zero-value OtelService. No tracing is configured
// until Init is called on it.
func NewOtelService() OtelService {
	var svc OtelService
	return svc
}

// Init the OpenTelemetry service
func (s *OtelService) Init(ctx context.Context, config config.Config) error {
// parsing the OpenTelemetry endpoint
u, err := url.Parse(config.OpenTelemetryOtlpEndpoint)
func (s *OtelService) Init(ctx context.Context, zapLog *zap.Logger, config config.Config) error {
// OTEL_SDK_DISABLED is not supported by the Go SDK, but is a standard env
// var defined by the OTel spec. We'll use it to disable the trace provider.
if disabled, _ := strconv.ParseBool(os.Getenv("OTEL_SDK_DISABLED")); disabled {
otel.SetTracerProvider(noop.NewTracerProvider())
return nil
}

// support the openTelemetryOtlpEndpoint config element
if config.OpenTelemetryOtlpEndpoint != "" &&
os.Getenv("OTEL_EXPORTER_OTLP_ENDPOINT") == "" {
os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", config.OpenTelemetryOtlpEndpoint)
}

exporter, err := autoexport.NewSpanExporter(ctx)
if err != nil {
return err
return fmt.Errorf("initializing OTel exporter: %w", err)

Check warning on line 47 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L47

Added line #L47 was not covered by tests
}

var opts []otlptracehttp.Option
if u.Scheme == "http" {
opts = append(opts, otlptracehttp.WithInsecure())
serviceName := "go-feature-flag"
if v := os.Getenv("OTEL_SERVICE_NAME"); v != "" {
serviceName = v

Check warning on line 52 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L52

Added line #L52 was not covered by tests
}
opts = append(opts, otlptracehttp.WithEndpoint(u.Host))
client := otlptracehttp.NewClient(opts...)

s.otelExporter, err = otlptrace.New(ctx, client)
sampler, err := initSampler(serviceName)
if err != nil {
return err
return fmt.Errorf("initializing OTel sampler: %w", err)

Check warning on line 57 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L57

Added line #L57 was not covered by tests
}

resource, err := initResource(ctx, serviceName, config.Version)
if err != nil {
return fmt.Errorf("initializing OTel resources: %w", err)

Check warning on line 62 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L62

Added line #L62 was not covered by tests
}

s.otelExporter = exporter
s.otelTraceProvider = sdktrace.NewTracerProvider(
sdktrace.WithSampler(sdktrace.AlwaysSample()),
sdktrace.WithBatcher(s.otelExporter),
sdktrace.WithResource(resource.NewSchemaless(
attribute.String("service.name", "go-feature-flag"),
attribute.String("service.version", config.Version),
)),
sdktrace.WithBatcher(exporter),
sdktrace.WithSampler(sampler),
sdktrace.WithResource(resource),
)

otel.SetTracerProvider(s.otelTraceProvider)

// log OTel errors to zap rather than the default log package
otel.SetErrorHandler(otelErrHandler(func(err error) {
zapLog.Error("OTel error", zap.Error(err))
}))

return nil
}

// otelErrHandler adapts a plain func(error) so that it can be used wherever a
// value with a Handle(error) method is expected.
type otelErrHandler func(err error)

// Handle forwards err unchanged to the wrapped function.
func (fn otelErrHandler) Handle(err error) {
	fn(err)
}

// Compile-time assertion that otelErrHandler implements otel.ErrorHandler.
var _ otel.ErrorHandler = otelErrHandler(nil)

// initResource builds the OTel resource attached to exported spans.
//
// The resource combines attributes from the environment, process, host,
// telemetry SDK, OS and container detectors with explicit service.name and
// service.version attributes taken from serviceName and version. The explicit
// service attributes are listed last, matching the original option order.
func initResource(ctx context.Context, serviceName, version string) (*resource.Resource, error) {
	serviceAttrs := resource.WithAttributes(
		semconv.ServiceNameKey.String(serviceName),
		semconv.ServiceVersionKey.String(version),
	)

	opts := []resource.Option{
		resource.WithFromEnv(),
		resource.WithProcessPID(),
		resource.WithProcessExecutableName(),
		resource.WithProcessExecutablePath(),
		resource.WithProcessOwner(),
		resource.WithProcessRuntimeName(),
		resource.WithProcessRuntimeVersion(),
		resource.WithProcessRuntimeDescription(),
		resource.WithHost(),
		resource.WithTelemetrySDK(),
		resource.WithOS(),
		resource.WithContainer(),
		serviceAttrs,
	}

	return resource.New(ctx, opts...)
}

// initSampler determines which sampling strategy to use. If OTEL_TRACES_SAMPLER
// is unset, we'll always sample.
// If it's set to jaeger_remote, we'll use the Jaeger sampling server (supports
// JAEGER_SAMPLER_MANAGER_HOST_PORT, JAEGER_SAMPLER_REFRESH_INTERVAL, and
// JAEGER_SAMPLER_MAX_OPERATIONS).
// If it's set to any other value, we return nil and sdktrace.NewTracerProvider
// will set up the initSampler from the environment.
func initSampler(serviceName string) (sdktrace.Sampler, error) {
sampler, ok := os.LookupEnv("OTEL_TRACES_SAMPLER")
if !ok {
return sdktrace.AlwaysSample(), nil
}

if sampler != "jaeger_remote" {
return nil, nil
}

samplerURL, samplerRefreshInterval, maxOperations, err := jaegerRemoteSamplerOpts()
if err != nil {
return nil, err

Check warning on line 130 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L130

Added line #L130 was not covered by tests
}

return jaegerremote.New(
serviceName,
jaegerremote.WithSamplingServerURL(samplerURL),
jaegerremote.WithSamplingRefreshInterval(samplerRefreshInterval),
jaegerremote.WithMaxOperations(maxOperations),
jaegerremote.WithInitialSampler(sdktrace.AlwaysSample()),
), nil
}

// Defaults for the Jaeger remote sampler, used when the corresponding
// JAEGER_SAMPLER_* environment variable is unset or empty.
const (
	defaultSamplerURL              = "http://localhost:5778/sampling"
	defaultSamplingRefreshInterval = 1 * time.Minute
	defaultSamplingMaxOperations   = 256
)

// jaegerRemoteSamplerOpts reads the Jaeger remote sampler settings from the
// environment, falling back to the defaults above for unset or empty values.
//
// It returns, in order:
//   - the sampling server URL (JAEGER_SAMPLER_MANAGER_HOST_PORT, used verbatim),
//   - the refresh interval (JAEGER_SAMPLER_REFRESH_INTERVAL, a Go duration
//     string such as "30s"),
//   - the maximum number of operations (JAEGER_SAMPLER_MAX_OPERATIONS, an
//     integer).
//
// An error is returned if the interval or operation count cannot be parsed, or
// if the interval is not strictly positive. On error the other results are
// zero values.
func jaegerRemoteSamplerOpts() (string, time.Duration, int, error) {
	samplerURL := defaultSamplerURL
	if v := os.Getenv("JAEGER_SAMPLER_MANAGER_HOST_PORT"); v != "" {
		samplerURL = v
	}

	samplerRefreshInterval := defaultSamplingRefreshInterval
	if v := os.Getenv("JAEGER_SAMPLER_REFRESH_INTERVAL"); v != "" {
		d, err := time.ParseDuration(v)
		if err != nil {
			return "", 0, 0, fmt.Errorf("parsing JAEGER_SAMPLER_REFRESH_INTERVAL: %w", err)
		}
		// time.ParseDuration accepts "0s" and negative values, but a periodic
		// refresh needs a positive period (time.NewTicker panics on d <= 0),
		// so reject those here instead of failing later at runtime.
		if d <= 0 {
			return "", 0, 0, fmt.Errorf("parsing JAEGER_SAMPLER_REFRESH_INTERVAL: interval must be positive, got %s", d)
		}
		samplerRefreshInterval = d
	}

	maxOperations := defaultSamplingMaxOperations
	if v := os.Getenv("JAEGER_SAMPLER_MAX_OPERATIONS"); v != "" {
		i, err := strconv.Atoi(v)
		if err != nil {
			return "", 0, 0, fmt.Errorf("parsing JAEGER_SAMPLER_MAX_OPERATIONS: %w", err)
		}
		maxOperations = i
	}

	return samplerURL, samplerRefreshInterval, maxOperations, nil
}

// Stop the OpenTelemetry service
func (s *OtelService) Stop() error {
func (s *OtelService) Stop(ctx context.Context) error {

Check warning on line 176 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L176

Added line #L176 was not covered by tests
if s.otelExporter != nil {
err := s.otelExporter.Shutdown(context.Background())
err := s.otelExporter.Shutdown(ctx)

Check warning on line 178 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L178

Added line #L178 was not covered by tests
if err != nil {
return err
}
}

if s.otelTraceProvider != nil {
err := s.otelTraceProvider.Shutdown(context.Background())
err := s.otelTraceProvider.Shutdown(ctx)

Check warning on line 185 in cmd/relayproxy/api/opentelemetry/otel.go

View check run for this annotation

Codecov / codecov/patch

cmd/relayproxy/api/opentelemetry/otel.go#L185

Added line #L185 was not covered by tests
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit bd6c73c

Please sign in to comment.