diff --git a/CHANGELOG.md b/CHANGELOG.md index 73b25f5..ce711da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ The following emojis are used to highlight certain changes: ### Added +- Tracing per request with auth header (see `RAINBOW_TRACING_AUTH`) or a fraction of requests (see `RAINBOW_SAMPLING_FRACTION`) + ### Changed - go-libp2p 0.35 diff --git a/docs/environment-variables.md b/docs/environment-variables.md index 977bad5..6061bdc 100644 --- a/docs/environment-variables.md +++ b/docs/environment-variables.md @@ -30,6 +30,8 @@ - [Testing](#testing) - [`GATEWAY_CONFORMANCE_TEST`](#gateway_conformance_test) - [`IPFS_NS_MAP`](#ipfs_ns_map) +- [Tracing](#tracing) + - [`RAINBOW_TRACING_AUTH`](#rainbow_tracing_auth) ## Configuration @@ -289,3 +291,33 @@ $ IPFS_NS_MAP="dnslink-test1.example.com:/ipfs/bafkreicysg23kiwv34eg2d7qweipxwos $ curl -is http://127.0.0.1:8081/dnslink-test2.example.com/ | grep Etag Etag: "bafkreicysg23kiwv34eg2d7qweipxwosdo2py4ldv42nbauguluen5v6am" ``` + +## Tracing + +Tracing across the stack follows, as much as possible, the [Open Telemetry] +specifications. Configuration environment variables are specified in the +[OpenTelemetry Environment Variable Specification] where possible. The +[Boxo Tracing] documentation is the basis for tracing here. + +A major distinction from the more general tracing enabled in boxo is that when +tracing is enabled it is restricted to flows through HTTP Gateway requests, rather +than also including background processes. + +Note: requests are also traced when there is a `Traceparent` header passed that is valid +according to the [Trace Context] specification, even if the sampling fraction is set to 0. + +### `RAINBOW_TRACING_AUTH` + +The ability to pass `Traceparent` or `Tracestate` headers is guarded by an +`Authorization` header. The value of the `Authorization` header should match +the value in the `RAINBOW_TRACING_AUTH` environment variable. 
+ +### `RAINBOW_SAMPLING_FRACTION` + +The fraction (between 0 and 1) of requests that should be sampled. +This is calculated independently of any Traceparent based sampling. + +[Boxo Tracing]: https://github.com/ipfs/boxo/blob/main/docs/tracing.md +[Open Telemetry]: https://opentelemetry.io/ +[OpenTelemetry Environment Variable Specification]: https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/sdk-environment-variables.md +[Trace Context]: https://www.w3.org/TR/trace-context diff --git a/docs/headers.md b/docs/headers.md new file mode 100644 index 0000000..42fe4de --- /dev/null +++ b/docs/headers.md @@ -0,0 +1,9 @@ +## `Authorization` + +Optional request header that guards per-request tracing features. + +See [`RAINBOW_TRACING_AUTH`](./environment-variables.md#rainbow_tracing_auth) + +## `Traceparent` + +See [`RAINBOW_TRACING_AUTH`](./environment-variables.md#rainbow_tracing_auth) diff --git a/go.mod b/go.mod index b1d46c7..f91aa38 100644 --- a/go.mod +++ b/go.mod @@ -24,7 +24,7 @@ require ( github.com/libp2p/go-libp2p v0.35.1 github.com/libp2p/go-libp2p-kad-dht v0.25.2 github.com/libp2p/go-libp2p-record v0.2.0 - github.com/libp2p/go-libp2p-routing-helpers v0.7.3 + github.com/libp2p/go-libp2p-routing-helpers v0.7.4 github.com/libp2p/go-libp2p-testing v0.12.0 github.com/mitchellh/go-server-timing v1.0.1 github.com/mr-tron/base58 v1.2.0 diff --git a/go.sum b/go.sum index 82eaeb2..4858555 100644 --- a/go.sum +++ b/go.sum @@ -391,8 +391,8 @@ github.com/libp2p/go-libp2p-kbucket v0.6.3/go.mod h1:RCseT7AH6eJWxxk2ol03xtP9pEH github.com/libp2p/go-libp2p-peerstore v0.1.4/go.mod h1:+4BDbDiiKf4PzpANZDAT+knVdLxvqh7hXOujessqdzs= github.com/libp2p/go-libp2p-record v0.2.0 h1:oiNUOCWno2BFuxt3my4i1frNrt7PerzB3queqa1NkQ0= github.com/libp2p/go-libp2p-record v0.2.0/go.mod h1:I+3zMkvvg5m2OcSdoL0KPljyJyvNDFGKX7QdlpYUcwk= -github.com/libp2p/go-libp2p-routing-helpers v0.7.3 h1:u1LGzAMVRK9Nqq5aYDVOiq/HaB93U9WWczBzGyAC5ZY= 
-github.com/libp2p/go-libp2p-routing-helpers v0.7.3/go.mod h1:cN4mJAD/7zfPKXBcs9ze31JGYAZgzdABEm+q/hkswb8= +github.com/libp2p/go-libp2p-routing-helpers v0.7.4 h1:6LqS1Bzn5CfDJ4tzvP9uwh42IB7TJLNFJA6dEeGBv84= +github.com/libp2p/go-libp2p-routing-helpers v0.7.4/go.mod h1:we5WDj9tbolBXOuF1hGOkR+r7Uh1408tQbAKaT5n1LE= github.com/libp2p/go-libp2p-testing v0.12.0 h1:EPvBb4kKMWO29qP4mZGyhVzUyR25dvfUIK5WDu6iPUA= github.com/libp2p/go-libp2p-testing v0.12.0/go.mod h1:KcGDRXyN7sQCllucn1cOOS+Dmm7ujhfEyXQL5lvkcPg= github.com/libp2p/go-libp2p-xor v0.1.0 h1:hhQwT4uGrBcuAkUGXADuPltalOdpf9aag9kaYNT2tLA= diff --git a/handlers.go b/handlers.go index 9040649..ebddfb8 100644 --- a/handlers.go +++ b/handlers.go @@ -84,7 +84,7 @@ func withRequestLogger(next http.Handler) http.Handler { }) } -func setupGatewayHandler(cfg Config, nd *Node) (http.Handler, error) { +func setupGatewayHandler(cfg Config, nd *Node, tracingAuth string) (http.Handler, error) { var ( backend gateway.IPFSBackend err error @@ -208,6 +208,20 @@ func setupGatewayHandler(cfg Config, nd *Node) (http.Handler, error) { // Add tracing. 
handler = otelhttp.NewHandler(handler, "Gateway") + // Remove tracing headers if not authorized + prevHandler := handler + handler = http.HandlerFunc(func(writer http.ResponseWriter, request *http.Request) { + if request.Header.Get("Authorization") != tracingAuth { + if request.Header.Get("Traceparent") != "" { + request.Header.Del("Traceparent") + } + if request.Header.Get("Tracestate") != "" { + request.Header.Del("Tracestate") + } + } + prevHandler.ServeHTTP(writer, request) + }) + return handler, nil } diff --git a/main.go b/main.go index b5ac77f..84f3d2e 100644 --- a/main.go +++ b/main.go @@ -297,6 +297,18 @@ Generate an identity seed and launch a gateway: EnvVars: []string{"RAINBOW_LIBP2P_LISTEN_ADDRS"}, Usage: "Multiaddresses for libp2p bitswap client to listen on (comma-separated)", }, + &cli.StringFlag{ + Name: "tracing-auth", + Value: "", + EnvVars: []string{"RAINBOW_TRACING_AUTH"}, + Usage: "If set the key gates use of the Traceparent header by requiring the key to be passed in the Authorization header", + }, + &cli.Float64Flag{ + Name: "sampling-fraction", + Value: 0, + EnvVars: []string{"RAINBOW_SAMPLING_FRACTION"}, + Usage: "Rate at which to sample gateway requests. Does not include traceheaders which will always sample", + }, } app.Commands = []*cli.Command{ @@ -459,7 +471,8 @@ share the same seed as long as the indexes are different. gatewayListen := cctx.String("gateway-listen-address") ctlListen := cctx.String("ctl-listen-address") - handler, err := setupGatewayHandler(cfg, gnd) + tracingAuth := cctx.String("tracing-auth") + handler, err := setupGatewayHandler(cfg, gnd, tracingAuth) if err != nil { return err } @@ -480,7 +493,7 @@ share the same seed as long as the indexes are different. 
registerVersionMetric(version) registerIpfsNodeCollector(gnd) - tp, shutdown, err := newTracerProvider(cctx.Context) + tp, shutdown, err := newTracerProvider(cctx.Context, cctx.Float64("sampling-fraction")) if err != nil { return err } diff --git a/main_test.go b/main_test.go index 33c0354..a4eb39b 100644 --- a/main_test.go +++ b/main_test.go @@ -71,7 +71,7 @@ func mustTestNodeWithKey(t *testing.T, cfg Config, sk ic.PrivKey) *Node { func mustTestServer(t *testing.T, cfg Config) (*httptest.Server, *Node) { nd := mustTestNode(t, cfg) - handler, err := setupGatewayHandler(cfg, nd) + handler, err := setupGatewayHandler(cfg, nd, "") if err != nil { require.NoError(t, err) } diff --git a/tracing.go b/tracing.go index 9f9db17..94c74ac 100644 --- a/tracing.go +++ b/tracing.go @@ -2,16 +2,16 @@ package main import ( "context" - "github.com/ipfs/boxo/tracing" "go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.4.0" traceapi "go.opentelemetry.io/otel/trace" tracenoop "go.opentelemetry.io/otel/trace/noop" + "strings" ) -func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(context.Context) error, error) { +func newTracerProvider(ctx context.Context, traceFraction float64) (traceapi.TracerProvider, func(context.Context) error, error) { exporters, err := tracing.NewSpanExporters(ctx) if err != nil { return nil, nil, err @@ -37,8 +37,57 @@ func newTracerProvider(ctx context.Context) (traceapi.TracerProvider, func(conte if err != nil { return nil, nil, err } - options = append(options, trace.WithResource(r)) + + var baseSampler trace.Sampler + if traceFraction == 0 { + baseSampler = trace.NeverSample() + } else { + baseSampler = trace.TraceIDRatioBased(traceFraction) + } + + // Sample all children whose parents are sampled + // Probabilistically sample if the span is a root which is a Gateway request + sampler := trace.ParentBased( + CascadingSamplerFunc(func(parameters 
trace.SamplingParameters) bool { + return !traceapi.SpanContextFromContext(parameters.ParentContext).IsValid() + }, "root sampler", + CascadingSamplerFunc(func(parameters trace.SamplingParameters) bool { + return strings.HasPrefix(parameters.Name, "Gateway") + }, "gateway request sampler", + baseSampler))) + options = append(options, trace.WithResource(r), trace.WithSampler(sampler)) tp := trace.NewTracerProvider(options...) return tp, tp.Shutdown, nil } + +type funcSampler struct { + next trace.Sampler + fn func(trace.SamplingParameters) trace.SamplingResult + description string +} + +func (f funcSampler) ShouldSample(parameters trace.SamplingParameters) trace.SamplingResult { + return f.fn(parameters) +} + +func (f funcSampler) Description() string { + return f.description +} + +// CascadingSamplerFunc will sample with the next tracer if the condition is met, otherwise the sample will be dropped +func CascadingSamplerFunc(shouldSample func(parameters trace.SamplingParameters) bool, description string, next trace.Sampler) trace.Sampler { + return funcSampler{ + next: next, + fn: func(parameters trace.SamplingParameters) trace.SamplingResult { + if shouldSample(parameters) { + return next.ShouldSample(parameters) + } + return trace.SamplingResult{ + Decision: trace.Drop, + Tracestate: traceapi.SpanContextFromContext(parameters.ParentContext).TraceState(), + } + }, + description: description, + } +}