Skip to content

Commit

Permalink
Add otel gRPC tracing support
Browse files Browse the repository at this point in the history
The feature is disabled by default and hidden behind a the flag
--enable-otel-tracing. When enabled, all the gRPC calls made by the
driver will be instrumented and can be forwarded to an opentelemetry-
compatible collector.
The configuration of opentelemetry tracing can be done in the chart with
values `controller.otelTracing` and `node.otelTracing`. The service name
and the endpoint that will be used by the sdk can be configured with
`otelServiceName` and `otelExporterEndpoint` for both components.
  • Loading branch information
Fricounet committed Aug 16, 2023
1 parent 122ab67 commit 85f0103
Show file tree
Hide file tree
Showing 12 changed files with 156 additions and 10 deletions.
9 changes: 9 additions & 0 deletions charts/aws-ebs-csi-driver/templates/controller.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ spec:
{{- with .Values.controller.userAgentExtra }}
- --user-agent-extra={{ . }}
{{- end }}
{{- if .Values.controller.otelTracing }}
- --enable-otel-tracing=true
{{- end}}
- --v={{ .Values.controller.logLevel }}
{{- range .Values.controller.additionalArgs }}
- {{ . }}
Expand Down Expand Up @@ -134,6 +137,12 @@ spec:
{{- with .Values.controller.env }}
{{- . | toYaml | nindent 12 }}
{{- end }}
{{- with .Values.controller.otelTracing }}
- name: OTEL_SERVICE_NAME
value: {{ .otelServiceName }}
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: {{ .otelExporterEndpoint }}
{{- end }}
{{- with .Values.controller.envFrom }}
envFrom:
{{- . | toYaml | nindent 12 }}
Expand Down
9 changes: 9 additions & 0 deletions charts/aws-ebs-csi-driver/templates/node-windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ spec:
- --logging-format={{ . }}
{{- end }}
- --v={{ .Values.node.logLevel }}
{{- if .Values.node.otelTracing }}
- --enable-otel-tracing=true
{{- end}}
env:
- name: CSI_ENDPOINT
value: unix:/csi/csi.sock
Expand All @@ -68,6 +71,12 @@ spec:
{{- if .Values.proxy.http_proxy }}
{{- include "aws-ebs-csi-driver.http-proxy" . | nindent 12 }}
{{- end }}
{{- with .Values.node.otelTracing }}
- name: OTEL_SERVICE_NAME
value: {{ .otelServiceName }}
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: {{ .otelExporterEndpoint }}
{{- end }}
{{- with .Values.node.env }}
{{- . | toYaml | nindent 12 }}
{{- end }}
Expand Down
17 changes: 13 additions & 4 deletions charts/aws-ebs-csi-driver/templates/node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ spec:
operator: "Exists"
{{- end }}
{{- with .Values.node.securityContext }}
securityContext:
securityContext:
{{- toYaml . | nindent 8 }}
{{- end }}
containers:
Expand All @@ -64,6 +64,9 @@ spec:
- --logging-format={{ . }}
{{- end }}
- --v={{ .Values.node.logLevel }}
{{- if .Values.node.otelTracing }}
- --enable-otel-tracing=true
{{- end}}
env:
- name: CSI_ENDPOINT
value: unix:/csi/csi.sock
Expand All @@ -74,6 +77,12 @@ spec:
{{- if .Values.proxy.http_proxy }}
{{- include "aws-ebs-csi-driver.http-proxy" . | nindent 12 }}
{{- end }}
{{- with .Values.node.otelTracing }}
- name: OTEL_SERVICE_NAME
value: {{ .otelServiceName }}
- name: OTEL_EXPORTER_OTLP_ENDPOINT
value: {{ .otelExporterEndpoint }}
{{- end }}
{{- with .Values.node.env }}
{{- . | toYaml | nindent 12 }}
{{- end }}
Expand Down Expand Up @@ -109,7 +118,7 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.node.containerSecurityContext }}
securityContext:
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
- name: node-driver-registrar
Expand Down Expand Up @@ -158,7 +167,7 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.sidecars.nodeDriverRegistrar.securityContext }}
securityContext:
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
- name: liveness-probe
Expand All @@ -181,7 +190,7 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.sidecars.livenessProbe.securityContext }}
securityContext:
securityContext:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if .Values.imagePullSecrets }}
Expand Down
8 changes: 8 additions & 0 deletions charts/aws-ebs-csi-driver/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,10 @@ controller:
# - name: wait
# image: busybox
# command: [ 'sh', '-c', "sleep 20" ]
# Enable opentelemetry tracing for the plugin running on the daemonset
otelTracing: {}
# otelServiceName: ebs-csi-controller
# otelExporterEndpoint: "http://localhost:4317"

node:
env: []
Expand Down Expand Up @@ -354,6 +358,10 @@ node:
containerSecurityContext:
readOnlyRootFilesystem: true
privileged: true
# Enable opentelemetry tracing for the plugin running on the daemonset
otelTracing: {}
# otelServiceName: ebs-csi-node
# otelExporterEndpoint: "http://localhost:4317"

storageClasses: []
# Add StorageClass resources like:
Expand Down
20 changes: 20 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@ limitations under the License.
package main

import (
"context"
"net/http"
"time"

flag "github.com/spf13/pflag"

Expand All @@ -39,6 +41,23 @@ func main() {

options := GetOptions(fs)

// Start tracing as soon as possible
if options.ServerOptions.EnableOtelTracing {
exporter, err := driver.InitOtelTracing()
if err != nil {
klog.ErrorS(err, "failed to initialize otel tracing")
klog.FlushAndExit(klog.ExitFlushTimeout, 1)
}
// Exporter will flush traces on shutdown
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := exporter.Shutdown(ctx); err != nil {
klog.ErrorS(err, "could not shutdown otel exporter")
}
}()
}

cloud.RegisterMetrics()
if options.ServerOptions.HttpEndpoint != "" {
mux := http.NewServeMux()
Expand All @@ -62,6 +81,7 @@ func main() {
driver.WithAwsSdkDebugLog(options.ControllerOptions.AwsSdkDebugLog),
driver.WithWarnOnInvalidTag(options.ControllerOptions.WarnOnInvalidTag),
driver.WithUserAgentExtra(options.ControllerOptions.UserAgentExtra),
driver.WithOtelTracing(options.ServerOptions.EnableOtelTracing),
)
if err != nil {
klog.ErrorS(err, "failed to create driver")
Expand Down
3 changes: 3 additions & 0 deletions cmd/options/server_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,12 @@ type ServerOptions struct {
Endpoint string
// HttpEndpoint is the endpoint that the HTTP server for metrics should listen on.
HttpEndpoint string
// EnableOtelTracing enables opentelemetry tracing.
EnableOtelTracing bool
}

func (s *ServerOptions) AddFlags(fs *flag.FlagSet) {
fs.StringVar(&s.Endpoint, "endpoint", driver.DefaultCSIEndpoint, "Endpoint for the CSI driver server")
fs.StringVar(&s.HttpEndpoint, "http-endpoint", "", "The TCP network address where the HTTP server for metrics will listen (example: `:8080`). The default is empty string, which means the server is disabled.")
fs.BoolVar(&s.EnableOtelTracing, "enable-otel-tracing", false, "To enable opentelemetry tracing for the driver. The tracing is disabled by default. Configure the exporter endpoint with OTEL_EXPORTER_OTLP_ENDPOINT and other env variables, see https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration.")
}
7 changes: 7 additions & 0 deletions cmd/options_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,16 @@ func TestGetOptions(t *testing.T) {
var VolumeAttachLimit int64 = 42
userAgentExtraFlag := "user-agent-extra"
userAgentExtraFlagValue := "test"
otelTracingFlagName := "enable-otel-tracing"
otelTracingFlagValue := true

args := append([]string{
"aws-ebs-csi-driver",
}, additionalArgs...)

if withServerOptions {
args = append(args, "--"+endpointFlagName+"="+endpoint)
args = append(args, "--"+otelTracingFlagName+"="+strconv.FormatBool(otelTracingFlagValue))
}
if withControllerOptions {
args = append(args, "--"+extraTagsFlagName+"="+extraTagKey+"="+extraTagValue)
Expand All @@ -83,6 +86,10 @@ func TestGetOptions(t *testing.T) {
if options.ServerOptions.Endpoint != endpoint {
t.Fatalf("expected endpoint to be %q but it is %q", endpoint, options.ServerOptions.Endpoint)
}
otelTracingFlag := flagSet.Lookup(otelTracingFlagName)
if otelTracingFlag == nil {
t.Fatalf("expected %q flag to be added but it is not", otelTracingFlagName)
}
}

if withControllerOptions {
Expand Down
2 changes: 2 additions & 0 deletions docs/options.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Driver Options

There are a couple of driver options that can be passed as arguments when starting the driver container.

| Option argument | value sample | default | Description |
Expand All @@ -10,3 +11,4 @@ There are a couple of driver options that can be passed as arguments when starti
| aws-sdk-debug-log | true | false | If set to true, the driver will enable the aws sdk debug log level|
| logging-format | json | text | Sets the log format. Permitted formats: text, json|
| user-agent-extra | csi-ebs | helm | Extra string appended to user agent|
| enable-otel-tracing | true | false | If set to true, the driver will enable opentelemetry tracing. Might need [additional env variables](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration) to export the traces to the right collector|
10 changes: 5 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ require (
github.com/onsi/gomega v1.27.8
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.8.4
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.41.1
go.opentelemetry.io/otel v1.15.1
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.15.1
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.15.1
go.opentelemetry.io/otel/sdk v1.15.1
golang.org/x/sys v0.11.0
google.golang.org/grpc v1.56.2
google.golang.org/protobuf v1.31.0
Expand Down Expand Up @@ -89,14 +94,9 @@ require (
go.etcd.io/etcd/api/v3 v3.5.9 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.9 // indirect
go.etcd.io/etcd/client/v3 v3.5.9 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.41.1 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.41.1 // indirect
go.opentelemetry.io/otel v1.15.1 // indirect
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.15.1 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.15.1 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.15.1 // indirect
go.opentelemetry.io/otel/metric v0.38.1 // indirect
go.opentelemetry.io/otel/sdk v1.15.1 // indirect
go.opentelemetry.io/otel/trace v1.15.1 // indirect
go.opentelemetry.io/proto/otlp v0.19.0 // indirect
go.uber.org/atomic v1.11.0 // indirect
Expand Down
16 changes: 15 additions & 1 deletion pkg/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/awslabs/volume-modifier-for-k8s/pkg/rpc"
csi "github.com/container-storage-interface/spec/lib/go/csi"
"github.com/kubernetes-sigs/aws-ebs-csi-driver/pkg/util"
"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
"google.golang.org/grpc"
"k8s.io/klog/v2"
)
Expand Down Expand Up @@ -69,6 +70,7 @@ type DriverOptions struct {
awsSdkDebugLog bool
warnOnInvalidTag bool
userAgentExtra string
otelTracing bool
}

func NewDriver(options ...func(*DriverOptions)) (*Driver, error) {
Expand Down Expand Up @@ -123,8 +125,14 @@ func (d *Driver) Run() error {
}
return resp, err
}

grpcInterceptor := grpc.UnaryInterceptor(logErr)
if d.options.otelTracing {
grpcInterceptor = grpc.ChainUnaryInterceptor(logErr, otelgrpc.UnaryServerInterceptor())
}

opts := []grpc.ServerOption{
grpc.UnaryInterceptor(logErr),
grpcInterceptor,
}
d.srv = grpc.NewServer(opts...)

Expand Down Expand Up @@ -208,3 +216,9 @@ func WithUserAgentExtra(userAgentExtra string) func(*DriverOptions) {
o.userAgentExtra = userAgentExtra
}
}

func WithOtelTracing(enableOtelTracing bool) func(*DriverOptions) {
return func(o *DriverOptions) {
o.otelTracing = enableOtelTracing
}
}
9 changes: 9 additions & 0 deletions pkg/driver/driver_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,12 @@ func TestWithUserAgentExtra(t *testing.T) {
t.Fatalf("expected userAgentExtra option got set to %s but is set to %s", userAgentExtra, options.userAgentExtra)
}
}

func TestWithOtelTracing(t *testing.T) {
var enableOtelTracing bool = true
options := &DriverOptions{}
WithOtelTracing(enableOtelTracing)(options)
if options.otelTracing != enableOtelTracing {
t.Fatalf("expected otelTracing option got set to %v but is set to %v", enableOtelTracing, options.otelTracing)
}
}
56 changes: 56 additions & 0 deletions pkg/driver/trace.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package driver

import (
"context"
"fmt"

"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
"k8s.io/klog/v2"
)

func InitOtelTracing() (*otlptrace.Exporter, error) {
// Setup OTLP exporter
ctx := context.Background()
exporter, err := otlptracegrpc.New(ctx)
if err != nil {
return nil, fmt.Errorf("failed to create the OTLP exporter: %w", err)
}

// Resource will auto populate spans with common attributes
resource, err := resource.New(ctx,
resource.WithFromEnv(), // pull attributes from OTEL_RESOURCE_ATTRIBUTES and OTEL_SERVICE_NAME environment variables
resource.WithProcess(),
resource.WithOS(),
resource.WithContainer(),
resource.WithHost(),
)
if err != nil {
klog.ErrorS(err, "failed to create the OTLP resource, spans will lack some metadata")
}

// Create a trace provider with the exporter.
// Use propagator and sampler defined in environment variables.
traceProvider := trace.NewTracerProvider(trace.WithBatcher(exporter), trace.WithResource(resource))

// Register the trace provider as global.
otel.SetTracerProvider(traceProvider)

return exporter, nil
}

0 comments on commit 85f0103

Please sign in to comment.