Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[exporter/kafkaexporter] feat/partition by trace #29660

Merged
merged 9 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .chloggen/kafka-exporter-key-by-traceid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: kafkaexporter

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: add ability to publish kafka messages with message key of TraceID - it will allow partitioning of the kafka Topic.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [12318]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user, api]
1 change: 1 addition & 0 deletions exporter/kafkaexporter/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The following settings can be optionally configured:
- `zipkin_json`: the payload is serialized to Zipkin v2 JSON Span.
- The following encodings are valid *only* for **logs**.
- `raw`: if the log record body is a byte array, it is sent as is. Otherwise, it is serialized to JSON. Resource and record attributes are discarded.
- `partition_traces_by_id` (default = false): configures the exporter to include the trace ID as the message key in trace messages sent to kafka. *Please note:* this setting does not have any effect on Jaeger encoding exporters since Jaeger exporters include trace ID as the message key by default.
- `auth`
- `plain_text`
- `username`: The username to use.
Expand Down
5 changes: 5 additions & 0 deletions exporter/kafkaexporter/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ type Config struct {
// Encoding of messages (default "otlp_proto")
Encoding string `mapstructure:"encoding"`

// PartitionTracesByID sets the message key of outgoing trace messages to the trace ID.
// Please note: does not have any effect on Jaeger encoding exporters since Jaeger exporters include
// trace ID as the message key by default.
PartitionTracesByID bool `mapstructure:"partition_traces_by_id"`

// Metadata is the namespace for metadata management properties used by the
// Client, and shared by the Producer/Consumer.
Metadata Metadata `mapstructure:"metadata"`
Expand Down
15 changes: 9 additions & 6 deletions exporter/kafkaexporter/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,10 @@ func TestLoadConfig(t *testing.T) {
NumConsumers: 2,
QueueSize: 10,
},
Topic: "spans",
Encoding: "otlp_proto",
Brokers: []string{"foo:123", "bar:456"},
Topic: "spans",
Encoding: "otlp_proto",
PartitionTracesByID: true,
Brokers: []string{"foo:123", "bar:456"},
Authentication: kafka.Authentication{
PlainText: &kafka.PlainTextConfig{
Username: "jdoe",
Expand Down Expand Up @@ -106,9 +107,10 @@ func TestLoadConfig(t *testing.T) {
NumConsumers: 2,
QueueSize: 10,
},
Topic: "spans",
Encoding: "otlp_proto",
Brokers: []string{"foo:123", "bar:456"},
Topic: "spans",
Encoding: "otlp_proto",
PartitionTracesByID: true,
Brokers: []string{"foo:123", "bar:456"},
Authentication: kafka.Authentication{
PlainText: &kafka.PlainTextConfig{
Username: "jdoe",
Expand Down Expand Up @@ -159,6 +161,7 @@ func TestLoadConfig(t *testing.T) {
},
Topic: "spans",
Encoding: "otlp_proto",
PartitionTracesByID: true,
Brokers: []string{"foo:123", "bar:456"},
ResolveCanonicalBootstrapServersOnly: true,
Authentication: kafka.Authentication{
Expand Down
3 changes: 3 additions & 0 deletions exporter/kafkaexporter/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/jaegertracing/jaeger v1.51.0
github.com/open-telemetry/opentelemetry-collector-contrib/internal/coreinternal v0.91.0
github.com/open-telemetry/opentelemetry-collector-contrib/internal/kafka v0.91.0
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/batchpersignal v0.90.1
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/jaeger v0.91.0
github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/zipkin v0.91.0
github.com/openzipkin/zipkin-go v0.4.2
Expand Down Expand Up @@ -84,6 +85,8 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/corei

replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/kafka => ../../internal/kafka

replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/batchpersignal => ../../pkg/batchpersignal

replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/jaeger => ../../pkg/translator/jaeger

retract (
Expand Down
5 changes: 5 additions & 0 deletions exporter/kafkaexporter/kafka_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,11 @@ func newTracesExporter(config Config, set exporter.CreateSettings, marshalers ma
if marshaler == nil {
return nil, errUnrecognizedEncoding
}
if config.PartitionTracesByID {
if keyableMarshaler, ok := marshaler.(KeyableTracesMarshaler); ok {
keyableMarshaler.Key()
}
}
producer, err := newSaramaProducer(config)
if err != nil {
return nil, err
Expand Down
224 changes: 208 additions & 16 deletions exporter/kafkaexporter/marshaler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"testing"
"time"

"github.com/IBM/sarama"
zipkin "github.com/openzipkin/zipkin-go/model"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -85,6 +86,8 @@ func TestOTLPTracesJsonMarshaling(t *testing.T) {
ils := rs.ScopeSpans().At(0)
ils.SetSchemaUrl(conventions.SchemaURL)
ils.Spans().AppendEmpty()
ils.Spans().AppendEmpty()
ils.Spans().AppendEmpty()

span := ils.Spans().At(0)
span.SetKind(ptrace.SpanKindServer)
Expand All @@ -95,9 +98,27 @@ func TestOTLPTracesJsonMarshaling(t *testing.T) {
span.SetSpanID([8]byte{0, 1, 2, 3, 4, 5, 6, 7})
span.SetParentSpanID([8]byte{8, 9, 10, 11, 12, 13, 14})

span = ils.Spans().At(1)
span.SetKind(ptrace.SpanKindClient)
span.SetName("bar")
span.SetStartTimestamp(pcommon.NewTimestampFromTime(now))
span.SetEndTimestamp(pcommon.NewTimestampFromTime(now.Add(time.Second)))
span.SetTraceID([16]byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16})
span.SetSpanID([8]byte{15, 16, 17, 18, 19, 20, 21})
span.SetParentSpanID([8]byte{0, 1, 2, 3, 4, 5, 6, 7})

span = ils.Spans().At(2)
span.SetKind(ptrace.SpanKindServer)
span.SetName("baz")
span.SetStartTimestamp(pcommon.NewTimestampFromTime(now))
span.SetEndTimestamp(pcommon.NewTimestampFromTime(now.Add(time.Second)))
span.SetTraceID([16]byte{17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32})
span.SetSpanID([8]byte{22, 23, 24, 25, 26, 27, 28})
span.SetParentSpanID([8]byte{29, 30, 31, 32, 33, 34, 35, 36})

// Since marshaling json is not guaranteed to be in order
// within a string, using a map to compare that the expected values are there
otlpJSON := map[string]any{
unkeyedOtlpJSON := map[string]any{
"resourceSpans": []any{
map[string]any{
"resource": map[string]any{},
Expand All @@ -115,6 +136,95 @@ func TestOTLPTracesJsonMarshaling(t *testing.T) {
"endTimeUnixNano": fmt.Sprint(now.Add(time.Second).UnixNano()),
"status": map[string]any{},
},
map[string]any{
"traceId": "0102030405060708090a0b0c0d0e0f10",
"spanId": "0f10111213141500",
"parentSpanId": "0001020304050607",
"name": "bar",
"kind": float64(ptrace.SpanKindClient),
"startTimeUnixNano": fmt.Sprint(now.UnixNano()),
"endTimeUnixNano": fmt.Sprint(now.Add(time.Second).UnixNano()),
"status": map[string]any{},
},
map[string]any{
"traceId": "1112131415161718191a1b1c1d1e1f20",
"spanId": "161718191a1b1c00",
"parentSpanId": "1d1e1f2021222324",
"name": "baz",
"kind": float64(ptrace.SpanKindServer),
"startTimeUnixNano": fmt.Sprint(now.UnixNano()),
"endTimeUnixNano": fmt.Sprint(now.Add(time.Second).UnixNano()),
"status": map[string]any{},
},
},
"schemaUrl": conventions.SchemaURL,
},
},
"schemaUrl": conventions.SchemaURL,
},
},
}

unkeyedOtlpJSONResult := make([]any, 1)
unkeyedOtlpJSONResult[0] = unkeyedOtlpJSON

keyedOtlpJSON1 := map[string]any{
"resourceSpans": []any{
map[string]any{
"resource": map[string]any{},
"scopeSpans": []any{
map[string]any{
"scope": map[string]any{},
"spans": []any{
map[string]any{
"traceId": "0102030405060708090a0b0c0d0e0f10",
"spanId": "0001020304050607",
"parentSpanId": "08090a0b0c0d0e00",
"name": "foo",
"kind": float64(ptrace.SpanKindServer),
"startTimeUnixNano": fmt.Sprint(now.UnixNano()),
"endTimeUnixNano": fmt.Sprint(now.Add(time.Second).UnixNano()),
"status": map[string]any{},
},
map[string]any{
"traceId": "0102030405060708090a0b0c0d0e0f10",
"spanId": "0f10111213141500",
"parentSpanId": "0001020304050607",
"name": "bar",
"kind": float64(ptrace.SpanKindClient),
"startTimeUnixNano": fmt.Sprint(now.UnixNano()),
"endTimeUnixNano": fmt.Sprint(now.Add(time.Second).UnixNano()),
"status": map[string]any{},
},
},
"schemaUrl": conventions.SchemaURL,
},
},
"schemaUrl": conventions.SchemaURL,
},
},
}

unkeyedMessageKey := []sarama.Encoder{nil}

keyedOtlpJSON2 := map[string]any{
"resourceSpans": []any{
map[string]any{
"resource": map[string]any{},
"scopeSpans": []any{
map[string]any{
"scope": map[string]any{},
"spans": []any{
map[string]any{
"traceId": "1112131415161718191a1b1c1d1e1f20",
"spanId": "161718191a1b1c00",
"parentSpanId": "1d1e1f2021222324",
"name": "baz",
"kind": float64(ptrace.SpanKindServer),
"startTimeUnixNano": fmt.Sprint(now.UnixNano()),
"endTimeUnixNano": fmt.Sprint(now.Add(time.Second).UnixNano()),
"status": map[string]any{},
},
},
"schemaUrl": conventions.SchemaURL,
},
Expand All @@ -124,7 +234,13 @@ func TestOTLPTracesJsonMarshaling(t *testing.T) {
},
}

zipkinJSON := []any{
keyedOtlpJSONResult := make([]any, 2)
keyedOtlpJSONResult[0] = keyedOtlpJSON1
keyedOtlpJSONResult[1] = keyedOtlpJSON2

keyedMessageKey := []sarama.Encoder{sarama.ByteEncoder("0102030405060708090a0b0c0d0e0f10"), sarama.ByteEncoder("1112131415161718191a1b1c1d1e1f20")}

unkeyedZipkinJSON := []any{
map[string]any{
"traceId": "0102030405060708090a0b0c0d0e0f10",
"id": "0001020304050607",
Expand All @@ -135,35 +251,111 @@ func TestOTLPTracesJsonMarshaling(t *testing.T) {
"kind": string(zipkin.Server),
"localEndpoint": map[string]any{"serviceName": "otlpresourcenoservicename"},
},
map[string]any{
"traceId": "0102030405060708090a0b0c0d0e0f10",
"id": "0f10111213141500",
"parentId": "0001020304050607",
"name": "bar",
"timestamp": float64(time.Second.Microseconds()),
"duration": float64(time.Second.Microseconds()),
"kind": string(zipkin.Client),
"localEndpoint": map[string]any{"serviceName": "otlpresourcenoservicename"},
},
map[string]any{
"traceId": "1112131415161718191a1b1c1d1e1f20",
"id": "161718191a1b1c00",
"parentId": "1d1e1f2021222324",
"name": "baz",
"timestamp": float64(time.Second.Microseconds()),
"duration": float64(time.Second.Microseconds()),
"kind": string(zipkin.Server),
"localEndpoint": map[string]any{"serviceName": "otlpresourcenoservicename"},
},
}

unkeyedZipkinJSONResult := make([]any, 1)
unkeyedZipkinJSONResult[0] = unkeyedZipkinJSON

keyedZipkinJSON1 := []any{
map[string]any{
"traceId": "0102030405060708090a0b0c0d0e0f10",
"id": "0001020304050607",
"parentId": "08090a0b0c0d0e00",
"name": "foo",
"timestamp": float64(time.Second.Microseconds()),
"duration": float64(time.Second.Microseconds()),
"kind": string(zipkin.Server),
"localEndpoint": map[string]any{"serviceName": "otlpresourcenoservicename"},
},
map[string]any{
"traceId": "0102030405060708090a0b0c0d0e0f10",
"id": "0f10111213141500",
"parentId": "0001020304050607",
"name": "bar",
"timestamp": float64(time.Second.Microseconds()),
"duration": float64(time.Second.Microseconds()),
"kind": string(zipkin.Client),
"localEndpoint": map[string]any{"serviceName": "otlpresourcenoservicename"},
},
}

keyedZipkinJSON2 := []any{
map[string]any{
"traceId": "1112131415161718191a1b1c1d1e1f20",
"id": "161718191a1b1c00",
"parentId": "1d1e1f2021222324",
"name": "baz",
"timestamp": float64(time.Second.Microseconds()),
"duration": float64(time.Second.Microseconds()),
"kind": string(zipkin.Server),
"localEndpoint": map[string]any{"serviceName": "otlpresourcenoservicename"},
},
}

keyedZipkinJSONResult := make([]any, 2)
keyedZipkinJSONResult[0] = keyedZipkinJSON1
keyedZipkinJSONResult[1] = keyedZipkinJSON2

tests := []struct {
encoding string
expectedJSON any
unmarshaled any
encoding string
keyed bool
numExpectedMessages int
expectedJSON []any
expectedMessageKey []sarama.Encoder
unmarshaled any
}{
{encoding: "otlp_json", expectedJSON: otlpJSON, unmarshaled: map[string]any{}},
{encoding: "zipkin_json", expectedJSON: zipkinJSON, unmarshaled: []map[string]any{}},
{encoding: "otlp_json", numExpectedMessages: 1, expectedJSON: unkeyedOtlpJSONResult, expectedMessageKey: unkeyedMessageKey, unmarshaled: map[string]any{}},
{encoding: "otlp_json", keyed: true, numExpectedMessages: 2, expectedJSON: keyedOtlpJSONResult, expectedMessageKey: keyedMessageKey, unmarshaled: map[string]any{}},
{encoding: "zipkin_json", numExpectedMessages: 1, expectedJSON: unkeyedZipkinJSONResult, expectedMessageKey: unkeyedMessageKey, unmarshaled: []map[string]any{}},
{encoding: "zipkin_json", keyed: true, numExpectedMessages: 2, expectedJSON: keyedZipkinJSONResult, expectedMessageKey: keyedMessageKey, unmarshaled: []map[string]any{}},
}

for _, test := range tests {

marshaler, ok := tracesMarshalers()[test.encoding]
require.True(t, ok, fmt.Sprintf("Must have %s marshaller", test.encoding))

if test.keyed {
keyableMarshaler, ok := marshaler.(KeyableTracesMarshaler)
require.True(t, ok, "Must be a KeyableTracesMarshaler")
keyableMarshaler.Key()
}

msg, err := marshaler.Marshal(traces, t.Name())
require.NoError(t, err, "Must have marshaled the data without error")
require.Len(t, msg, 1, "Must have one entry in the message")

data, err := msg[0].Value.Encode()
require.NoError(t, err, "Must not error when encoding value")
require.NotNil(t, data, "Must have valid data to test")
require.Len(t, msg, test.numExpectedMessages, "Expected number of messages in the message")

unmarshaled := test.unmarshaled
err = json.Unmarshal(data, &unmarshaled)
require.NoError(t, err, "Must not error marshaling expected data")
for idx, singleMsg := range msg {
data, err := singleMsg.Value.Encode()
require.NoError(t, err, "Must not error when encoding value")
require.NotNil(t, data, "Must have valid data to test")

assert.Equal(t, test.expectedJSON, unmarshaled, "Must match the expected value")
unmarshaled := test.unmarshaled
err = json.Unmarshal(data, &unmarshaled)
require.NoError(t, err, "Must not error marshaling expected data")

assert.Equal(t, test.expectedJSON[idx], unmarshaled, "Must match the expected value")
assert.Equal(t, test.expectedMessageKey[idx], singleMsg.Key)
}
}
}
Loading