diff --git a/collect/collect.go b/collect/collect.go index 59a5f85d53..4faad8681e 100644 --- a/collect/collect.go +++ b/collect/collect.go @@ -547,7 +547,7 @@ func (i *InMemCollector) processSpan(sp *types.Span) { } // if this is a root span, say so and send the trace - if i.isRootSpan(sp) { + if sp.IsRoot { markTraceForSending = true trace.RootSpan = sp } @@ -673,8 +673,7 @@ func (i *InMemCollector) dealWithSentTrace(ctx context.Context, tr cache.TraceSe i.Logger.Debug().WithField("trace_id", sp.TraceID).Logf("Sending span because of previous decision to send trace") mergeTraceAndSpanSampleRates(sp, tr.Rate(), isDryRun) // if this span is a late root span, possibly update it with our current span count - isRootSpan := i.isRootSpan(sp) - if isRootSpan { + if sp.IsRoot { if i.Config.GetAddCountsToRoot() { sp.Data["meta.span_event_count"] = int64(tr.SpanEventCount()) sp.Data["meta.span_link_count"] = int64(tr.SpanLinkCount()) @@ -684,7 +683,7 @@ func (i *InMemCollector) dealWithSentTrace(ctx context.Context, tr cache.TraceSe sp.Data["meta.span_count"] = int64(tr.DescendantCount()) } } - otelutil.AddSpanField(span, "is_root_span", isRootSpan) + otelutil.AddSpanField(span, "is_root_span", sp.IsRoot) i.Metrics.Increment(TraceSendLateSpan) i.addAdditionalAttributes(sp) i.Transmission.EnqueueSpan(sp) @@ -720,21 +719,6 @@ func mergeTraceAndSpanSampleRates(sp *types.Span, traceSampleRate uint, dryRunMo } } -func (i *InMemCollector) isRootSpan(sp *types.Span) bool { - // log event should never be considered a root span, check for that first - if signalType := sp.Data["meta.signal_type"]; signalType == "log" { - return false - } - // check if the event has a parent id using the configured parent id field names - for _, parentIdFieldName := range i.Config.GetParentIdFieldNames() { - parentId := sp.Data[parentIdFieldName] - if _, ok := parentId.(string); ok && parentId != "" { - return false - } - } - return true -} - func (i *InMemCollector) send(trace *types.Trace, sendReason string) { if trace.Sent { // someone else already sent this so we shouldn't also send it. @@ -830,7 +814,7 @@ func (i *InMemCollector) send(trace *types.Trace, sendReason string) { // update the root span (if we have one, which we might not if the trace timed out) // with the final total as of our send time - if i.isRootSpan(sp) { + if sp.IsRoot { if i.Config.GetAddCountsToRoot() { sp.Data["meta.span_event_count"] = int64(trace.SpanEventCount()) sp.Data["meta.span_link_count"] = int64(trace.SpanLinkCount()) diff --git a/collect/collect_test.go b/collect/collect_test.go index 68ee4020f9..fe78fb602d 100644 --- a/collect/collect_test.go +++ b/collect/collect_test.go @@ -120,6 +120,7 @@ func TestAddRootSpan(t *testing.T) { Dataset: "aoeu", APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(span) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -141,6 +142,7 @@ func TestAddRootSpan(t *testing.T) { Dataset: "aoeu", APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpanFromPeer(span) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -233,6 +235,7 @@ func TestOriginalSampleRateIsNotedInMetaField(t *testing.T) { SampleRate: 0, // no upstream sampling Data: make(map[string]interface{}), }, + IsRoot: true, }) require.NoError(t, err, "must be able to add the span") @@ -293,6 +296,7 @@ func TestTransmittedSpansShouldHaveASampleRateOfAtLeastOne(t *testing.T) { SampleRate: 0, // This should get lifted to 1 Data: make(map[string]interface{}), }, + IsRoot: true, } coll.AddSpan(span) @@ -378,6 +382,7 @@ func TestAddSpan(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 5) @@ -452,6 +457,7 @@ func TestDryRunMode(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(span) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -488,6 +494,7 @@ func TestDryRunMode(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpanFromPeer(span) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -511,6 +518,7 @@ func TestDryRunMode(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(span) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -642,6 +650,7 @@ func TestSampleConfigReload(t *testing.T) { Dataset: dataset, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(span) @@ -669,6 +678,7 @@ func TestSampleConfigReload(t *testing.T) { Dataset: dataset, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(span) @@ -930,6 +940,7 @@ func TestAddCountsToRoot(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -1021,6 +1032,7 @@ func TestLateRootGetsCounts(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -1101,6 +1113,7 @@ func TestAddSpanCount(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -1176,6 +1189,7 @@ func TestLateRootGetsSpanCount(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -1247,6 +1261,7 @@ func TestLateSpanNotDecorated(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) @@ -1317,6 +1332,7 @@ func TestAddAdditionalAttributes(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 5) @@ -1393,6 +1409,7 @@ func TestStressReliefSampleRate(t *testing.T) { APIKey: legacyAPIKey, SampleRate: 10, }, + IsRoot: true, } processed2, kept2 := coll.ProcessSpanImmediately(rootSpan) @@ -1473,6 +1490,7 @@ func TestStressReliefDecorateHostname(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) time.Sleep(conf.GetTracesConfig().GetSendTickerValue() * 2) @@ -1591,6 +1609,7 @@ func TestSpanWithRuleReasons(t *testing.T) { }, APIKey: legacyAPIKey, }, + IsRoot: true, } if i == 0 { rootSpan.Data["test"] = int64(1) @@ -1624,72 +1643,6 @@ func TestSpanWithRuleReasons(t *testing.T) { transmission.Mux.RUnlock() } -func TestIsRootSpan(t *testing.T) { - tesCases := []struct { - name string - span *types.Span - expected bool - }{ - { - name: "root span - no parent id", - span: &types.Span{ - Event: types.Event{ - Data: map[string]interface{}{}, - }, - }, - expected: true, - }, - { - name: "root span - empty parent id", - span: &types.Span{ - Event: types.Event{ - Data: map[string]interface{}{ - "trace.parent_id": "", - }, - }, - }, - expected: true, - }, - { - name: "non-root span - parent id", - span: &types.Span{ - Event: types.Event{ - Data: map[string]interface{}{ - "trace.parent_id": "some-id", - }, - }, - }, - expected: false, - }, - { - name: "non-root span - no parent id but has signal_type of log", - span: &types.Span{ - Event: types.Event{ - Data: map[string]interface{}{ - "meta.signal_type": "log", - }, - }, - }, - expected: false, - }, - } - - collector := &InMemCollector{ - Config: &config.MockConfig{ - ParentIdFieldNames: []string{"trace.parent_id", "parentId"}, - GetCollectionConfigVal: config.CollectionConfig{ - ShutdownDelay: config.Duration(1 * time.Millisecond), - }, - }, - } - - for _, tc := range tesCases { - t.Run(tc.name, func(t *testing.T) { - assert.Equal(t, tc.expected, collector.isRootSpan(tc.span)) - }) - } -} - func TestRedistributeTraces(t *testing.T) { conf := &config.MockConfig{ GetTracesConfigVal: config.TracesConfig{ @@ -1942,6 +1895,7 @@ func TestBigTracesGoEarly(t *testing.T) { Data: map[string]interface{}{}, APIKey: legacyAPIKey, }, + IsRoot: true, } coll.AddSpan(rootSpan) diff --git a/types/event.go b/types/event.go index c6fc76039b..f80849e69d 100644 --- a/types/event.go +++ b/types/event.go @@ -2,6 +2,7 @@ package types import ( "context" + "slices" "time" huskyotlp "github.com/honeycombio/husky/otlp" @@ -102,6 +103,12 @@ func (t *Trace) GetSpans() []*Span { return t.spans } +func (t *Trace) RemoveDecisionSpans() { + t.spans = slices.DeleteFunc(t.spans, func(sp *Span) bool { + return sp.IsDecisionSpan() + }) +} + func (t *Trace) ID() string { return t.TraceID } @@ -172,8 +179,8 @@ func (t *Trace) GetSamplerKey() (string, bool) { env := "" for _, sp := range t.GetSpans() { - if sp.Event.Environment != "" { - env = sp.Event.Environment + if sp.Environment != "" { + env = sp.Environment break } } @@ -187,6 +194,43 @@ type Span struct { TraceID string DataSize int ArrivalTime time.Time + IsRoot bool +} + +// IsDecicionSpan returns true if the span is a decision span based on +// a flag set in the span's metadata. +func (sp *Span) IsDecisionSpan() bool { + if sp.Data == nil { + return false + } + v, ok := sp.Data["meta.refinery.min_span"] + if !ok { + return false + } + isDecisionSpan, ok := v.(bool) + if !ok { + return false + } + + return isDecisionSpan +} + +// ExtractDecisionContext returns a new Event that contains only the data that is +// relevant to the decision-making process. +func (sp *Span) ExtractDecisionContext() *Event { + decisionCtx := sp.Event + dataSize := sp.DataSize + if dataSize == 0 { + dataSize = sp.GetDataSize() + } + decisionCtx.Data = map[string]interface{}{ + "trace_id": sp.TraceID, + "meta.refinery.root": sp.IsRoot, + "meta.refinery.min_span": true, + "meta.annotation_type": sp.AnnotationType(), + "meta.refinery.span_data_size": dataSize, + } + return &decisionCtx } // GetDataSize computes the size of the Data element of the Span. @@ -194,6 +238,12 @@ type Span struct { // relative ordering, not absolute calculations. func (sp *Span) GetDataSize() int { total := 0 + + if sp.IsDecisionSpan() { + if v, ok := sp.Data["meta.refinery.span_data_size"]; ok { + return v.(int) + } + } // the data types we should be getting from JSON are: // float64, int64, bool, string, []byte for _, v := range sp.Data { diff --git a/types/event_test.go b/types/event_test.go index 0f949b4639..b1ac8881ac 100644 --- a/types/event_test.go +++ b/types/event_test.go @@ -4,6 +4,9 @@ import ( "strconv" "strings" "testing" + "time" + + "github.com/stretchr/testify/assert" ) func TestSpan_GetDataSize(t *testing.T) { @@ -65,6 +68,69 @@ func TestSpan_AnnotationType(t *testing.T) { } } +func TestSpan_ExtractDecisionContext(t *testing.T) { + ev := Event{ + APIHost: "test.api.com", + APIKey: "test-api-key", + Dataset: "test-dataset", + Environment: "test-environment", + SampleRate: 5, + Timestamp: time.Now(), + Data: map[string]interface{}{ + "test": "test", + "meta.annotation_type": "span_event", + }, + } + sp := &Span{ + Event: ev, + TraceID: "test-trace-id", + ArrivalTime: time.Now(), + IsRoot: true, + } + + got := sp.ExtractDecisionContext() + assert.Equal(t, ev.APIHost, got.APIHost) + assert.Equal(t, ev.APIKey, got.APIKey) + assert.Equal(t, ev.Dataset, got.Dataset) + assert.Equal(t, ev.Environment, got.Environment) + assert.Equal(t, ev.SampleRate, got.SampleRate) + assert.Equal(t, ev.Timestamp, got.Timestamp) + assert.Equal(t, map[string]interface{}{ + "trace_id": sp.TraceID, + "meta.refinery.root": true, + "meta.refinery.min_span": true, + "meta.annotation_type": SpanAnnotationTypeSpanEvent, + "meta.refinery.span_data_size": 14, + }, got.Data) +} + +func TestSpan_IsDecisionSpan(t *testing.T) { + tests := []struct { + name string + data map[string]any + want bool + }{ + {"nil meta", nil, false}, + {"no meta", map[string]any{}, false}, + {"no meta.refinery.min_span", map[string]any{"meta.annotation_type": "span_event"}, false}, + {"invalid min_span", map[string]any{"meta.annotation_type": "span_event", "meta.refinery.mi_span": true}, false}, + {"is decision span", map[string]any{"meta.annotation_type": "span_event", "meta.refinery.min_span": true}, true}, + {"is not decision span", map[string]any{"meta.annotation_type": "span_event", "meta.refinery.min_span": false}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sp := &Span{ + Event: Event{ + Data: tt.data, + }, + } + got := sp.IsDecisionSpan() + assert.Equal(t, tt.want, got) + }) + } +} + // These benchmarks were just to verify that the size calculation is acceptable // even on big spans. The P99 for normal (20-field) spans shows that it will take ~1 // microsecond (on an m1 laptop) but a 1000-field span (extremely rare!) will take