-
Notifications
You must be signed in to change notification settings - Fork 2.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[feat] Deduplicate spans based on their hashcode (#6009)
## Which problem is this PR solving?
- Resolves #6001

## Description of the changes
- Rename the Zipkin-legacy span "deduper" to a less confusing name
- Add a real span deduper that deduplicates based on span hashcodes
- Sort tags in spans before we attempt deduplication, so hashcode is deterministic

## How was this change tested?
- Unit tested

## Checklist
- [x] I have read https://github.com/jaegertracing/jaeger/blob/master/CONTRIBUTING_GUIDELINES.md
- [x] I have signed all commits
- [x] I have added unit tests for the new functionality
- [x] I have run lint and test steps successfully
  - for `jaeger`: `make lint test`
  - for `jaeger-ui`: `yarn lint` and `yarn test`

---------

Signed-off-by: Chris Danis <cdanis@wikimedia.org>
Signed-off-by: Chris Danis <cdanis@gmail.com>
Co-authored-by: Yuri Shkuro <yurishkuro@users.noreply.github.com>
- Loading branch information
1 parent
d243452
commit 7c6ed87
Showing
10 changed files
with
355 additions
and
110 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
// Copyright (c) 2019 The Jaeger Authors. | ||
// Copyright (c) 2017 Uber Technologies, Inc. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package adjuster | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
"github.com/stretchr/testify/require" | ||
|
||
"github.com/jaegertracing/jaeger/model" | ||
) | ||
|
||
func TestSortTagsAndLogFieldsDoesSortFields(t *testing.T) { | ||
testCases := []struct { | ||
fields model.KeyValues | ||
expected model.KeyValues | ||
}{ | ||
{ | ||
fields: model.KeyValues{ | ||
model.String("event", "some event"), // event already in the first position, and no other fields | ||
}, | ||
expected: model.KeyValues{ | ||
model.String("event", "some event"), | ||
}, | ||
}, | ||
{ | ||
fields: model.KeyValues{ | ||
model.Int64("event", 42), // non-string event field | ||
model.Int64("a", 41), | ||
}, | ||
expected: model.KeyValues{ | ||
model.Int64("a", 41), | ||
model.Int64("event", 42), | ||
}, | ||
}, | ||
{ | ||
fields: model.KeyValues{ | ||
model.String("nonsense", "42"), // no event field | ||
}, | ||
expected: model.KeyValues{ | ||
model.String("nonsense", "42"), | ||
}, | ||
}, | ||
{ | ||
fields: model.KeyValues{ | ||
model.String("event", "some event"), | ||
model.Int64("a", 41), | ||
}, | ||
expected: model.KeyValues{ | ||
model.String("event", "some event"), | ||
model.Int64("a", 41), | ||
}, | ||
}, | ||
{ | ||
fields: model.KeyValues{ | ||
model.Int64("x", 1), | ||
model.Int64("a", 2), | ||
model.String("event", "some event"), | ||
}, | ||
expected: model.KeyValues{ | ||
model.String("event", "some event"), | ||
model.Int64("a", 2), | ||
model.Int64("x", 1), | ||
}, | ||
}, | ||
} | ||
for _, testCase := range testCases { | ||
trace := &model.Trace{ | ||
Spans: []*model.Span{ | ||
{ | ||
Logs: []model.Log{ | ||
{ | ||
Fields: testCase.fields, | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
trace, err := SortTagsAndLogFields().Adjust(trace) | ||
require.NoError(t, err) | ||
assert.Equal(t, testCase.expected, model.KeyValues(trace.Spans[0].Logs[0].Fields)) | ||
} | ||
} | ||
|
||
func TestSortTagsAndLogFieldsDoesSortTags(t *testing.T) { | ||
testCases := []model.KeyValues{ | ||
{ | ||
model.String("http.method", "GET"), | ||
model.String("http.url", "http://wikipedia.org"), | ||
model.Int64("http.status_code", 200), | ||
model.String("guid:x-request-id", "f61defd2-7a77-11ef-b54f-4fbb67a6d181"), | ||
}, | ||
{ | ||
// empty | ||
}, | ||
{ | ||
model.String("argv", "mkfs.ext4 /dev/sda1"), | ||
}, | ||
} | ||
for _, tags := range testCases { | ||
trace := &model.Trace{ | ||
Spans: []*model.Span{ | ||
{ | ||
Tags: tags, | ||
}, | ||
}, | ||
} | ||
sorted, err := SortTagsAndLogFields().Adjust(trace) | ||
require.NoError(t, err) | ||
assert.ElementsMatch(t, tags, sorted.Spans[0].Tags) | ||
adjustedKeys := make([]string, len(sorted.Spans[0].Tags)) | ||
for i, kv := range sorted.Spans[0].Tags { | ||
adjustedKeys[i] = kv.Key | ||
} | ||
assert.IsIncreasing(t, adjustedKeys) | ||
} | ||
} | ||
|
||
func TestSortTagsAndLogFieldsDoesSortProcessTags(t *testing.T) { | ||
trace := &model.Trace{ | ||
Spans: []*model.Span{ | ||
{ | ||
Process: &model.Process{ | ||
Tags: model.KeyValues{ | ||
model.String("process.argv", "mkfs.ext4 /dev/sda1"), | ||
model.Int64("process.pid", 42), | ||
model.Int64("process.uid", 0), | ||
model.String("k8s.node.roles", "control-plane,etcd,master"), | ||
}, | ||
}, | ||
}, | ||
}, | ||
} | ||
sorted, err := SortTagsAndLogFields().Adjust(trace) | ||
require.NoError(t, err) | ||
assert.ElementsMatch(t, trace.Spans[0].Process.Tags, sorted.Spans[0].Process.Tags) | ||
adjustedKeys := make([]string, len(sorted.Spans[0].Process.Tags)) | ||
for i, kv := range sorted.Spans[0].Process.Tags { | ||
adjustedKeys[i] = kv.Key | ||
} | ||
assert.IsIncreasing(t, adjustedKeys) | ||
} | ||
|
||
func TestSortTagsAndLogFieldsHandlesNilProcessTags(t *testing.T) { | ||
trace := &model.Trace{ | ||
Spans: []*model.Span{ | ||
{}, | ||
}, | ||
} | ||
_, err := SortTagsAndLogFields().Adjust(trace) | ||
require.NoError(t, err) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package adjuster | ||
|
||
import ( | ||
"github.com/jaegertracing/jaeger/model" | ||
) | ||
|
||
// DedupeBySpanHash returns an adjuster that removes all but one span with the same hashcode. | ||
// This is useful for when spans are duplicated in archival storage, as happens with | ||
// ElasticSearch archival. | ||
func DedupeBySpanHash() Adjuster { | ||
return Func(func(trace *model.Trace) (*model.Trace, error) { | ||
deduper := &spanHashDeduper{trace: trace} | ||
deduper.groupSpansByHash() | ||
return deduper.trace, nil | ||
}) | ||
} | ||
|
||
type spanHashDeduper struct { | ||
trace *model.Trace | ||
spansByHash map[uint64]*model.Span | ||
} | ||
|
||
func (d *spanHashDeduper) groupSpansByHash() { | ||
spansByHash := make(map[uint64]*model.Span) | ||
for _, span := range d.trace.Spans { | ||
hash, _ := model.HashCode(span) | ||
if _, ok := spansByHash[hash]; !ok { | ||
spansByHash[hash] = span | ||
} | ||
} | ||
d.spansByHash = spansByHash | ||
|
||
d.trace.Spans = nil | ||
for _, span := range d.spansByHash { | ||
d.trace.Spans = append(d.trace.Spans, span) | ||
} | ||
} |
Oops, something went wrong.