Skip to content

Commit

Permalink
fix: Dedup pattern tokens on output (#13534)
Browse files Browse the repository at this point in the history
  • Loading branch information
benclive authored Jul 17, 2024
1 parent 5fa9c4b commit e23598d
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 10 deletions.
14 changes: 5 additions & 9 deletions pkg/pattern/drain/drain.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ func New(config *Config, format string, metrics *Metrics) *Drain {
default:
tokenizer = newPunctuationTokenizer()
}

d.idToCluster = createLogClusterCache(config.MaxClusters, func(int, *LogCluster) {
if metrics != nil {
if d.pruning {
Expand All @@ -170,7 +171,10 @@ func New(config *Config, format string, metrics *Metrics) *Drain {
limiter.Evict()
}
})
d.tokenizer = tokenizer
d.tokenizer = &DedupingTokenizer{
LineTokenizer: tokenizer,
dedupParam: config.ParamString,
}
d.limiter = limiter
return d
}
Expand Down Expand Up @@ -297,14 +301,6 @@ func deduplicatePlaceholders(line string, placeholder string) string {
return unsafeString(builder)
}

// PatternString renders the tokens of cluster c as a single pattern string.
// The tokens are joined by the Drain's tokenizer and the result is passed
// through deduplicatePlaceholders with the configured ParamString. When the
// resulting pattern consists of nothing but the ParamString itself, the empty
// string is returned instead.
func (d *Drain) PatternString(c *LogCluster) string {
	joined := d.tokenizer.Join(c.Tokens, c.TokenState)
	placeholder := d.config.ParamString

	pattern := deduplicatePlaceholders(joined, placeholder)
	if pattern != d.config.ParamString {
		return pattern
	}
	// The whole line collapsed to a lone placeholder; report it as empty.
	return ""
}

// Prune runs pruneTree starting from the Drain's root node.
func (d *Drain) Prune() {
d.pruneTree(d.rootNode)
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/pattern/drain/drain_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) {
`I0507 <_> 1 defaultevictor.go:202] "Pod fails the following checks" pod="<_>" checks="[pod is a mirror pod, pod is a static pod, pod has system critical priority, pod has higher priority than specified priority class threshold, pod has local storage and descheduler is not configured with evictLocalStoragePods]"`,
`I0507 <_> 1 defaultevictor.go:202] "Pod fails the following checks" pod="<_>" checks="pod has local storage and descheduler is not configured with evictLocalStoragePods"`,
`I0507 <_> 1 defaultevictor.go:202] "Pod fails the following checks" pod="<_>" checks="pod is a DaemonSet pod"`,
`I0507 <_> 1 node.go:157] "Pod does not fit on any other node" pod:="<_>" node:="<_>" error:="[pod node selector does not match the node label, <_> <_><_> <_> <_><_> <_> <_>]"`,
`I0507 <_> 1 node.go:157] "Pod does not fit on any other node" pod:="<_>" node:="<_>" error:="[pod node selector does not match the node label, <_> <_> <_> <_> <_> <_>]"`,
`I0507 <_> 1 node.go:157] "Pod does not fit on any other node" pod:="<_>" node:="<_>" error:="[pod node selector does not match the node label, insufficient <_>, insufficient <_>]"`,
`I0507 <_> 1 node.go:157] "Pod does not fit on any other node" pod:="<_>" node:="<_>" error:="[pod node selector does not match the node label, insufficient <_>]"`,
`I0507 <_> 1 node.go:157] "Pod does not fit on any other node" pod:="<_>" node:="<_>" error:="[pod node selector does not match the node label, pod does not tolerate taints on the node, insufficient <_>, insufficient <_>]"`,
Expand Down
9 changes: 9 additions & 0 deletions pkg/pattern/drain/line_tokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -286,3 +286,12 @@ func isVariableField(key []byte) bool {
bytes.EqualFold(key, []byte("time")) ||
bytes.EqualFold(key, []byte("timestamp"))
}

// DedupingTokenizer wraps a LineTokenizer and overrides Join so that the
// joined output is passed through deduplicatePlaceholders.
type DedupingTokenizer struct {
// LineTokenizer is the embedded tokenizer; its methods are promoted
// unless DedupingTokenizer defines its own (as it does for Join).
LineTokenizer
// dedupParam is the placeholder string handed to deduplicatePlaceholders.
dedupParam string
}

// Join delegates to the embedded LineTokenizer's Join and then passes the
// joined line through deduplicatePlaceholders using d.dedupParam as the
// placeholder.
func (d DedupingTokenizer) Join(tokens []string, state interface{}) string {
	joined := d.LineTokenizer.Join(tokens, state)
	return deduplicatePlaceholders(joined, d.dedupParam)
}

0 comments on commit e23598d

Please sign in to comment.