Skip to content

Commit

Permalink
Optimize wildcard matchers for .* and .+ (thanos-io#8131)
Browse files Browse the repository at this point in the history
* optimize wildcard matchers for .* and .+

Signed-off-by: yeya24 <benye@amazon.com>

* add changelog

Signed-off-by: yeya24 <benye@amazon.com>

---------

Signed-off-by: yeya24 <benye@amazon.com>
  • Loading branch information
yeya24 authored Mar 3, 2025
1 parent 4ba7d59 commit c69f112
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#8000](https://github.com/thanos-io/thanos/pull/8000) Query: Bump promql-engine, pass partial response through options
- [#7353](https://github.com/thanos-io/thanos/pull/7353) [#8045](https://github.com/thanos-io/thanos/pull/8045) Receiver/StoreGateway: Add `--matcher-cache-size` option to enable caching for regex matchers in series calls.
- [#8017](https://github.com/thanos-io/thanos/pull/8017) Store Gateway: Use native histogram for binary reader load and download duration and fixed download duration metric. #8017
- [#8131](https://github.com/thanos-io/thanos/pull/8131) Store Gateway: Optimize regex matchers for .* and .+. #8131

### Changed

Expand Down
21 changes: 21 additions & 0 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -2958,6 +2958,16 @@ func matchersToPostingGroups(ctx context.Context, lvalsFn func(name string) ([]s

// NOTE: Derived from tsdb.postingsForMatcher. index.Merge is equivalent to map duplication.
func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, error), m *labels.Matcher) (*postingGroup, []string, error) {
// .* regexp matches any string.
if m.Type == labels.MatchRegexp && m.Value == ".*" {
return newPostingGroup(true, m.Name, nil, nil), nil, nil
}

// .* not regexp doesn't match any string.
if m.Type == labels.MatchNotRegexp && m.Value == ".*" {
return newPostingGroup(false, m.Name, nil, nil), nil, nil
}

// If the matcher selects an empty value, it selects all the series which don't
// have the label name set too. See: https://github.com/prometheus/prometheus/issues/3575
// and https://github.com/prometheus/prometheus/pull/3578#issuecomment-351653555.
Expand Down Expand Up @@ -2991,6 +3001,11 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er
return newPostingGroup(true, m.Name, nil, vals), vals, nil
}

// .+ not regexp matches only the empty string: select all postings and remove the postings of every label value.
if m.Type == labels.MatchNotRegexp && m.Value == ".+" {
return newPostingGroup(true, m.Name, nil, vals), vals, nil
}

for i, val := range vals {
if (i+1)%checkContextEveryNIterations == 0 && ctx.Err() != nil {
return nil, nil, ctx.Err()
Expand All @@ -3002,6 +3017,7 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er

return newPostingGroup(true, m.Name, nil, toRemove), vals, nil
}

if m.Type == labels.MatchRegexp {
if vals := m.SetMatches(); len(vals) > 0 {
sort.Strings(vals)
Expand All @@ -3025,6 +3041,11 @@ func toPostingGroup(ctx context.Context, lvalsFn func(name string) ([]string, er
return newPostingGroup(false, m.Name, vals, nil), vals, nil
}

// .+ regexp matches any non-empty string: get postings for all label values.
if m.Type == labels.MatchRegexp && m.Value == ".+" {
return newPostingGroup(false, m.Name, vals, nil), vals, nil
}

var toAdd []string
for i, val := range vals {
if (i+1)%checkContextEveryNIterations == 0 && ctx.Err() != nil {
Expand Down
56 changes: 56 additions & 0 deletions pkg/store/bucket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3492,6 +3492,62 @@ func TestPostingGroupMerge(t *testing.T) {
}
}

// TestToPostingGroup checks the posting group that toPostingGroup builds for
// the wildcard regexp matchers `.*` and `.+` and their negated forms. For each
// matcher it verifies the addAll flag, the add/remove keys, and how many times
// the label-values lookup function was invoked.
func TestToPostingGroup(t *testing.T) {
	type testCase struct {
		name        string
		matcher     *labels.Matcher
		labelVals   []string
		wantAddAll  bool
		wantAdd     []string
		wantRemove  []string
		wantLookups int
	}

	cases := []testCase{
		{
			name:        "regexp .*",
			matcher:     labels.MustNewMatcher(labels.MatchRegexp, labels.MetricName, ".*"),
			wantAddAll:  true,
			wantLookups: 0,
		},
		{
			name:        "not regexp .*",
			matcher:     labels.MustNewMatcher(labels.MatchNotRegexp, labels.MetricName, ".*"),
			wantAddAll:  false,
			wantLookups: 0,
		},
		{
			name:        "regexp .+",
			matcher:     labels.MustNewMatcher(labels.MatchRegexp, labels.MetricName, ".+"),
			labelVals:   []string{"foo"},
			wantAddAll:  false,
			wantAdd:     []string{"foo"},
			wantLookups: 1,
		},
		{
			name:        "not regexp .+",
			matcher:     labels.MustNewMatcher(labels.MatchNotRegexp, labels.MetricName, ".+"),
			labelVals:   []string{"foo"},
			wantAddAll:  true,
			wantRemove:  []string{"foo"},
			wantLookups: 1,
		},
	}

	ctx := context.Background()
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			// Stubbed label-values lookup that counts its invocations and
			// hands back the values configured for this case.
			var lookups int
			stub := func(string) ([]string, error) {
				lookups++
				return c.labelVals, nil
			}

			group, _, err := toPostingGroup(ctx, stub, c.matcher)
			testutil.Ok(t, err)

			testutil.Equals(t, c.wantAddAll, group.addAll)
			testutil.Equals(t, c.wantAdd, group.addKeys)
			testutil.Equals(t, c.wantRemove, group.removeKeys)
			testutil.Equals(t, c.wantLookups, lookups)
		})
	}
}

// TestExpandedPostingsRace tests whether there is a race between multiple ExpandPostings() calls.
func TestExpandedPostingsRace(t *testing.T) {
t.Parallel()
Expand Down

0 comments on commit c69f112

Please sign in to comment.