From 4b00ee25ad1e24afce8da590721f292aff0565b3 Mon Sep 17 00:00:00 2001 From: Spade A <71589810+SpadeA-Tang@users.noreply.github.com> Date: Mon, 16 May 2022 13:26:37 +0800 Subject: [PATCH] copr: Split more accurately when bucket keys are not accurate. (#34290) close pingcap/tidb#34287 --- go.mod | 2 +- go.sum | 4 +- store/copr/coprocessor_test.go | 62 ++++++++++++++++++++++++--- store/copr/region_cache.go | 77 ++++++++++++++-------------------- 4 files changed, 91 insertions(+), 54 deletions(-) diff --git a/go.mod b/go.mod index b9dda9e44f2ef..2bf497f83f9e8 100644 --- a/go.mod +++ b/go.mod @@ -64,7 +64,7 @@ require ( github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.7.2-0.20220504104629-106ec21d14df github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 - github.com/tikv/client-go/v2 v2.0.1-0.20220510032238-ff5e35ac2869 + github.com/tikv/client-go/v2 v2.0.1-0.20220516035221-e007187e5101 github.com/tikv/pd/client v0.0.0-20220307081149-841fa61e9710 github.com/twmb/murmur3 v1.1.3 github.com/uber/jaeger-client-go v2.22.1+incompatible diff --git a/go.sum b/go.sum index 3ce6f9be0dfec..f8c17dafc79ac 100644 --- a/go.sum +++ b/go.sum @@ -751,8 +751,8 @@ github.com/stretchr/testify v1.7.2-0.20220504104629-106ec21d14df/go.mod h1:6Fq8o github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw= github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 h1:mbAskLJ0oJfDRtkanvQPiooDH8HvJ2FBh+iKT/OmiQQ= github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2/go.mod h1:2PfKggNGDuadAa0LElHrByyrz4JPZ9fFx6Gs7nx7ZZU= -github.com/tikv/client-go/v2 v2.0.1-0.20220510032238-ff5e35ac2869 h1:yOApqwZVzC1ne1v9Qc1OQtCe5Wtm1Vv6xqaOs6/y4NQ= -github.com/tikv/client-go/v2 v2.0.1-0.20220510032238-ff5e35ac2869/go.mod h1:0scaG+seu7L56apm+Gjz9vckyO7ABIzM6T7n00mrIXs= +github.com/tikv/client-go/v2 v2.0.1-0.20220516035221-e007187e5101 h1:LEdgY/R6ir0V7mSuNW2m7ZgS0hbmBhsnnrLOxoBoC74= +github.com/tikv/client-go/v2
v2.0.1-0.20220516035221-e007187e5101/go.mod h1:0scaG+seu7L56apm+Gjz9vckyO7ABIzM6T7n00mrIXs= github.com/tikv/pd/client v0.0.0-20220307081149-841fa61e9710 h1:jxgmKOscXSjaFEKQGRyY5qOpK8hLqxs2irb/uDJMtwk= github.com/tikv/pd/client v0.0.0-20220307081149-841fa61e9710/go.mod h1:AtvppPwkiyUgQlR1W9qSqfTB+OsOIu19jDCOxOsPkmU= github.com/tklauser/go-sysconf v0.3.9 h1:JeUVdAOWhhxVcU6Eqr/ATFHgXk/mmiItdKeJPev3vTo= diff --git a/store/copr/coprocessor_test.go b/store/copr/coprocessor_test.go index 84eb95cb73c58..7f2efa0e2db71 100644 --- a/store/copr/coprocessor_test.go +++ b/store/copr/coprocessor_test.go @@ -258,15 +258,43 @@ func TestBuildTasksByBuckets(t *testing.T) { taskEqual(t, task, regionIDs[0], regionIDs[0], expectedTaskRanges[i]...) } + // cross several buckets ranges + // region: n ----------------------------- x + // buckets: n -- q -- r -- t -- u -- v -- x + // ranges: n--o p--q s ------------ w + // tasks: n--o p--q + // s--t + // t -- u + // u -- v + // v--w + expectedTaskRanges = [][]string{ + {"n", "o", "p", "q"}, + {"s", "t"}, + {"t", "u"}, + {"u", "v"}, + {"v", "w"}, + } + cluster.SplitRegionBuckets(regionIDs[1], [][]byte{{'n'}, {'q'}, {'r'}, {'t'}, {'u'}, {'v'}, {'x'}}, regionIDs[1]) + cache = NewRegionCache(tikv.NewRegionCache(pdCli)) + defer cache.Close() + tasks, err = buildCopTasks(bo, cache, buildCopRanges("n", "o", "p", "q", "s", "w"), req, nil) + require.NoError(t, err) + require.Len(t, tasks, len(expectedTaskRanges)) + for i, task := range tasks { + taskEqual(t, task, regionIDs[1], regionIDs[1], expectedTaskRanges[i]...) 
+ } + // out of range buckets // region: n------------------x - // buckets: q---s---u - // ranges: n-o p----s t---v w-x - // tasks: n-o p----s(it can be improved, i.e., n-o p-q, q-s) - // t-u - // u-v w-x + // buckets: q---s---u + // ranges: n-o p ----s t---v w-x + // tasks: n-o p-q + // q--s + // t-u + // u-v w-x expectedTaskRanges = [][]string{ - {"n", "o", "p", "s"}, + {"n", "o", "p", "q"}, + {"q", "s"}, {"t", "u"}, {"u", "v", "w", "x"}, } @@ -299,6 +327,28 @@ func TestBuildTasksByBuckets(t *testing.T) { for i, task := range tasks { taskEqual(t, task, regionIDs[1], regionIDs[1], expectedTaskRanges[i]...) } + + // cover the whole region + // region: n--------------x + // buckets: n -- q -- r -- x + // ranges: n--------------x + // tasks: n -- q + // q -- r + // r -- x + expectedTaskRanges = [][]string{ + {"n", "q"}, + {"q", "r"}, + {"r", "x"}, + } + cluster.SplitRegionBuckets(regionIDs[1], [][]byte{{'n'}, {'q'}, {'r'}, {'x'}}, regionIDs[1]) + cache = NewRegionCache(tikv.NewRegionCache(pdCli)) + defer cache.Close() + tasks, err = buildCopTasks(bo, cache, buildCopRanges("n", "x"), req, nil) + require.NoError(t, err) + require.Len(t, tasks, len(expectedTaskRanges)) + for i, task := range tasks { + taskEqual(t, task, regionIDs[1], regionIDs[1], expectedTaskRanges[i]...) + } } func TestSplitRegionRanges(t *testing.T) { diff --git a/store/copr/region_cache.go b/store/copr/region_cache.go index a28f282cd8686..4aa970aa458a4 100644 --- a/store/copr/region_cache.go +++ b/store/copr/region_cache.go @@ -77,56 +77,43 @@ func (l *LocationKeyRanges) splitKeyRangesByBuckets() []*LocationKeyRanges { loc := l.Location res := []*LocationKeyRanges{} for ranges.Len() > 0 { + // ranges must be in loc.region, so the bucket returned by loc.LocateBucket is guaranteed to be not nil bucket := loc.LocateBucket(ranges.At(0).StartKey) - if bucket == nil { - // TODO(youjiali1995): if it's overlapped with some buckets, it can be splitted.
- // - // Buckets information may not be up-to-date and accurate. - // Find all ranges that can't be located in a bucket and make it one task. - i := 1 - for ; i < ranges.Len(); i++ { - if loc.LocateBucket(ranges.At(i).StartKey) != nil { - break - } - } - res = append(res, &LocationKeyRanges{l.Location, ranges.Slice(0, i)}) - ranges = ranges.Slice(i, ranges.Len()) - } else { - // Iterate to the first range that is not complete in the bucket. - var r kv.KeyRange - var i int - for ; i < ranges.Len(); i++ { - r = ranges.At(i) - if !(bucket.Contains(r.EndKey) || bytes.Equal(bucket.EndKey, r.EndKey)) { - break - } - } - // All rest ranges belong to the same bucket. - if i == ranges.Len() { - res = append(res, &LocationKeyRanges{l.Location, ranges}) + + // Iterate to the first range that is not complete in the bucket. + var r kv.KeyRange + var i int + for ; i < ranges.Len(); i++ { + r = ranges.At(i) + if !(bucket.Contains(r.EndKey) || bytes.Equal(bucket.EndKey, r.EndKey)) { break } + } + // All rest ranges belong to the same bucket. + if i == ranges.Len() { + res = append(res, &LocationKeyRanges{l.Location, ranges}) + break + } + + if bucket.Contains(r.StartKey) { + // Part of r is not in the bucket. We need to split it. + taskRanges := ranges.Slice(0, i) + taskRanges.last = &kv.KeyRange{ + StartKey: r.StartKey, + EndKey: bucket.EndKey, + } + res = append(res, &LocationKeyRanges{l.Location, taskRanges}) - if bucket.Contains(r.StartKey) { - // Part of r is not in the bucket. We need to split it. - taskRanges := ranges.Slice(0, i) - taskRanges.last = &kv.KeyRange{ - StartKey: r.StartKey, - EndKey: bucket.EndKey, - } - res = append(res, &LocationKeyRanges{l.Location, taskRanges}) - - ranges = ranges.Slice(i+1, ranges.Len()) - ranges.first = &kv.KeyRange{ - StartKey: bucket.EndKey, - EndKey: r.EndKey, - } - } else { - // ranges[i] is not in the bucket. 
- taskRanges := ranges.Slice(0, i) - res = append(res, &LocationKeyRanges{l.Location, taskRanges}) - ranges = ranges.Slice(i, ranges.Len()) + ranges = ranges.Slice(i+1, ranges.Len()) + ranges.first = &kv.KeyRange{ + StartKey: bucket.EndKey, + EndKey: r.EndKey, } + } else { + // ranges[i] is not in the bucket. + taskRanges := ranges.Slice(0, i) + res = append(res, &LocationKeyRanges{l.Location, taskRanges}) + ranges = ranges.Slice(i, ranges.Len()) } } return res