From 4b00ee25ad1e24afce8da590721f292aff0565b3 Mon Sep 17 00:00:00 2001
From: Spade  A <71589810+SpadeA-Tang@users.noreply.github.com>
Date: Mon, 16 May 2022 13:26:37 +0800
Subject: [PATCH] copr: Split more accurately when bucket keys are not
 accurate. (#34290)

close pingcap/tidb#34287
---
 go.mod                         |  2 +-
 go.sum                         |  4 +-
 store/copr/coprocessor_test.go | 62 ++++++++++++++++++++++++---
 store/copr/region_cache.go     | 77 ++++++++++++++--------------------
 4 files changed, 91 insertions(+), 54 deletions(-)

diff --git a/go.mod b/go.mod
index b9dda9e44f2ef..2bf497f83f9e8 100644
--- a/go.mod
+++ b/go.mod
@@ -64,7 +64,7 @@ require (
 	github.com/spf13/pflag v1.0.5
 	github.com/stretchr/testify v1.7.2-0.20220504104629-106ec21d14df
 	github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2
-	github.com/tikv/client-go/v2 v2.0.1-0.20220510032238-ff5e35ac2869
+	github.com/tikv/client-go/v2 v2.0.1-0.20220516035221-e007187e5101
 	github.com/tikv/pd/client v0.0.0-20220307081149-841fa61e9710
 	github.com/twmb/murmur3 v1.1.3
 	github.com/uber/jaeger-client-go v2.22.1+incompatible
diff --git a/go.sum b/go.sum
index 3ce6f9be0dfec..f8c17dafc79ac 100644
--- a/go.sum
+++ b/go.sum
@@ -751,8 +751,8 @@ github.com/stretchr/testify v1.7.2-0.20220504104629-106ec21d14df/go.mod h1:6Fq8o
 github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
 github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2 h1:mbAskLJ0oJfDRtkanvQPiooDH8HvJ2FBh+iKT/OmiQQ=
 github.com/tiancaiamao/appdash v0.0.0-20181126055449-889f96f722a2/go.mod h1:2PfKggNGDuadAa0LElHrByyrz4JPZ9fFx6Gs7nx7ZZU=
-github.com/tikv/client-go/v2 v2.0.1-0.20220510032238-ff5e35ac2869 h1:yOApqwZVzC1ne1v9Qc1OQtCe5Wtm1Vv6xqaOs6/y4NQ=
-github.com/tikv/client-go/v2 v2.0.1-0.20220510032238-ff5e35ac2869/go.mod h1:0scaG+seu7L56apm+Gjz9vckyO7ABIzM6T7n00mrIXs=
+github.com/tikv/client-go/v2 v2.0.1-0.20220516035221-e007187e5101 h1:LEdgY/R6ir0V7mSuNW2m7ZgS0hbmBhsnnrLOxoBoC74=
+github.com/tikv/client-go/v2 v2.0.1-0.20220516035221-e007187e5101/go.mod h1:0scaG+seu7L56apm+Gjz9vckyO7ABIzM6T7n00mrIXs=
 github.com/tikv/pd/client v0.0.0-20220307081149-841fa61e9710 h1:jxgmKOscXSjaFEKQGRyY5qOpK8hLqxs2irb/uDJMtwk=
 github.com/tikv/pd/client v0.0.0-20220307081149-841fa61e9710/go.mod h1:AtvppPwkiyUgQlR1W9qSqfTB+OsOIu19jDCOxOsPkmU=
 github.com/tklauser/go-sysconf v0.3.9 h1:JeUVdAOWhhxVcU6Eqr/ATFHgXk/mmiItdKeJPev3vTo=
diff --git a/store/copr/coprocessor_test.go b/store/copr/coprocessor_test.go
index 84eb95cb73c58..7f2efa0e2db71 100644
--- a/store/copr/coprocessor_test.go
+++ b/store/copr/coprocessor_test.go
@@ -258,15 +258,43 @@ func TestBuildTasksByBuckets(t *testing.T) {
 		taskEqual(t, task, regionIDs[0], regionIDs[0], expectedTaskRanges[i]...)
 	}
 
+	// cross several buckets ranges
+	// region:    n  -----------------------------  x
+	// buckets:   n   --   q -- r --  t -- u -- v -- x
+	// ranges:    n--o  p--q       s  ------------ w
+	// tasks:     n--o  p--q
+	//                             s--t
+	//                                t -- u
+	//                                     u -- v
+	//                                          v--w
+	expectedTaskRanges = [][]string{
+		{"n", "o", "p", "q"},
+		{"s", "t"},
+		{"t", "u"},
+		{"u", "v"},
+		{"v", "w"},
+	}
+	cluster.SplitRegionBuckets(regionIDs[1], [][]byte{{'n'}, {'q'}, {'r'}, {'t'}, {'u'}, {'v'}, {'x'}}, regionIDs[1])
+	cache = NewRegionCache(tikv.NewRegionCache(pdCli))
+	defer cache.Close()
+	tasks, err = buildCopTasks(bo, cache, buildCopRanges("n", "o", "p", "q", "s", "w"), req, nil)
+	require.NoError(t, err)
+	require.Len(t, tasks, len(expectedTaskRanges))
+	for i, task := range tasks {
+		taskEqual(t, task, regionIDs[1], regionIDs[1], expectedTaskRanges[i]...)
+	}
+
 	// out of range buckets
 	// region:  n------------------x
-	// buckets:      q---s---u
-	// ranges:  n-o p----s t---v w-x
-	// tasks:   n-o p----s(it can be improved, i.e., n-o p-q, q-s)
-	//                     t-u
-	//                       u-v w-x
+	// buckets:       q---s---u
+	// ranges:  n-o p ----s t---v w-x
+	// tasks:   n-o p-q
+	//                 q--s
+	//                      t-u
+	//                        u-v w-x
 	expectedTaskRanges = [][]string{
-		{"n", "o", "p", "s"},
+		{"n", "o", "p", "q"},
+		{"q", "s"},
 		{"t", "u"},
 		{"u", "v", "w", "x"},
 	}
@@ -299,6 +327,28 @@ func TestBuildTasksByBuckets(t *testing.T) {
 	for i, task := range tasks {
 		taskEqual(t, task, regionIDs[1], regionIDs[1], expectedTaskRanges[i]...)
 	}
+
+	// cover the whole region
+	// region:    n--------------x
+	// buckets:   n -- q -- r -- x
+	// ranges:    n--------------x
+	// tasks:     n -- q
+	//                 q -- r
+	//                      r -- x
+	expectedTaskRanges = [][]string{
+		{"n", "q"},
+		{"q", "r"},
+		{"r", "x"},
+	}
+	cluster.SplitRegionBuckets(regionIDs[1], [][]byte{{'n'}, {'q'}, {'r'}, {'x'}}, regionIDs[1])
+	cache = NewRegionCache(tikv.NewRegionCache(pdCli))
+	defer cache.Close()
+	tasks, err = buildCopTasks(bo, cache, buildCopRanges("n", "x"), req, nil)
+	require.NoError(t, err)
+	require.Len(t, tasks, len(expectedTaskRanges))
+	for i, task := range tasks {
+		taskEqual(t, task, regionIDs[1], regionIDs[1], expectedTaskRanges[i]...)
+	}
 }
 
 func TestSplitRegionRanges(t *testing.T) {
diff --git a/store/copr/region_cache.go b/store/copr/region_cache.go
index a28f282cd8686..4aa970aa458a4 100644
--- a/store/copr/region_cache.go
+++ b/store/copr/region_cache.go
@@ -77,56 +77,43 @@ func (l *LocationKeyRanges) splitKeyRangesByBuckets() []*LocationKeyRanges {
 	loc := l.Location
 	res := []*LocationKeyRanges{}
 	for ranges.Len() > 0 {
+		// ranges must be in loc.region, so the bucket returned by loc.LocateBucket is guaranteed to be not nil
 		bucket := loc.LocateBucket(ranges.At(0).StartKey)
-		if bucket == nil {
-			// TODO(youjiali1995): if it's overlapped with some buckets, it can be splitted.
-			//
-			// Buckets information may not be up-to-date and accurate.
-			// Find all ranges that can't be located in a bucket and make it one task.
-			i := 1
-			for ; i < ranges.Len(); i++ {
-				if loc.LocateBucket(ranges.At(i).StartKey) != nil {
-					break
-				}
-			}
-			res = append(res, &LocationKeyRanges{l.Location, ranges.Slice(0, i)})
-			ranges = ranges.Slice(i, ranges.Len())
-		} else {
-			// Iterate to the first range that is not complete in the bucket.
-			var r kv.KeyRange
-			var i int
-			for ; i < ranges.Len(); i++ {
-				r = ranges.At(i)
-				if !(bucket.Contains(r.EndKey) || bytes.Equal(bucket.EndKey, r.EndKey)) {
-					break
-				}
-			}
-			// All rest ranges belong to the same bucket.
-			if i == ranges.Len() {
-				res = append(res, &LocationKeyRanges{l.Location, ranges})
+
+		// Iterate to the first range that is not complete in the bucket.
+		var r kv.KeyRange
+		var i int
+		for ; i < ranges.Len(); i++ {
+			r = ranges.At(i)
+			if !(bucket.Contains(r.EndKey) || bytes.Equal(bucket.EndKey, r.EndKey)) {
 				break
 			}
+		}
+		// All rest ranges belong to the same bucket.
+		if i == ranges.Len() {
+			res = append(res, &LocationKeyRanges{l.Location, ranges})
+			break
+		}
+
+		if bucket.Contains(r.StartKey) {
+			// Part of r is not in the bucket. We need to split it.
+			taskRanges := ranges.Slice(0, i)
+			taskRanges.last = &kv.KeyRange{
+				StartKey: r.StartKey,
+				EndKey:   bucket.EndKey,
+			}
+			res = append(res, &LocationKeyRanges{l.Location, taskRanges})
 
-			if bucket.Contains(r.StartKey) {
-				// Part of r is not in the bucket. We need to split it.
-				taskRanges := ranges.Slice(0, i)
-				taskRanges.last = &kv.KeyRange{
-					StartKey: r.StartKey,
-					EndKey:   bucket.EndKey,
-				}
-				res = append(res, &LocationKeyRanges{l.Location, taskRanges})
-
-				ranges = ranges.Slice(i+1, ranges.Len())
-				ranges.first = &kv.KeyRange{
-					StartKey: bucket.EndKey,
-					EndKey:   r.EndKey,
-				}
-			} else {
-				// ranges[i] is not in the bucket.
-				taskRanges := ranges.Slice(0, i)
-				res = append(res, &LocationKeyRanges{l.Location, taskRanges})
-				ranges = ranges.Slice(i, ranges.Len())
+			ranges = ranges.Slice(i+1, ranges.Len())
+			ranges.first = &kv.KeyRange{
+				StartKey: bucket.EndKey,
+				EndKey:   r.EndKey,
 			}
+		} else {
+			// ranges[i] is not in the bucket.
+			taskRanges := ranges.Slice(0, i)
+			res = append(res, &LocationKeyRanges{l.Location, taskRanges})
+			ranges = ranges.Slice(i, ranges.Len())
 		}
 	}
 	return res