Commit

Merge branch 'master' into dumpling-tls
Daemonxiao authored Dec 5, 2022
2 parents b2808c5 + 0b1096e commit b95082b
Showing 184 changed files with 14,907 additions and 10,946 deletions.
44 changes: 23 additions & 21 deletions README.md
@@ -1,4 +1,4 @@
![](docs/logo_with_text.png)
<img src="docs/tidb-logo-with-text.png" alt="TiDB, a distributed SQL database" height=100></img>

[![LICENSE](https://img.shields.io/github/license/pingcap/tidb.svg)](https://github.com/pingcap/tidb/blob/master/LICENSE)
[![Language](https://img.shields.io/badge/Language-Go-blue.svg)](https://golang.org/)
@@ -12,15 +12,15 @@

## What is TiDB?

TiDB ("Ti" stands for Titanium) is an open-source NewSQL database that supports Hybrid Transactional and Analytical Processing (HTAP) workloads. It is MySQL compatible and features horizontal scalability, strong consistency, and high availability.
TiDB (/'taɪdiːbi:/, "Ti" stands for Titanium) is an open-source distributed SQL database that supports Hybrid Transactional and Analytical Processing (HTAP) workloads. It is MySQL compatible and features horizontal scalability, strong consistency, and high availability.

- [Key features](https://docs.pingcap.com/tidb/stable/overview#key-features)
- [Architecture](#architecture)
- [MySQL Compatibility](https://docs.pingcap.com/tidb/stable/mysql-compatibility)
- [MySQL compatibility](https://docs.pingcap.com/tidb/stable/mysql-compatibility)

For more details and latest updates, see [TiDB docs](https://docs.pingcap.com/tidb/stable) and [release notes](https://docs.pingcap.com/tidb/dev/release-notes).
For more details and latest updates, see [TiDB documentation](https://docs.pingcap.com/tidb/stable) and [release notes](https://docs.pingcap.com/tidb/dev/release-notes).

For future plans, see [TiDB Roadmap](roadmap.md).
For future plans, see the [TiDB roadmap](roadmap.md).

## Quick start

@@ -38,40 +38,42 @@ See [TiDB Quick Start Guide](https://docs.pingcap.com/tidb/stable/quick-start-wi

### Start developing TiDB

See [Get Started](https://pingcap.github.io/tidb-dev-guide/get-started/introduction.html) chapter of [TiDB Dev Guide](https://pingcap.github.io/tidb-dev-guide/index.html).
See the [Get Started](https://pingcap.github.io/tidb-dev-guide/get-started/introduction.html) chapter of [TiDB Development Guide](https://pingcap.github.io/tidb-dev-guide/index.html).

## Community

You can join these groups and chats to discuss and ask TiDB related questions:
You can join the following groups or channels to discuss or ask questions about TiDB, and to keep yourself informed of the latest TiDB updates:

- [TiDB Internals Forum](https://internals.tidb.io/)
- [Slack Channel](https://slack.tidb.io/invite?team=tidb-community&channel=everyone&ref=pingcap-tidb)
- [TiDB User Group Forum (Chinese)](https://asktug.com)

In addition, you may enjoy following:

- [@PingCAP](https://twitter.com/PingCAP) on Twitter
- Question tagged [#tidb on StackOverflow](https://stackoverflow.com/questions/tagged/tidb)
- The PingCAP Team [English Blog](https://en.pingcap.com/blog) and [Chinese Blog](https://pingcap.com/blog-cn/)
- Discuss TiDB's implementation and design
    - [TiDB Internals forum](https://internals.tidb.io/)
- Seek help when you use TiDB
    - Slack channels: [#everyone](https://slack.tidb.io/invite?team=tidb-community&channel=everyone&ref=pingcap-tidb) (English), [#tidb-japan](https://slack.tidb.io/invite?team=tidb-community&channel=tidb-japan&ref=github-tidb) (Japanese)
    - [TiDB User Group forum](https://asktug.com) (Chinese)
    - [Stack Overflow](https://stackoverflow.com/questions/tagged/tidb) (questions tagged with #tidb)
- Get the latest TiDB news or updates
    - Follow [@PingCAP](https://twitter.com/PingCAP) on Twitter
    - Read the PingCAP [English Blog](https://www.pingcap.com/blog/?from=en) or [Chinese Blog](https://cn.pingcap.com/blog/)

For support, please contact [PingCAP](http://bit.ly/contact_us_via_github).

## Contributing

The [community repository](https://github.com/pingcap/community) hosts all information about the TiDB community, including how to contribute to TiDB, how TiDB community is governed, how special interest groups are organized, etc.
The [community repository](https://github.com/pingcap/community) hosts all information about the TiDB community, including [how to contribute](https://github.com/pingcap/community/blob/master/contributors/README.md) to TiDB, how the TiDB community is governed, how [teams](https://github.com/pingcap/community/blob/master/teams/README.md) are organized.

Contributions are welcomed and greatly appreciated. You can get started with one of the [good first issues](https://github.com/pingcap/tidb/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) or [help wanted issues](https://github.com/pingcap/tidb/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22). For more details on typical contribution workflows, see [Contribute to TiDB](https://pingcap.github.io/tidb-dev-guide/contribute-to-tidb/introduction.html). For more contributing information about where to start, click the contributor icon below.

[<img src="docs/contribution-map.png" alt="contribution-map" width="180">](https://github.com/pingcap/tidb-map/blob/master/maps/contribution-map.md#tidb-is-an-open-source-distributed-htap-database-compatible-with-the-mysql-protocol)

Contributions are welcomed and greatly appreciated. All the contributors are welcomed to claim your reward by filing this [form](https://forms.pingcap.com/f/tidb-contribution-swag). See [Contribution to TiDB](https://pingcap.github.io/tidb-dev-guide/contribute-to-tidb/introduction.html) for details on typical contribution workflows. For more contributing information, click on the contributor icon above.
Every contributor is welcome to claim your contribution swag by filling in and submitting this [form](https://forms.pingcap.com/f/tidb-contribution-swag).

## Case studies

- [English](https://pingcap.com/case-studies)
- [简体中文](https://pingcap.com/cases-cn/)
- [Case studies in English](https://www.pingcap.com/customers/)
- [中文用户案例](https://cn.pingcap.com/case/)

## Architecture

![architecture](./docs/architecture.png)
![TiDB architecture](./docs/tidb-architecture.png)

## License

2 changes: 1 addition & 1 deletion bindinfo/bind_cache.go
@@ -158,7 +158,7 @@ func (c *bindCache) GetBindRecordBySQLDigest(sqlDigest string) (*BindRecord, err
		return nil, errors.New("more than 1 binding matched")
	}
	if len(bindings) == 0 || len(bindings[0].Bindings) == 0 {
		return nil, errors.New("can't find any binding for `" + sqlDigest + "`")
		return nil, errors.New("can't find any binding for '" + sqlDigest + "'")
	}
	return bindings[0], nil
}
85 changes: 84 additions & 1 deletion bindinfo/bind_test.go
@@ -1385,6 +1385,89 @@ func TestDropBindBySQLDigest(t *testing.T) {
	}

	// exception cases
	tk.MustGetErrMsg(fmt.Sprintf("drop binding for sql digest '%s'", "1"), "can't find any binding for `1`")
	tk.MustGetErrMsg(fmt.Sprintf("drop binding for sql digest '%s'", "1"), "can't find any binding for '1'")
	tk.MustGetErrMsg(fmt.Sprintf("drop binding for sql digest '%s'", ""), "sql digest is empty")
}

func TestCreateBindingFromHistory(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	require.NoError(t, tk.Session().Auth(&auth.UserIdentity{Username: "root", Hostname: "%"}, nil, nil))

	tk.MustExec("use test")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t1(id int primary key, a int, b int, key(a))")
	tk.MustExec("create table t2(id int primary key, a int, b int, key(a))")

	var testCases = []struct {
		sqls []string
		hint string
	}{
		{
			sqls: []string{
				"select %s * from t1, t2 where t1.id = t2.id",
				"select %s * from test.t1, t2 where t1.id = t2.id",
				"select %s * from test.t1, test.t2 where t1.id = t2.id",
				"select %s * from t1, test.t2 where t1.id = t2.id",
			},
			hint: "/*+ merge_join(t1, t2) */",
		},
		{
			sqls: []string{
				"select %s * from t1 where a = 1",
				"select %s * from test.t1 where a = 1",
			},
			hint: "/*+ ignore_index(t, a) */",
		},
	}

	for _, testCase := range testCases {
		for _, bind := range testCase.sqls {
			stmtsummary.StmtSummaryByDigestMap.Clear()
			bindSQL := fmt.Sprintf(bind, testCase.hint)
			tk.MustExec(bindSQL)
			planDigest := tk.MustQuery(fmt.Sprintf("select plan_digest from information_schema.statements_summary where query_sample_text = '%s'", bindSQL)).Rows()
			tk.MustExec(fmt.Sprintf("create session binding from history using plan digest '%s'", planDigest[0][0]))
			showRes := tk.MustQuery("show bindings").Rows()
			require.Equal(t, len(showRes), 1)
			require.Equal(t, planDigest[0][0], showRes[0][10])
			for _, sql := range testCase.sqls {
				tk.MustExec(fmt.Sprintf(sql, ""))
				tk.MustQuery("select @@last_plan_from_binding").Check(testkit.Rows("1"))
			}
		}
		showRes := tk.MustQuery("show bindings").Rows()
		require.Equal(t, len(showRes), 1)
		tk.MustExec(fmt.Sprintf("drop binding for sql digest '%s'", showRes[0][9]))
	}

	// exception cases
	tk.MustGetErrMsg(fmt.Sprintf("create binding from history using plan digest '%s'", "1"), "can't find any plans for '1'")
	tk.MustGetErrMsg(fmt.Sprintf("create binding from history using plan digest '%s'", ""), "plan digest is empty")
	tk.MustExec("create binding for select * from t1, t2 where t1.id = t2.id using select /*+ merge_join(t1, t2) */ * from t1, t2 where t1.id = t2.id")
	showRes := tk.MustQuery("show bindings").Rows()
	require.Equal(t, showRes[0][10], "") // plan digest should be nil by create for
}

func TestCreateBindingForPrepareFromHistory(t *testing.T) {
	store := testkit.CreateMockStore(t)
	tk := testkit.NewTestKit(t, store)
	require.NoError(t, tk.Session().Auth(&auth.UserIdentity{Username: "root", Hostname: "%"}, nil, nil))

	tk.MustExec("use test")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t(id int primary key, a int, key(a))")

	tk.MustExec("prepare stmt from 'select /*+ ignore_index(t,a) */ * from t where a = ?'")
	tk.MustExec("set @a = 1")
	tk.MustExec("execute stmt using @a")
	planDigest := tk.MustQuery(fmt.Sprintf("select plan_digest from information_schema.statements_summary where query_sample_text = '%s'", "select /*+ ignore_index(t,a) */ * from t where a = ? [arguments: 1]")).Rows()
	showRes := tk.MustQuery("show bindings").Rows()
	require.Equal(t, len(showRes), 0)
	tk.MustExec(fmt.Sprintf("create binding from history using plan digest '%s'", planDigest[0][0]))
	showRes = tk.MustQuery("show bindings").Rows()
	require.Equal(t, len(showRes), 1)
	require.Equal(t, planDigest[0][0], showRes[0][10])
	tk.MustExec("execute stmt using @a")
	tk.MustQuery("select @@last_plan_from_binding").Check(testkit.Rows("1"))
}
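
For readers skimming the diff, the feature exercised by the two tests above can also be driven from an ordinary client. Below is a minimal, hypothetical sketch (not part of this commit): it assumes a TiDB instance on 127.0.0.1:4000 with a table t(id int primary key, a int, key(a)); only the statement forms — letting a query land in statements_summary, then "create session binding from history using plan digest" — are taken from the tests.

package main

import (
	"database/sql"
	"fmt"
	"log"

	_ "github.com/go-sql-driver/mysql"
)

func main() {
	// Assumed DSN for a local TiDB listening on the default port.
	db, err := sql.Open("mysql", "root@tcp(127.0.0.1:4000)/test")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// 1. Run the query once so its plan is recorded in statements_summary.
	query := "select /*+ ignore_index(t,a) */ * from t where a = 1"
	rows, err := db.Query(query)
	if err != nil {
		log.Fatal(err)
	}
	rows.Close()

	// 2. Look up the plan digest captured for that exact statement text.
	var planDigest string
	if err := db.QueryRow(
		"select plan_digest from information_schema.statements_summary where query_sample_text = ?",
		query,
	).Scan(&planDigest); err != nil {
		log.Fatal(err)
	}

	// 3. Turn the recorded plan into a session-level binding.
	if _, err := db.Exec(fmt.Sprintf(
		"create session binding from history using plan digest '%s'", planDigest)); err != nil {
		log.Fatal(err)
	}
	fmt.Println("created binding from plan digest", planDigest)
}
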
4 changes: 2 additions & 2 deletions bindinfo/handle.go
@@ -971,12 +971,12 @@ func getHintsForSQL(sctx sessionctx.Context, sql string) (string, error) {
}

// GenerateBindSQL generates binding sqls from stmt node and plan hints.
func GenerateBindSQL(ctx context.Context, stmtNode ast.StmtNode, planHint string, captured bool, defaultDB string) string {
func GenerateBindSQL(ctx context.Context, stmtNode ast.StmtNode, planHint string, skipCheckIfHasParam bool, defaultDB string) string {
	// If would be nil for very simple cases such as point get, we do not need to evolve for them.
	if planHint == "" {
		return ""
	}
	if !captured {
	if !skipCheckIfHasParam {
		paramChecker := &paramMarkerChecker{}
		stmtNode.Accept(paramChecker)
		// We need to evolve on current sql, but we cannot restore values for paramMarkers yet,
15 changes: 10 additions & 5 deletions br/pkg/lightning/backend/local/duplicate.go
@@ -553,19 +553,22 @@ func (m *DuplicateManager) buildDupTasks() ([]dupTask, error) {
		return nil, errors.Trace(err)
	}
	tasks := make([]dupTask, 0, keyRanges.TotalRangeNum()*(1+len(m.tbl.Meta().Indices)))
	putToTaskFunc := func(ranges []tidbkv.KeyRange) {
	putToTaskFunc := func(ranges []tidbkv.KeyRange, indexInfo *model.IndexInfo) {
		if len(ranges) == 0 {
			return
		}
		tid := tablecodec.DecodeTableID(ranges[0].StartKey)
		for _, r := range ranges {
			tasks = append(tasks, dupTask{
				KeyRange: r,
				tableID:  tid,
				KeyRange:  r,
				tableID:   tid,
				indexInfo: indexInfo,
			})
		}
	}
	keyRanges.ForEachPartition(putToTaskFunc)
	keyRanges.ForEachPartition(func(ranges []tidbkv.KeyRange) {
		putToTaskFunc(ranges, nil)
	})
	for _, indexInfo := range m.tbl.Meta().Indices {
		if indexInfo.State != model.StatePublic {
			continue
@@ -574,7 +577,9 @@ func (m *DuplicateManager) buildDupTasks() ([]dupTask, error) {
		if err != nil {
			return nil, errors.Trace(err)
		}
		keyRanges.ForEachPartition(putToTaskFunc)
		keyRanges.ForEachPartition(func(ranges []tidbkv.KeyRange) {
			putToTaskFunc(ranges, indexInfo)
		})
	}
	return tasks, nil
}
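
The change above threads per-index metadata into each dupTask by wrapping the shared helper in small single-argument closures passed to ForEachPartition. A self-contained toy sketch of that pattern follows (not from this commit; every type here is a simplified stand-in for the real ones):

package main

import "fmt"

type keyRange struct{ start, end string }

type indexInfo struct{ name string }

type dupTask struct {
	kr  keyRange
	idx *indexInfo // nil means the task scans data (row) keys rather than index keys
}

func main() {
	var tasks []dupTask

	// The shared helper now takes the per-index argument explicitly.
	put := func(ranges []keyRange, idx *indexInfo) {
		for _, r := range ranges {
			tasks = append(tasks, dupTask{kr: r, idx: idx})
		}
	}

	// Stand-in for keyRanges.ForEachPartition, which only accepts a
	// single-argument callback.
	forEachPartition := func(fn func([]keyRange)) {
		fn([]keyRange{{"a", "m"}, {"m", "z"}})
	}

	// Data-key ranges carry no index metadata.
	forEachPartition(func(rs []keyRange) { put(rs, nil) })

	// Each index's ranges carry that index's metadata.
	for _, idx := range []*indexInfo{{name: "idx_a"}, {name: "idx_b"}} {
		idx := idx
		forEachPartition(func(rs []keyRange) { put(rs, idx) })
	}

	fmt.Println("tasks:", len(tasks)) // 2 data-key tasks + 2 per index = 6
}
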
23 changes: 23 additions & 0 deletions br/pkg/lightning/checkpoints/checkpoints.go
@@ -262,6 +262,29 @@ func (ccp *ChunkCheckpoint) DeepCopy() *ChunkCheckpoint {
	}
}

func (ccp *ChunkCheckpoint) UnfinishedSize() int64 {
	if ccp.FileMeta.Compression == mydump.CompressionNone {
		return ccp.Chunk.EndOffset - ccp.Chunk.Offset
	}
	return ccp.FileMeta.FileSize - ccp.Chunk.RealOffset
}

func (ccp *ChunkCheckpoint) TotalSize() int64 {
	if ccp.FileMeta.Compression == mydump.CompressionNone {
		return ccp.Chunk.EndOffset - ccp.Key.Offset
	}
	// TODO: compressed file won't be split into chunks, so using FileSize as TotalSize is ok
	// change this when we support split compressed file into chunks
	return ccp.FileMeta.FileSize
}

func (ccp *ChunkCheckpoint) FinishedSize() int64 {
	if ccp.FileMeta.Compression == mydump.CompressionNone {
		return ccp.Chunk.Offset - ccp.Key.Offset
	}
	return ccp.Chunk.RealOffset - ccp.Key.Offset
}

type EngineCheckpoint struct {
	Status CheckpointStatus
	Chunks []*ChunkCheckpoint // a sorted array
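
One plausible use of the three new helpers, sketched below as a hypothetical standalone program (not part of this commit): a progress ratio that works the same way for plain and compressed chunks. Only UnfinishedSize, FinishedSize, and TotalSize come from the diff; the chunkProgress function and its zero-value example input are illustrative assumptions.

package main

import (
	"fmt"

	"github.com/pingcap/tidb/br/pkg/lightning/checkpoints"
)

// chunkProgress reports how much of a chunk has been consumed, in [0, 1].
// For uncompressed files the helpers measure uncompressed offsets; for
// compressed files they fall back to offsets within the compressed file
// (RealOffset against FileSize), so the ratio is comparable either way.
func chunkProgress(ccp *checkpoints.ChunkCheckpoint) float64 {
	total := ccp.TotalSize()
	if total <= 0 {
		return 1
	}
	return float64(ccp.FinishedSize()) / float64(total)
}

func main() {
	var ccp checkpoints.ChunkCheckpoint // zero value, just so the sketch runs
	fmt.Printf("chunk progress: %.0f%%\n", chunkProgress(&ccp)*100)
}
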
6 changes: 6 additions & 0 deletions br/pkg/lightning/mydump/parquet_parser.go
@@ -351,6 +351,12 @@ func (pp *ParquetParser) SetPos(pos int64, rowID int64) error {
	return nil
}

// RealPos implements the Parser interface.
// For parquet it's equal to Pos().
func (pp *ParquetParser) RealPos() (int64, error) {
	return pp.curStart + int64(pp.curIndex), nil
}

// Close closes the parquet file of the parser.
// It implements the Parser interface.
func (pp *ParquetParser) Close() error {
7 changes: 7 additions & 0 deletions br/pkg/lightning/mydump/parser.go
@@ -94,6 +94,7 @@ type ChunkParser struct {
type Chunk struct {
	Offset       int64
	EndOffset    int64
	RealOffset   int64
	PrevRowIDMax int64
	RowIDMax     int64
	Columns      []string
@@ -126,6 +127,7 @@
type Parser interface {
	Pos() (pos int64, rowID int64)
	SetPos(pos int64, rowID int64) error
	RealPos() (int64, error)
	Close() error
	ReadRow() error
	LastRow() Row
@@ -175,6 +177,11 @@ func (parser *blockParser) SetPos(pos int64, rowID int64) error {
	return nil
}

// RealPos gets the read position of current reader.
func (parser *blockParser) RealPos() (int64, error) {
	return parser.reader.Seek(0, io.SeekCurrent)
}

// Pos returns the current file offset.
// Attention: for compressed sql/csv files, pos is the position in uncompressed files
func (parser *blockParser) Pos() (pos int64, lastRowID int64) {
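
The new RealPos/RealOffset pair exists because, for compressed inputs, the parser's logical position (decompressed bytes, which is what Pos reports) and the underlying reader's position in the on-disk file diverge. A standalone illustration of that gap follows (not from this commit; the counting reader and the gzip payload are made up for the example):

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
)

// countingReader tracks how many compressed bytes have been read so far,
// playing the role of parser.reader.Seek(0, io.SeekCurrent) in the diff.
type countingReader struct {
	r io.Reader
	n int64
}

func (c *countingReader) Read(p []byte) (int, error) {
	n, err := c.r.Read(p)
	c.n += int64(n)
	return n, err
}

func main() {
	// Build a small gzip-compressed payload in memory.
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	zw.Write(bytes.Repeat([]byte("1,foo\n"), 1000))
	zw.Close()

	cr := &countingReader{r: &buf}
	zr, err := gzip.NewReader(cr)
	if err != nil {
		panic(err)
	}
	logical, _ := io.Copy(io.Discard, zr) // decompressed bytes, i.e. Pos-like

	fmt.Printf("logical offset (Pos-like):      %d bytes\n", logical)
	fmt.Printf("physical offset (RealPos-like): %d bytes\n", cr.n)
}
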
7 changes: 6 additions & 1 deletion br/pkg/lightning/mydump/region.go
@@ -38,6 +38,9 @@
	// TableFileSizeINF for compressed size, for lightning 10TB is a relatively big value and will strongly affect efficiency
	// It's used to make sure compressed files can be read until EOF. Because we can't get the exact decompressed size of the compressed files.
	TableFileSizeINF = 10 * 1024 * tableRegionSizeWarningThreshold
	// compressDataRatio is a relatively maximum compress ratio for normal compressed data
	// It's used to estimate rowIDMax, we use a large value to try to avoid overlapping
	compressDataRatio = 500
)

// TableRegion contains information for a table region during import.
@@ -302,7 +305,9 @@ func MakeSourceFileRegion(
	// set fileSize to INF to make sure compressed files can be read until EOF. Because we can't get the exact size of the compressed files.
	// TODO: update progress bar calculation for compressed files.
	if fi.FileMeta.Compression != CompressionNone {
		rowIDMax = fileSize * 100 / divisor // FIXME: this is not accurate. Need more tests and fix solution.
		// FIXME: this is not accurate. Need sample ratio in the future and use sampled ratio to compute rowIDMax
		// currently we use 500 here. It's a relatively large value for most data.
		rowIDMax = fileSize * compressDataRatio / divisor
		fileSize = TableFileSizeINF
	}
	tableRegion := &TableRegion{
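
For a feel of the new estimate, here is a tiny worked example (not from this commit): the 500 ratio and the fileSize * compressDataRatio / divisor formula come from the diff above, while the concrete file size and the per-row divisor are made-up numbers standing in for whatever per-row byte estimate the caller computes.

package main

import "fmt"

func main() {
	const (
		compressDataRatio = 500              // pessimistic expansion ratio from the diff
		fileSize          = 64 * 1024 * 1024 // a 64 MiB compressed source file (assumed)
		divisor           = 256              // assumed average bytes per row after decompression
	)
	// Reserve enough row IDs for the worst case: the file expands 500x.
	rowIDMax := int64(fileSize) * compressDataRatio / divisor
	fmt.Printf("reserved row IDs: %d (~%.0f million)\n", rowIDMax, float64(rowIDMax)/1e6)
}
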
