From dd62ce835845bb5532534582f454f66a61abe2fb Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Mon, 9 Jan 2023 17:49:27 +0800 Subject: [PATCH 1/8] fix: check buf size --- br/pkg/lightning/mydump/csv_parser.go | 4 ++++ config/config.go | 2 ++ tidb-server/main.go | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/br/pkg/lightning/mydump/csv_parser.go b/br/pkg/lightning/mydump/csv_parser.go index 96de51bd49c73..e085972dac450 100644 --- a/br/pkg/lightning/mydump/csv_parser.go +++ b/br/pkg/lightning/mydump/csv_parser.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/metric" "github.com/pingcap/tidb/br/pkg/lightning/worker" + tidbconfig "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/mathutil" ) @@ -336,6 +337,9 @@ func (parser *CSVParser) readUntil(chars *byteSet) ([]byte, byte, error) { var buf []byte for { buf = append(buf, parser.buf...) + if len(buf) > tidbconfig.MaxTxnEntrySizeLimit { + return buf, 0, errors.New("size of row cannot exceed the max value of txn-entry-size-limit") + } parser.buf = nil if err := parser.readBlock(); err != nil || len(parser.buf) == 0 { if err == nil { diff --git a/config/config.go b/config/config.go index bc25b8c9b9ec3..191911f138eab 100644 --- a/config/config.go +++ b/config/config.go @@ -46,6 +46,8 @@ import ( // Config number limitations const ( MaxLogFileSize = 4096 // MB + // MaxTxnEntrySize is the max value of TxnEntrySizeLimit. + MaxTxnEntrySizeLimit = 120 * 1024 * 1024 // 120MB // DefTxnEntrySizeLimit is the default value of TxnEntrySizeLimit. DefTxnEntrySizeLimit = 6 * 1024 * 1024 // DefTxnTotalSizeLimit is the default value of TxnTxnTotalSizeLimit. diff --git a/tidb-server/main.go b/tidb-server/main.go index 05e16e67db8be..08f0b1d9d95b8 100644 --- a/tidb-server/main.go +++ b/tidb-server/main.go @@ -656,7 +656,7 @@ func setGlobalVars() { } else { kv.TxnTotalSizeLimit = cfg.Performance.TxnTotalSizeLimit } - if cfg.Performance.TxnEntrySizeLimit > 120*1024*1024 { + if cfg.Performance.TxnEntrySizeLimit > config.MaxTxnEntrySizeLimit { log.Fatal("cannot set txn entry size limit larger than 120M") } kv.TxnEntrySizeLimit = cfg.Performance.TxnEntrySizeLimit From 158037043d16bea16cf3cfdac95ccace3d8416c3 Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Tue, 10 Jan 2023 10:24:25 +0800 Subject: [PATCH 2/8] fix: terminate too long buf --- br/pkg/lightning/mydump/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/br/pkg/lightning/mydump/BUILD.bazel b/br/pkg/lightning/mydump/BUILD.bazel index dccd93f84e7ce..d265cad78bce6 100644 --- a/br/pkg/lightning/mydump/BUILD.bazel +++ b/br/pkg/lightning/mydump/BUILD.bazel @@ -23,6 +23,7 @@ go_library( "//br/pkg/lightning/metric", "//br/pkg/lightning/worker", "//br/pkg/storage", + "//config", "//parser/mysql", "//types", "//util/filter", From d25ab9469259fd86fdce118be2a00114af59717d Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Tue, 10 Jan 2023 14:29:35 +0800 Subject: [PATCH 3/8] add ut --- br/pkg/lightning/mydump/csv_parser_test.go | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/br/pkg/lightning/mydump/csv_parser_test.go b/br/pkg/lightning/mydump/csv_parser_test.go index 2696a6909c96c..11ddbe023a161 100644 --- a/br/pkg/lightning/mydump/csv_parser_test.go +++ b/br/pkg/lightning/mydump/csv_parser_test.go @@ -1,6 +1,7 @@ package mydump_test import ( + "bytes" "context" "encoding/csv" "fmt" @@ -16,6 +17,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/mydump" "github.com/pingcap/tidb/br/pkg/lightning/worker" + tidbconfig "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -680,6 +682,28 @@ func TestConsecutiveFields(t *testing.T) { }) } +func TestTooLargeRow(t *testing.T) { + cfg := config.MydumperRuntime{ + CSV: config.CSVConfig{ + Separator: ",", + Delimiter: `"`, + }, + } + var testCase bytes.Buffer + testCase.WriteString("a,b,c,d") + // WARN: will take up 120MB memory here. + for i := 0; i < tidbconfig.MaxTxnEntrySizeLimit; i++ { + testCase.WriteByte('d') + } + charsetConvertor, err := mydump.NewCharsetConvertor(cfg.DataCharacterSet, cfg.DataInvalidCharReplace) + require.NoError(t, err) + parser, err := mydump.NewCSVParser(context.Background(), &cfg.CSV, mydump.NewStringReader(testCase.String()), int64(config.ReadBlockSize), ioWorkers, false, charsetConvertor) + require.NoError(t, err) + e := parser.ReadRow() + require.Error(t, e) + require.Contains(t, e.Error(), "size of row cannot exceed the max value of txn-entry-size-limit") +} + func TestSpecialChars(t *testing.T) { cfg := config.MydumperRuntime{ CSV: config.CSVConfig{Separator: ",", Delimiter: `"`}, From 8260894139bee169385a416745393cec9627e62c Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Tue, 10 Jan 2023 14:39:43 +0800 Subject: [PATCH 4/8] fix dev --- br/pkg/lightning/mydump/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/br/pkg/lightning/mydump/BUILD.bazel b/br/pkg/lightning/mydump/BUILD.bazel index d265cad78bce6..0b132fb26d15a 100644 --- a/br/pkg/lightning/mydump/BUILD.bazel +++ b/br/pkg/lightning/mydump/BUILD.bazel @@ -70,6 +70,7 @@ go_test( "//br/pkg/lightning/worker", "//br/pkg/mock/storage", "//br/pkg/storage", + "//config", "//parser/mysql", "//testkit/testsetup", "//types", From cd6e96ec7d2d39a27f5b844854a109dadfe7d349 Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Tue, 10 Jan 2023 16:10:53 +0800 Subject: [PATCH 5/8] local var --- br/pkg/lightning/mydump/csv_parser_test.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/br/pkg/lightning/mydump/csv_parser_test.go b/br/pkg/lightning/mydump/csv_parser_test.go index 11ddbe023a161..39715bc3f5447 100644 --- a/br/pkg/lightning/mydump/csv_parser_test.go +++ b/br/pkg/lightning/mydump/csv_parser_test.go @@ -26,6 +26,12 @@ import ( var ioWorkers = worker.NewPool(context.Background(), 5, "test_csv") +var largestEntryLimit int + +func init() { + largestEntryLimit = tidbconfig.MaxTxnEntrySizeLimit +} + func assertPosEqual(t *testing.T, parser mydump.Parser, expectPos, expectRowID int64) { pos, rowID := parser.Pos() require.Equal(t, expectPos, pos) @@ -692,7 +698,7 @@ func TestTooLargeRow(t *testing.T) { var testCase bytes.Buffer testCase.WriteString("a,b,c,d") // WARN: will take up 120MB memory here. - for i := 0; i < tidbconfig.MaxTxnEntrySizeLimit; i++ { + for i := 0; i < largestEntryLimit; i++ { testCase.WriteByte('d') } charsetConvertor, err := mydump.NewCharsetConvertor(cfg.DataCharacterSet, cfg.DataInvalidCharReplace) From dc597817d39dbac00e1b2266e8bcbe19d0660191 Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Tue, 10 Jan 2023 16:46:52 +0800 Subject: [PATCH 6/8] fix ut --- br/pkg/lightning/mydump/csv_parser.go | 7 ++++++- br/pkg/lightning/mydump/csv_parser_test.go | 12 +++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/br/pkg/lightning/mydump/csv_parser.go b/br/pkg/lightning/mydump/csv_parser.go index e085972dac450..a474a23c15470 100644 --- a/br/pkg/lightning/mydump/csv_parser.go +++ b/br/pkg/lightning/mydump/csv_parser.go @@ -34,8 +34,13 @@ var ( errUnterminatedQuotedField = errors.NewNoStackError("syntax error: unterminated quoted field") errDanglingBackslash = errors.NewNoStackError("syntax error: no character after backslash") errUnexpectedQuoteField = errors.NewNoStackError("syntax error: cannot have consecutive fields without separator") + LargestEntryLimit int ) +func init() { + LargestEntryLimit = tidbconfig.MaxTxnEntrySizeLimit +} + // CSVParser is basically a copy of encoding/csv, but special-cased for MySQL-like input. type CSVParser struct { blockParser @@ -337,7 +342,7 @@ func (parser *CSVParser) readUntil(chars *byteSet) ([]byte, byte, error) { var buf []byte for { buf = append(buf, parser.buf...) - if len(buf) > tidbconfig.MaxTxnEntrySizeLimit { + if len(buf) > LargestEntryLimit { return buf, 0, errors.New("size of row cannot exceed the max value of txn-entry-size-limit") } parser.buf = nil diff --git a/br/pkg/lightning/mydump/csv_parser_test.go b/br/pkg/lightning/mydump/csv_parser_test.go index 39715bc3f5447..da06c15ed39d9 100644 --- a/br/pkg/lightning/mydump/csv_parser_test.go +++ b/br/pkg/lightning/mydump/csv_parser_test.go @@ -17,7 +17,6 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/mydump" "github.com/pingcap/tidb/br/pkg/lightning/worker" - tidbconfig "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/types" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -26,12 +25,6 @@ import ( var ioWorkers = worker.NewPool(context.Background(), 5, "test_csv") -var largestEntryLimit int - -func init() { - largestEntryLimit = tidbconfig.MaxTxnEntrySizeLimit -} - func assertPosEqual(t *testing.T, parser mydump.Parser, expectPos, expectRowID int64) { pos, rowID := parser.Pos() require.Equal(t, expectPos, pos) @@ -697,8 +690,9 @@ func TestTooLargeRow(t *testing.T) { } var testCase bytes.Buffer testCase.WriteString("a,b,c,d") - // WARN: will take up 120MB memory here. - for i := 0; i < largestEntryLimit; i++ { + // WARN: will take up 10KB memory here. + mydump.LargestEntryLimit = 10 * 1024 + for i := 0; i < mydump.LargestEntryLimit; i++ { testCase.WriteByte('d') } charsetConvertor, err := mydump.NewCharsetConvertor(cfg.DataCharacterSet, cfg.DataInvalidCharReplace) From 04191c0581d2b663ea7f7d48534d26ec564a3364 Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Tue, 10 Jan 2023 17:48:54 +0800 Subject: [PATCH 7/8] fix lint --- br/pkg/lightning/mydump/BUILD.bazel | 1 - br/pkg/lightning/mydump/csv_parser.go | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/br/pkg/lightning/mydump/BUILD.bazel b/br/pkg/lightning/mydump/BUILD.bazel index 0b132fb26d15a..d265cad78bce6 100644 --- a/br/pkg/lightning/mydump/BUILD.bazel +++ b/br/pkg/lightning/mydump/BUILD.bazel @@ -70,7 +70,6 @@ go_test( "//br/pkg/lightning/worker", "//br/pkg/mock/storage", "//br/pkg/storage", - "//config", "//parser/mysql", "//testkit/testsetup", "//types", diff --git a/br/pkg/lightning/mydump/csv_parser.go b/br/pkg/lightning/mydump/csv_parser.go index a474a23c15470..919fb98a33473 100644 --- a/br/pkg/lightning/mydump/csv_parser.go +++ b/br/pkg/lightning/mydump/csv_parser.go @@ -34,7 +34,8 @@ var ( errUnterminatedQuotedField = errors.NewNoStackError("syntax error: unterminated quoted field") errDanglingBackslash = errors.NewNoStackError("syntax error: no character after backslash") errUnexpectedQuoteField = errors.NewNoStackError("syntax error: cannot have consecutive fields without separator") - LargestEntryLimit int + // max size for reading file to buf + LargestEntryLimit int ) func init() { From 6d56b2b13d96ae0ee3f87472c6abb1d0e39844af Mon Sep 17 00:00:00 2001 From: buchuitoudegou <756541536@qq.com> Date: Tue, 10 Jan 2023 18:10:44 +0800 Subject: [PATCH 8/8] fix lint --- br/pkg/lightning/mydump/csv_parser.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/lightning/mydump/csv_parser.go b/br/pkg/lightning/mydump/csv_parser.go index 919fb98a33473..bd59b96415da9 100644 --- a/br/pkg/lightning/mydump/csv_parser.go +++ b/br/pkg/lightning/mydump/csv_parser.go @@ -34,7 +34,7 @@ var ( errUnterminatedQuotedField = errors.NewNoStackError("syntax error: unterminated quoted field") errDanglingBackslash = errors.NewNoStackError("syntax error: no character after backslash") errUnexpectedQuoteField = errors.NewNoStackError("syntax error: cannot have consecutive fields without separator") - // max size for reading file to buf + // LargestEntryLimit is the max size for reading file to buf LargestEntryLimit int )