diff --git a/br/pkg/lightning/mydump/BUILD.bazel b/br/pkg/lightning/mydump/BUILD.bazel index dccd93f84e7ce..d265cad78bce6 100644 --- a/br/pkg/lightning/mydump/BUILD.bazel +++ b/br/pkg/lightning/mydump/BUILD.bazel @@ -23,6 +23,7 @@ go_library( "//br/pkg/lightning/metric", "//br/pkg/lightning/worker", "//br/pkg/storage", + "//config", "//parser/mysql", "//types", "//util/filter", diff --git a/br/pkg/lightning/mydump/csv_parser.go b/br/pkg/lightning/mydump/csv_parser.go index 2d7ec3f5d9c88..b7d6c6fc21903 100644 --- a/br/pkg/lightning/mydump/csv_parser.go +++ b/br/pkg/lightning/mydump/csv_parser.go @@ -25,6 +25,7 @@ import ( "github.com/pingcap/tidb/br/pkg/lightning/log" "github.com/pingcap/tidb/br/pkg/lightning/metric" "github.com/pingcap/tidb/br/pkg/lightning/worker" + tidbconfig "github.com/pingcap/tidb/config" "github.com/pingcap/tidb/types" "github.com/pingcap/tidb/util/mathutil" ) @@ -33,8 +34,14 @@ var ( errUnterminatedQuotedField = errors.NewNoStackError("syntax error: unterminated quoted field") errDanglingBackslash = errors.NewNoStackError("syntax error: no character after backslash") errUnexpectedQuoteField = errors.NewNoStackError("syntax error: cannot have consecutive fields without separator") + // LargestEntryLimit is the maximum number of bytes readUntil may accumulate in its buffer before it returns an error; init sets it to tidbconfig.MaxTxnEntrySizeLimit. + LargestEntryLimit int ) +func init() { + LargestEntryLimit = tidbconfig.MaxTxnEntrySizeLimit +} + // CSVParser is basically a copy of encoding/csv, but special-cased for MySQL-like input. type CSVParser struct { blockParser @@ -336,6 +343,9 @@ func (parser *CSVParser) readUntil(chars *byteSet) ([]byte, byte, error) { var buf []byte for { buf = append(buf, parser.buf...) 
+ if len(buf) > LargestEntryLimit { + return buf, 0, errors.New("size of row cannot exceed the max value of txn-entry-size-limit") + } parser.buf = nil if err := parser.readBlock(); err != nil || len(parser.buf) == 0 { if err == nil { diff --git a/br/pkg/lightning/mydump/csv_parser_test.go b/br/pkg/lightning/mydump/csv_parser_test.go index 2696a6909c96c..da06c15ed39d9 100644 --- a/br/pkg/lightning/mydump/csv_parser_test.go +++ b/br/pkg/lightning/mydump/csv_parser_test.go @@ -1,6 +1,7 @@ package mydump_test import ( + "bytes" "context" "encoding/csv" "fmt" @@ -680,6 +681,33 @@ func TestConsecutiveFields(t *testing.T) { }) } +// TestTooLargeRow checks that ReadRow returns an error once a row grows beyond mydump.LargestEntryLimit. +func TestTooLargeRow(t *testing.T) { + cfg := config.MydumperRuntime{ + CSV: config.CSVConfig{ + Separator: ",", + Delimiter: `"`, + }, + } + var testCase bytes.Buffer + testCase.WriteString("a,b,c,d") + // Lower the package-level limit for this test only; restore it on exit so later tests see the original value. + backup := mydump.LargestEntryLimit + t.Cleanup(func() { mydump.LargestEntryLimit = backup }) + // WARN: will take up 10KB memory here. + mydump.LargestEntryLimit = 10 * 1024 + for i := 0; i < mydump.LargestEntryLimit; i++ { + testCase.WriteByte('d') + } + charsetConvertor, err := mydump.NewCharsetConvertor(cfg.DataCharacterSet, cfg.DataInvalidCharReplace) + require.NoError(t, err) + parser, err := mydump.NewCSVParser(context.Background(), &cfg.CSV, mydump.NewStringReader(testCase.String()), int64(config.ReadBlockSize), ioWorkers, false, charsetConvertor) + require.NoError(t, err) + e := parser.ReadRow() + require.Error(t, e) + require.Contains(t, e.Error(), "size of row cannot exceed the max value of txn-entry-size-limit") +} + func TestSpecialChars(t *testing.T) { cfg := config.MydumperRuntime{ CSV: config.CSVConfig{Separator: ",", Delimiter: `"`}, diff --git a/config/config.go b/config/config.go index 54ed0cf44c0c6..68108267540b1 100644 --- a/config/config.go +++ b/config/config.go @@ -46,6 +46,8 @@ import ( // Config number limitations const ( MaxLogFileSize = 4096 // MB + // MaxTxnEntrySizeLimit is the max value of TxnEntrySizeLimit. 
+ MaxTxnEntrySizeLimit = 120 * 1024 * 1024 // 120MB // DefTxnEntrySizeLimit is the default value of TxnEntrySizeLimit. DefTxnEntrySizeLimit = 6 * 1024 * 1024 // DefTxnTotalSizeLimit is the default value of TxnTxnTotalSizeLimit. diff --git a/tidb-server/main.go b/tidb-server/main.go index 41050db2e1f10..0843d624741d8 100644 --- a/tidb-server/main.go +++ b/tidb-server/main.go @@ -670,7 +670,7 @@ func setGlobalVars() { } else { kv.TxnTotalSizeLimit = cfg.Performance.TxnTotalSizeLimit } - if cfg.Performance.TxnEntrySizeLimit > 120*1024*1024 { + if cfg.Performance.TxnEntrySizeLimit > config.MaxTxnEntrySizeLimit { log.Fatal("cannot set txn entry size limit larger than 120M") } kv.TxnEntrySizeLimit = cfg.Performance.TxnEntrySizeLimit