Summary (commentary only; it sits before the first "diff --git" header, outside every hunk):
  executor/select_into.go
    - SelectIntoExec gains the fieldBuf and escapeBuf scratch buffers and the per-field enclosed flag.
    - dumpToOutfile renders each column value other than bit into fieldBuf, passes it through the new
      escapeField, and appends the result to lineBuf; bit values are appended to lineBuf directly.
    - When no escape character is set, NULL is written as the literal NULL instead of \N.
  executor/select_into_test.go
    - Adds TestDeliminators, covering separator validation, NULL output and escaping.

diff --git a/executor/select_into.go b/executor/select_into.go
index c4f86928737af..9541ce98eefea 100644
--- a/executor/select_into.go
+++ b/executor/select_into.go
@@ -33,12 +33,15 @@ type SelectIntoExec struct {
 	baseExecutor
 	intoOpt *ast.SelectIntoOption
 
-	lineBuf []byte
-	realBuf []byte
-	writer  *bufio.Writer
-	dstFile *os.File
-	chk     *chunk.Chunk
-	started bool
+	lineBuf   []byte
+	realBuf   []byte
+	fieldBuf  []byte
+	escapeBuf []byte
+	enclosed  bool
+	writer    *bufio.Writer
+	dstFile   *os.File
+	chk       *chunk.Chunk
+	started   bool
 }
 
 // Open implements the Executor Open interface.
@@ -57,6 +60,8 @@ func (s *SelectIntoExec) Open(ctx context.Context) error {
 	s.writer = bufio.NewWriter(s.dstFile)
 	s.chk = newFirstChunk(s.children[0])
 	s.lineBuf = make([]byte, 0, 1024)
+	s.fieldBuf = make([]byte, 0, 64)
+	s.escapeBuf = make([]byte, 0, 64)
 	return s.baseExecutor.Open(ctx)
 }
 
@@ -82,6 +87,35 @@ func (s *SelectIntoExec) considerEncloseOpt(et types.EvalType) bool {
 		et == types.ETJson
 }
 
+func (s *SelectIntoExec) escapeField(f []byte) []byte {
+	if s.intoOpt.FieldsInfo.Escaped == 0 {
+		return f
+	}
+	s.escapeBuf = s.escapeBuf[:0]
+	for _, b := range f {
+		escape := false
+		switch {
+		case b == 0:
+			// a zero byte is always escaped, and is written as the escape character followed by '0'
+			escape = true
+			b = '0'
+		case b == s.intoOpt.FieldsInfo.Escaped || b == s.intoOpt.FieldsInfo.Enclosed:
+			escape = true
+		case !s.enclosed && len(s.intoOpt.FieldsInfo.Terminated) > 0 && b == s.intoOpt.FieldsInfo.Terminated[0]:
+			// the field terminator's first byte is escaped only when the field is not enclosed
+			escape = true
+		case len(s.intoOpt.LinesInfo.Terminated) > 0 && b == s.intoOpt.LinesInfo.Terminated[0]:
+			// the line terminator's first byte is escaped whether or not the field is enclosed
+			escape = true
+		}
+		if escape {
+			s.escapeBuf = append(s.escapeBuf, s.intoOpt.FieldsInfo.Escaped)
+		}
+		s.escapeBuf = append(s.escapeBuf, b)
+	}
+	return s.escapeBuf
+}
+
 func (s *SelectIntoExec) dumpToOutfile() error {
 	lineTerm := "\n"
 	if s.intoOpt.LinesInfo.Terminated != "" {
@@ -102,6 +136,8 @@ func (s *SelectIntoExec) dumpToOutfile() error {
 	nullTerm := []byte("\\N")
 	if s.intoOpt.FieldsInfo.Escaped != byte(0) {
 		nullTerm[0] = s.intoOpt.FieldsInfo.Escaped
+	} else {
+		nullTerm = []byte("NULL")
 	}
 
 	cols := s.children[0].Schema().Columns
@@ -120,34 +156,42 @@ func (s *SelectIntoExec) dumpToOutfile() error {
 			if (encloseFlag && !encloseOpt) ||
 				(encloseFlag && encloseOpt && s.considerEncloseOpt(et)) {
 				s.lineBuf = append(s.lineBuf, encloseByte)
+				s.enclosed = true
+			} else {
+				s.enclosed = false
 			}
+			s.fieldBuf = s.fieldBuf[:0]
 			switch col.GetType().Tp {
 			case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong:
-				s.lineBuf = strconv.AppendInt(s.lineBuf, row.GetInt64(j), 10)
+				s.fieldBuf = strconv.AppendInt(s.fieldBuf, row.GetInt64(j), 10)
 			case mysql.TypeLonglong:
 				if mysql.HasUnsignedFlag(col.GetType().Flag) {
-					s.lineBuf = strconv.AppendUint(s.lineBuf, row.GetUint64(j), 10)
+					s.fieldBuf = strconv.AppendUint(s.fieldBuf, row.GetUint64(j), 10)
 				} else {
-					s.lineBuf = strconv.AppendInt(s.lineBuf, row.GetInt64(j), 10)
+					s.fieldBuf = strconv.AppendInt(s.fieldBuf, row.GetInt64(j), 10)
 				}
 			case mysql.TypeFloat, mysql.TypeDouble:
-				s.realBuf, s.lineBuf = DumpRealOutfile(s.realBuf, s.lineBuf, row.GetFloat64(j), col.RetType)
+				s.realBuf, s.fieldBuf = DumpRealOutfile(s.realBuf, s.fieldBuf, row.GetFloat64(j), col.RetType)
 			case mysql.TypeNewDecimal:
-				s.lineBuf = append(s.lineBuf, row.GetMyDecimal(j).String()...)
-			case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar, mysql.TypeBit,
+				s.fieldBuf = append(s.fieldBuf, row.GetMyDecimal(j).String()...)
+			case mysql.TypeString, mysql.TypeVarString, mysql.TypeVarchar, mysql.TypeTinyBlob,
 				mysql.TypeMediumBlob, mysql.TypeLongBlob, mysql.TypeBlob:
+				s.fieldBuf = append(s.fieldBuf, row.GetBytes(j)...)
+			case mysql.TypeBit:
+				// bit values bypass fieldBuf/escapeField and are appended unescaped (verified on MySQL, test case added)
 				s.lineBuf = append(s.lineBuf, row.GetBytes(j)...)
 			case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
-				s.lineBuf = append(s.lineBuf, row.GetTime(j).String()...)
+				s.fieldBuf = append(s.fieldBuf, row.GetTime(j).String()...)
 			case mysql.TypeDuration:
-				s.lineBuf = append(s.lineBuf, row.GetDuration(j, col.GetType().Decimal).String()...)
+				s.fieldBuf = append(s.fieldBuf, row.GetDuration(j, col.GetType().Decimal).String()...)
 			case mysql.TypeEnum:
-				s.lineBuf = append(s.lineBuf, row.GetEnum(j).String()...)
+				s.fieldBuf = append(s.fieldBuf, row.GetEnum(j).String()...)
 			case mysql.TypeSet:
-				s.lineBuf = append(s.lineBuf, row.GetSet(j).String()...)
+				s.fieldBuf = append(s.fieldBuf, row.GetSet(j).String()...)
 			case mysql.TypeJSON:
-				s.lineBuf = append(s.lineBuf, row.GetJSON(j).String()...)
+				s.fieldBuf = append(s.fieldBuf, row.GetJSON(j).String()...)
 			}
+			s.lineBuf = append(s.lineBuf, s.escapeField(s.fieldBuf)...)
 			if (encloseFlag && !encloseOpt) ||
 				(encloseFlag && encloseOpt && s.considerEncloseOpt(et)) {
 				s.lineBuf = append(s.lineBuf, encloseByte)
diff --git a/executor/select_into_test.go b/executor/select_into_test.go
index 33346d4330104..41ba595c89a69 100644
--- a/executor/select_into_test.go
+++ b/executor/select_into_test.go
@@ -144,6 +144,76 @@ func (s *testSuite1) TestSelectIntoOutfileConstant(c *C) {
 `, outfile, c)
 }
 
+func (s *testSuite1) TestDeliminators(c *C) {
+	outfile := randomSelectFilePath("TestDeliminators")
+	tk := testkit.NewTestKit(c, s.store)
+	tk.MustExec("use test")
+
+	tk.MustExec("CREATE TABLE `tx` (`a` varbinary(20) DEFAULT NULL,`b` int DEFAULT NULL)")
+	err := tk.ExecToErr(fmt.Sprintf("select * from `tx` into outfile %q fields enclosed by '\"\"'", outfile))
+	// ENCLOSED BY must be a single character, so a two-character value is rejected
+	c.Check(err, NotNil)
+	c.Assert(strings.Contains(err.Error(), "Field separator argument is not what is expected"), IsTrue, Commentf("err: %v", err))
+	err = tk.ExecToErr(fmt.Sprintf("select * from `tx` into outfile %q fields escaped by 'gg'", outfile))
+	// the same restriction applies to ESCAPED BY
+	c.Check(err, NotNil)
+	c.Assert(strings.Contains(err.Error(), "Field separator argument is not what is expected"), IsTrue, Commentf("err: %v", err))
+
+	// both statements above failed, so no outfile should have been left on disk
+	_, err = os.Stat(outfile)
+	c.Check(os.IsNotExist(err), IsTrue, Commentf("err: %v", err))
+
+	tk.MustExec("insert into tx values (NULL, NULL);\n")
+	tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q fields escaped by ''", outfile))
+	// with an empty ESCAPED BY, NULL is written as the literal word NULL instead of \N
+	cmpAndRm("NULL\tNULL\n", outfile, c)
+
+	tk.MustExec("delete from tx")
+	tk.MustExec("insert into tx values ('d\",\"e\",', 3), ('\\\\', 2)")
+	tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\\n'", outfile))
+	// the enclosing and escape characters are escaped inside the data; the field terminator is not, because the field is enclosed
+	cmpAndRm("\"d\\\",\\\"e\\\",\",\"3\"\n\"\\\\\",\"2\"\n", outfile, c)
+
+	tk.MustExec("delete from tx")
+	tk.MustExec("insert into tx values ('a\tb', 1)")
+	tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q FIELDS TERMINATED BY ',' ENCLOSED BY '\"' escaped by '\t' LINES TERMINATED BY '\\n'", outfile))
+	// a custom escape character (a tab here) that occurs in the data is escaped with itself
+	cmpAndRm("\"a\t\tb\",\"1\"\n", outfile, c)
+
+	tk.MustExec("delete from tx")
+	tk.MustExec(`insert into tx values ('d","e",', 1)`)
+	tk.MustExec(`insert into tx values (unhex("00"), 2)`)
+	tk.MustExec(`insert into tx values ("\r\n\b\Z\t", 3)`)
+	tk.MustExec(`insert into tx values (null, 4)`)
+	tk.MustExec(fmt.Sprintf("select * from `tx` into outfile %q FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\\n'", outfile))
+	// a line terminator inside a value is escaped, a zero byte is written as \0, and NULL is written as an unenclosed \N
+	cmpAndRm("\"d\\\",\\\"e\\\",\",\"1\"\n"+"\"\\0\",\"2\"\n"+"\"\r\\\n\b\032\t\",\"3\"\n"+"\\N,\"4\"\n", outfile, c)
+
+	tk.MustExec("create table tb (s char(10), b bit(48), bb blob(6))")
+	tk.MustExec("insert into tb values ('\\0\\b\\n\\r\\t\\Z', _binary '\\0\\b\\n\\r\\t\\Z', unhex('00080A0D091A'))")
+	tk.MustExec(fmt.Sprintf("select * from tb into outfile %q", outfile))
+	// the bit column is written without escaping (verified on MySQL), while the char and blob columns are escaped
+	cmpAndRm("\\0\b\\\n\r\\\t\032\t"+"\000\b\n\r\t\032\t"+"\\0\b\\\n\r\\\t\032\n", outfile, c)
+
+	tk.MustExec("create table zero (a varchar(10), b varchar(10), c varchar(10))")
+	tk.MustExec("insert into zero values (unhex('00'), _binary '\\0', '\\0')")
+	tk.MustExec(fmt.Sprintf("select * from zero into outfile %q", outfile))
+	// a zero byte is always escaped
+	cmpAndRm("\\0\t\\0\t\\0\n", outfile, c)
+	tk.MustExec(fmt.Sprintf("select * from zero into outfile %q fields enclosed by '\"'", outfile))
+	// a zero byte is escaped even when the field is enclosed
+	cmpAndRm("\"\\0\"\t\"\\0\"\t\"\\0\"\n", outfile, c)
+
+	tk.MustExec("create table tt (a char(10), b char(10), c char(10))")
+	tk.MustExec("insert into tt values ('abcd', 'abcd', 'abcd')")
+	tk.MustExec(fmt.Sprintf("select * from tt into outfile %q fields terminated by 'a-' lines terminated by 'b--'", outfile))
+	// when not enclosed, the first character of both the field and line terminators is escaped
+	cmpAndRm("\\a\\bcda-\\a\\bcda-\\a\\bcdb--", outfile, c)
+	tk.MustExec(fmt.Sprintf("select * from tt into outfile %q fields terminated by 'a-' enclosed by '\"' lines terminated by 'b--'", outfile))
+	// when enclosed, only the first character of the line terminator is escaped
+	cmpAndRm("\"a\\bcd\"a-\"a\\bcd\"a-\"a\\bcd\"b--", outfile, c)
+}
+
 func (s *testSuite1) TestDumpReal(c *C) {
 	cases := []struct {
 		val float64