Skip to content
This repository has been archived by the owner on Aug 21, 2023. It is now read-only.

support specialized csv separator and delimiter #116

Merged
merged 3 commits into from
Jul 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions cmd/dumpling/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ var (
caPath string
certPath string
keyPath string
csvSeparator string
csvDelimiter string

dumpEmptyDatabase bool
escapeBackslash bool
Expand Down Expand Up @@ -115,6 +117,8 @@ func main() {
pflag.StringVar(&caPath, "ca", "", "The path name to the certificate authority file for TLS connection")
pflag.StringVar(&certPath, "cert", "", "The path name to the client certificate file for TLS connection")
pflag.StringVar(&keyPath, "key", "", "The path name to the client private key file for TLS connection")
pflag.StringVar(&csvSeparator, "csv-separator", ",", "The separator for csv files, default ','")
pflag.StringVar(&csvDelimiter, "csv-delimiter", "\"", "The delimiter for values in csv files, default '\"'")

printVersion := pflag.BoolP("version", "V", false, "Print Dumpling version")

Expand Down Expand Up @@ -185,6 +189,8 @@ func main() {
conf.Security.CertPath = certPath
conf.Security.KeyPath = keyPath
conf.SessionParams["tidb_mem_quota_query"] = tidbMemQuotaQuery
conf.CsvSeparator = csvSeparator
conf.CsvDelimiter = csvDelimiter

err = export.Dump(context.Background(), conf)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions v4/export/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ type Config struct {
NoData bool
CsvNullValue string
Sql string
CsvSeparator string
CsvDelimiter string

TableFilter filter.Filter
Rows uint64
Expand Down
2 changes: 1 addition & 1 deletion v4/export/ir.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ type RowReceiverStringer interface {

type Stringer interface {
WriteToBuffer(*bytes.Buffer, bool)
WriteToBufferInCsv(*bytes.Buffer, bool, string)
WriteToBufferInCsv(*bytes.Buffer, bool, *csvOption)
}

type RowReceiver interface {
Expand Down
46 changes: 23 additions & 23 deletions v4/export/sql_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
var colTypeRowReceiverMap = map[string]func() RowReceiverStringer{}

var nullValue = "NULL"
var quotationMark byte = '\''
var doubleQuotationMark byte = '"'
var quotationMark = []byte{'\''}
var doubleQuotationMark = []byte{'"'}

func init() {
for _, s := range dataTypeString {
Expand Down Expand Up @@ -45,16 +45,16 @@ var dataTypeBin = []string{
"BIT",
}

func getEscapeQuotation(escapeBackSlash bool, escapeQuotation byte) byte {
func getEscapeQuotation(escapeBackSlash bool, escapeQuotation []byte) []byte {
if escapeBackSlash {
return 0
return nil
}
return escapeQuotation
}

func escape(s []byte, bf *bytes.Buffer, escapeQuotation byte) {
if escapeQuotation != 0 {
bf.Write(bytes.ReplaceAll(s, []byte{escapeQuotation}, []byte{escapeQuotation, escapeQuotation}))
func escape(s []byte, bf *bytes.Buffer, escapeQuotation []byte) {
if len(escapeQuotation) > 0 {
bf.Write(bytes.ReplaceAll(s, escapeQuotation, append(escapeQuotation, escapeQuotation...)))
return
}

Expand Down Expand Up @@ -153,11 +153,11 @@ func (r RowReceiverArr) WriteToBuffer(bf *bytes.Buffer, escapeBackslash bool) {
bf.WriteByte(')')
}

func (r RowReceiverArr) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, csvNullValue string) {
func (r RowReceiverArr) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, opt *csvOption) {
for i, receiver := range r {
receiver.WriteToBufferInCsv(bf, escapeBackslash, csvNullValue)
receiver.WriteToBufferInCsv(bf, escapeBackslash, opt)
if i != len(r)-1 {
bf.WriteByte(',')
bf.Write(opt.separator)
}
}
}
Expand All @@ -174,11 +174,11 @@ func (s SQLTypeNumber) WriteToBuffer(bf *bytes.Buffer, _ bool) {
}
}

func (s SQLTypeNumber) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, csvNullValue string) {
func (s SQLTypeNumber) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, opt *csvOption) {
if s.RawBytes != nil {
bf.Write(s.RawBytes)
} else {
bf.WriteString(csvNullValue)
bf.WriteString(opt.nullValue)
}
}

Expand All @@ -198,21 +198,21 @@ func (s *SQLTypeString) ReportSize() uint64 {

func (s *SQLTypeString) WriteToBuffer(bf *bytes.Buffer, escapeBackslash bool) {
if s.RawBytes != nil {
bf.WriteByte(quotationMark)
bf.Write(quotationMark)
escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, quotationMark))
bf.WriteByte(quotationMark)
bf.Write(quotationMark)
} else {
bf.WriteString(nullValue)
}
}

func (s *SQLTypeString) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, csvNullValue string) {
func (s *SQLTypeString) WriteToBufferInCsv(bf *bytes.Buffer, escapeBackslash bool, opt *csvOption) {
if s.RawBytes != nil {
bf.WriteByte(doubleQuotationMark)
escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, doubleQuotationMark))
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
escape(s.RawBytes, bf, getEscapeQuotation(escapeBackslash, opt.delimiter))
bf.Write(opt.delimiter)
} else {
bf.WriteString(csvNullValue)
bf.WriteString(opt.nullValue)
}
}

Expand All @@ -231,12 +231,12 @@ func (s *SQLTypeBytes) WriteToBuffer(bf *bytes.Buffer, _ bool) {
fmt.Fprintf(bf, "x'%x'", s.RawBytes)
}

func (s *SQLTypeBytes) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, csvNullValue string) {
func (s *SQLTypeBytes) WriteToBufferInCsv(bf *bytes.Buffer, _ bool, opt *csvOption) {
if s.RawBytes != nil {
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
bf.Write(s.RawBytes)
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
} else {
bf.WriteString(csvNullValue)
bf.WriteString(opt.nullValue)
}
}
14 changes: 1 addition & 13 deletions v4/export/test_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func (m *mockTableIR) EscapeBackSlash() bool {
return m.escapeBackSlash
}

func newMockTableIR(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string) TableDataIR {
func newMockTableIR(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string) *mockTableIR {
return &mockTableIR{
dbName: databaseName,
tblName: tableName,
Expand All @@ -158,15 +158,3 @@ func newMockTableIR(databaseName, tableName string, data [][]driver.Value, speci
colTypes: colTypes,
}
}

// newMockTableIRWithError returns a *mockTableIR (as a TableDataIR) populated
// from the arguments, with selectedField fixed to "*" and rowErr set to err.
// NOTE(review): how rowErr is surfaced while rows are iterated is defined
// outside this view — confirm against the mockTableIR implementation.
func newMockTableIRWithError(databaseName, tableName string, data [][]driver.Value, specialComments, colTypes []string, err error) TableDataIR {
return &mockTableIR{
dbName: databaseName,
tblName: tableName,
data: data,
specCmt: specialComments,
selectedField: "*",
colTypes: colTypes,
rowErr: err,
}
}
14 changes: 13 additions & 1 deletion v4/export/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,12 @@ type outputFileNamer struct {
tableName string
}

// csvOption bundles the CSV formatting settings that are handed down to
// WriteInsertInCsv and every WriteToBufferInCsv implementation.
type csvOption struct {
// nullValue is the text written in place of a column whose RawBytes is nil.
nullValue string
// separator is written between adjacent fields of a row and between header
// column names; it may be more than one byte long.
separator []byte
// delimiter is written immediately before and after string values, bytes
// values and header column names. For string values and column names,
// occurrences of it inside the text are doubled when backslash escaping is
// off; it may be more than one byte long.
delimiter []byte
}

func newOutputFileNamer(ir TableDataIR) *outputFileNamer {
return &outputFileNamer{
chunkIndex: ir.ChunkIndex(),
Expand All @@ -142,10 +148,16 @@ func (f *CsvWriter) WriteTableData(ctx context.Context, ir TableDataIR) error {
chunksIter := buildChunksIter(ir, f.cfg.FileSize, f.cfg.StatementSize)
defer chunksIter.Rows().Close()

opt := &csvOption{
nullValue: f.cfg.CsvNullValue,
separator: []byte(f.cfg.CsvSeparator),
delimiter: []byte(f.cfg.CsvDelimiter),
}

for {
filePath := path.Join(f.cfg.OutputDirPath, fileName)
fileWriter, tearDown := buildInterceptFileWriter(filePath)
err := WriteInsertInCsv(ctx, chunksIter, fileWriter, f.cfg.NoHeader, f.cfg.CsvNullValue)
err := WriteInsertInCsv(ctx, chunksIter, fileWriter, f.cfg.NoHeader, opt)
tearDown()
if err != nil {
return err
Expand Down
12 changes: 6 additions & 6 deletions v4/export/writer_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func WriteInsert(pCtx context.Context, tblIR TableDataIR, w io.Writer) error {
return wp.Error()
}

func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHeader bool, csvNullValue string) error {
func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHeader bool, opt *csvOption) error {
fileRowIter := tblIR.Rows()
if !fileRowIter.HasNext() {
return nil
Expand Down Expand Up @@ -223,11 +223,11 @@ func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHe

if !noHeader && len(tblIR.ColumnNames()) != 0 {
for i, col := range tblIR.ColumnNames() {
bf.WriteByte(doubleQuotationMark)
escape([]byte(col), bf, getEscapeQuotation(escapeBackSlash, doubleQuotationMark))
bf.WriteByte(doubleQuotationMark)
bf.Write(opt.delimiter)
escape([]byte(col), bf, getEscapeQuotation(escapeBackSlash, opt.delimiter))
bf.Write(opt.delimiter)
if i != len(tblIR.ColumnTypes())-1 {
bf.WriteByte(',')
bf.Write(opt.separator)
}
}
bf.WriteByte('\n')
Expand All @@ -241,7 +241,7 @@ func WriteInsertInCsv(pCtx context.Context, tblIR TableDataIR, w io.Writer, noHe
return err
}

row.WriteToBufferInCsv(bf, escapeBackSlash, csvNullValue)
row.WriteToBufferInCsv(bf, escapeBackSlash, opt)
counter += 1

if bf.Len() >= lengthLimit {
Expand Down
43 changes: 41 additions & 2 deletions v4/export/writer_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ func (s *testUtilSuite) TestWriteInsertReturnsError(c *C) {
}
// row errors at last line
rowErr := errors.New("mock row error")
tableIR := newMockTableIRWithError("test", "employee", data, specCmts, colTypes, rowErr)
tableIR := newMockTableIR("test", "employee", data, specCmts, colTypes)
tableIR.rowErr = rowErr
bf := &bytes.Buffer{}

err := WriteInsert(context.Background(), tableIR, bf)
Expand All @@ -112,13 +113,51 @@ func (s *testUtilSuite) TestWriteInsertInCsv(c *C) {
tableIR := newMockTableIR("test", "employee", data, nil, colTypes)
bf := &bytes.Buffer{}

err := WriteInsertInCsv(context.Background(), tableIR, bf, true, "\\N")
// test nullValue
opt := &csvOption{separator: []byte(","), delimiter: doubleQuotationMark, nullValue: "\\N"}
err := WriteInsertInCsv(context.Background(), tableIR, bf, true, opt)
c.Assert(err, IsNil)
expected := "1,\"male\",\"bob@mail.com\",\"020-1234\",\\N\n" +
"2,\"female\",\"sarah@mail.com\",\"020-1253\",\"healthy\"\n" +
"3,\"male\",\"john@mail.com\",\"020-1256\",\"healthy\"\n" +
"4,\"female\",\"sarah@mail.com\",\"020-1235\",\"healthy\"\n"
c.Assert(bf.String(), Equals, expected)

// test delimiter
bf.Reset()
opt.delimiter = quotationMark
err = WriteInsertInCsv(context.Background(), tableIR, bf, true, opt)
c.Assert(err, IsNil)
expected = "1,'male','bob@mail.com','020-1234',\\N\n" +
"2,'female','sarah@mail.com','020-1253','healthy'\n" +
"3,'male','john@mail.com','020-1256','healthy'\n" +
"4,'female','sarah@mail.com','020-1235','healthy'\n"
c.Assert(bf.String(), Equals, expected)

// test separator
bf.Reset()
opt.separator = []byte(";")
err = WriteInsertInCsv(context.Background(), tableIR, bf, true, opt)
c.Assert(err, IsNil)
expected = "1;'male';'bob@mail.com';'020-1234';\\N\n" +
"2;'female';'sarah@mail.com';'020-1253';'healthy'\n" +
"3;'male';'john@mail.com';'020-1256';'healthy'\n" +
"4;'female';'sarah@mail.com';'020-1235';'healthy'\n"
c.Assert(bf.String(), Equals, expected)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please include a test case where the separator has multiple characters and the delimiter is "ma" (male should become mamamalema, female should become mafemamalema, xxx@mail.com should become maxxx@mamail.comma).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

addressed in 7eb9742


// test delimiter that included in values
bf.Reset()
opt.separator = []byte("&;,?")
opt.delimiter = []byte("ma")
tableIR.colNames = []string{"id", "gender", "email", "phone_number", "status"}
err = WriteInsertInCsv(context.Background(), tableIR, bf, false, opt)
c.Assert(err, IsNil)
expected = "maidma&;,?magenderma&;,?maemamailma&;,?maphone_numberma&;,?mastatusma\n" +
"1&;,?mamamalema&;,?mabob@mamail.comma&;,?ma020-1234ma&;,?\\N\n" +
"2&;,?mafemamalema&;,?masarah@mamail.comma&;,?ma020-1253ma&;,?mahealthyma\n" +
"3&;,?mamamalema&;,?majohn@mamail.comma&;,?ma020-1256ma&;,?mahealthyma\n" +
"4&;,?mafemamalema&;,?masarah@mamail.comma&;,?ma020-1235ma&;,?mahealthyma\n"
c.Assert(bf.String(), Equals, expected)
}

func (s *testUtilSuite) TestSQLDataTypes(c *C) {
Expand Down