Skip to content
This repository has been archived by the owner on Dec 8, 2021. It is now read-only.

mydump: support multi bytes csv delimiter and separator #406

Merged
merged 8 commits into from
Sep 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions lightning/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,16 +384,12 @@ func (cfg *Config) LoadFromTOML(data []byte) error {
func (cfg *Config) Adjust() error {
// Reject problematic CSV configurations.
csv := &cfg.Mydumper.CSV
if len(csv.Separator) != 1 {
return errors.New("invalid config: `mydumper.csv.separator` must be exactly one byte long")
if len(csv.Separator) == 0 {
return errors.New("invalid config: `mydumper.csv.separator` must not be empty")
}

if len(csv.Delimiter) > 1 {
return errors.New("invalid config: `mydumper.csv.delimiter` must be one byte long or empty")
}

if csv.Separator == csv.Delimiter {
return errors.New("invalid config: cannot use the same character for both CSV delimiter and separator")
if len(csv.Delimiter) > 0 && (strings.HasPrefix(csv.Separator, csv.Delimiter) || strings.HasPrefix(csv.Delimiter, csv.Separator)) {
return errors.New("invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other")
}

if csv.BackslashEscape {
Expand Down
24 changes: 17 additions & 7 deletions lightning/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,23 @@ func (s *configTestSuite) TestInvalidCSV(c *C) {
[mydumper.csv]
separator = ''
`,
err: "invalid config: `mydumper.csv.separator` must be exactly one byte long",
err: "invalid config: `mydumper.csv.separator` must not be empty",
},
{
input: `
[mydumper.csv]
separator = 'hello'
delimiter = 'hel'
`,
err: "invalid config: `mydumper.csv.separator` must be exactly one byte long",
err: "invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other",
kennytm marked this conversation as resolved.
Show resolved Hide resolved
},
{
input: `
[mydumper.csv]
separator = 'hel'
delimiter = 'hello'
`,
err: "invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other",
},
{
input: `
Expand All @@ -297,7 +306,7 @@ func (s *configTestSuite) TestInvalidCSV(c *C) {
[mydumper.csv]
separator = ','
`,
err: "invalid config: `mydumper.csv.separator` must be exactly one byte long",
err: "",
},
{
input: `
Expand All @@ -311,7 +320,7 @@ func (s *configTestSuite) TestInvalidCSV(c *C) {
[mydumper.csv]
delimiter = 'hello'
`,
err: "invalid config: `mydumper.csv.delimiter` must be one byte long or empty",
err: "",
},
{
input: `
Expand All @@ -324,17 +333,18 @@ func (s *configTestSuite) TestInvalidCSV(c *C) {
{
input: `
[mydumper.csv]
delimiter = '“'
separator = '\s'
delimiter = '\d'
`,
err: "invalid config: `mydumper.csv.delimiter` must be one byte long or empty",
err: "",
},
{
input: `
[mydumper.csv]
separator = '|'
delimiter = '|'
`,
err: "invalid config: cannot use the same character for both CSV delimiter and separator",
err: "invalid config: `mydumper.csv.separator` and `mydumper.csv.delimiter` must not be prefix of each other",
},
{
input: `
Expand Down
2 changes: 1 addition & 1 deletion lightning/lightning_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (s *lightningServerSuite) TestRunServer(c *C) {
c.Assert(data["error"], Matches, "cannot parse task.*")
resp.Body.Close()

resp, err = http.Post(url, "application/toml", strings.NewReader("[mydumper.csv]\nseparator = 'fooo'"))
resp, err = http.Post(url, "application/toml", strings.NewReader("[mydumper.csv]\nseparator = 'fooo'\ndelimiter= 'foo'"))
c.Assert(err, IsNil)
c.Assert(resp.StatusCode, Equals, http.StatusBadRequest)
err = json.NewDecoder(resp.Body).Decode(&data)
Expand Down
32 changes: 10 additions & 22 deletions lightning/mydump/bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,43 +9,31 @@

package mydump

import "unicode/utf8"
// byteSet is a 32-byte value, where each bit represents the presence of a
// given byte value in the set.
type byteSet [8]uint32

// asciiSet is a 32-byte value, where each bit represents the presence of a
// given ASCII character in the set. The 128-bits of the lower 16 bytes,
// starting with the least-significant bit of the lowest word to the
// most-significant bit of the highest word, map to the full range of all
// 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
// ensuring that any non-ASCII character will be reported as not in the set.
type asciiSet [8]uint32

// makeASCIISet creates a set of ASCII characters and reports whether all
// characters in chars are ASCII.
func makeASCIISet(chars string) (as asciiSet, ok bool) {
// makeByteSet creates a set containing every byte value that appears in chars.
func makeByteSet(chars []byte) (as byteSet) {
for i := 0; i < len(chars); i++ {
c := chars[i]
if c >= utf8.RuneSelf {
return as, false
}
as[c>>5] |= 1 << uint(c&31)
}
return as, true
return as
}

// contains reports whether c is inside the set.
func (as *asciiSet) contains(c byte) bool {
func (as *byteSet) contains(c byte) bool {
	// The upper three bits of c pick one of the eight 32-bit words; the
	// lower five bits pick the bit inside that word.
	word := as[c>>5]
	mask := uint32(1) << (c & 31)
	return word&mask != 0
}

// IndexAnyAscii returns the byte index of the first occurrence in s of any of the Unicode
// code points in chars. It returns -1 if there is no code
// point in common.
func IndexAnyAscii(s []byte, as *asciiSet) int {
// IndexAnyByte returns the index of the first byte in s that is a member of
// the byte set as. It returns -1 if no byte of s is in the set.
func IndexAnyByte(s []byte, as *byteSet) int {
	for pos := 0; pos < len(s); pos++ {
		if as.contains(s[pos]) {
			return pos
		}
	}
	return -1
}
Loading