Skip to content

Commit

Permalink
WIP on fix for #5407 #5231 #4865
Browse files Browse the repository at this point in the history
  • Loading branch information
e-dard committed Mar 16, 2016
1 parent 3645485 commit 68f96a3
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 25 deletions.
97 changes: 74 additions & 23 deletions models/points.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@ var (
' ': []byte(`\ `),
}

tagEscapeCodes = map[byte][]byte{
',': []byte(`\,`),
' ': []byte(`\ `),
'=': []byte(`\=`),
}

ErrPointMustHaveAField = errors.New("point without fields is unsupported")
ErrInvalidNumber = errors.New("invalid number")
)
Expand Down Expand Up @@ -435,24 +429,42 @@ func scanTagsKey(buf []byte, i int) (int, error) {
return i, fmt.Errorf("missing tag key")
}

var escaped bool
if buf[i] == '\\' {
escaped = true
}

// Examine each character in the tag key until we hit an unescaped
// equals (the tag value), or we hit an error (i.e., unescaped
// space or comma).
for {
i++

// Either we reached the end of the buffer or we hit an
// unescaped comma or space.
if i >= len(buf) ||
((buf[i] == ' ' || buf[i] == ',') && buf[i-1] != '\\') {
// cpu,tag{'', ' ', ','}
// End of buffer
if i >= len(buf) {
return i, fmt.Errorf("missing tag value")
}

if buf[i] == '\\' && !escaped {
escaped = true
continue
}

// Have we hit an unescaped comma or space?
if (buf[i] == ' ' || buf[i] == ',') && !escaped {
// cpu,tag{' ', ','}
return i, fmt.Errorf("missing tag value")
}

if buf[i] == '=' && buf[i-1] != '\\' {
// Found the tag value.
if buf[i] == '=' && !escaped {
// cpu,tag=
return i + 1, nil
}

if escaped {
escaped = false
}
}
}

Expand All @@ -464,32 +476,49 @@ func scanTagsValue(buf []byte, i int) (int, int, error) {
return -1, i, fmt.Errorf("missing tag value")
}

var escaped bool
if buf[i] == '\\' {
escaped = true
}

// Examine each character in the tag value until we hit an unescaped
// comma (move onto next tag key), an unescaped space (move onto
// fields), or we error out.
for {
i++

// Hit end of buffer.
if i >= len(buf) {
// cpu,tag=value
return -1, i, fmt.Errorf("missing fields")
}

if buf[i] == '\\' && !escaped {
escaped = true
continue
}

// An unescaped equals sign is an invalid tag value.
if buf[i] == '=' && buf[i-1] != '\\' {
if buf[i] == '=' && !escaped {
// cpu,tag={'=', 'fo=o'}
return -1, i, fmt.Errorf("invalid tag format")
}

if buf[i] == ',' && buf[i-1] != '\\' {
// Move onto next tag.
if buf[i] == ',' && !escaped {
// cpu,tag=foo,
return tagKeyState, i + 1, nil
}

// cpu,tag=foo value=1.0
// cpu, tag=foo\= value=1.0
if buf[i] == ' ' && buf[i-1] != '\\' {
if buf[i] == ' ' && !escaped {
return fieldsState, i, nil
}

if escaped {
escaped = false
}
}
}

Expand Down Expand Up @@ -1003,21 +1032,43 @@ func unescapeMeasurement(in []byte) []byte {
}

// escapeTag returns in with every backslash, comma, equals sign and space
// prefixed by a backslash, so the result can be embedded in a tag key or tag
// value without being mistaken for a separator.
//
// When in contains none of those bytes it is returned as-is (no copy is made);
// otherwise a newly allocated slice is returned and in is left untouched.
func escapeTag(in []byte) []byte {
	// First pass: count the bytes that need a backslash so the output can be
	// allocated exactly once at its final size.
	extra := 0
	for _, c := range in {
		switch c {
		case '\\', ',', '=', ' ':
			extra++
		}
	}
	if extra == 0 {
		return in
	}

	// Second pass: copy the input, inserting a backslash before each byte
	// that needs one. A single pass over the input (rather than one
	// replace per character) is required so that backslashes produced while
	// escaping one character are never escaped again.
	out := make([]byte, 0, len(in)+extra)
	for _, c := range in {
		switch c {
		case '\\', ',', '=', ' ':
			out = append(out, '\\')
		}
		out = append(out, c)
	}
	return out
}

// unescapeTag reverses escapeTag: a backslash that is immediately followed by
// a backslash, comma, equals sign or space is dropped and the following byte
// is kept literally. A backslash followed by any other byte, or a backslash
// that is the final byte of in, is preserved unchanged.
//
// When in contains no backslash it is returned as-is (no copy is made);
// otherwise a newly allocated slice is returned and in is left untouched.
func unescapeTag(in []byte) []byte {
	// Fast path: without a backslash there is nothing to unescape.
	if bytes.IndexByte(in, '\\') == -1 {
		return in
	}

	// The output can only be shorter than the input, so len(in) is a safe
	// upper bound for a single allocation.
	out := make([]byte, 0, len(in))
	for i := 0; i < len(in); i++ {
		if in[i] == '\\' && i+1 < len(in) {
			switch in[i+1] {
			case '\\', ',', '=', ' ':
				// Skip the escaping backslash; the escaped byte is
				// appended below and is never itself treated as the
				// start of another escape sequence.
				i++
			}
		}
		out = append(out, in[i])
	}
	return out
}

// escapeStringField returns a copy of in with any double quotes or
Expand Down
13 changes: 11 additions & 2 deletions models/points_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,15 @@ func BenchmarkParsePointsTagsUnSorted10(b *testing.B) {
}
}

// BenchmarkParsePointsTagsEscape measures parsing a single line whose
// measurement, tag keys and tag values contain escaped commas, spaces, equals
// signs and backslashes, followed by building the key of the parsed point.
func BenchmarkParsePointsTagsEscape(b *testing.B) {
	line := `cp\,u,region=us-west,ho\ st=serverA,env=prod,targe\\t=servers,zone=1c\\\=,tag1=value1,tag2=value2,tag3=value3,tag4=value4,ta\\\\g5=value5 value=1i 1000000000`

	// The per-iteration byte count is constant, so record it once rather
	// than on every pass through the timed loop.
	b.SetBytes(int64(len(line)))

	for i := 0; i < b.N; i++ {
		// A fresh []byte is handed to the parser on every iteration, as in
		// the original benchmark.
		pts, err := models.ParsePoints([]byte(line))
		if err != nil {
			// Fail loudly instead of timing a parse that did not succeed.
			b.Fatalf("parsing %q: %v", line, err)
		}
		if len(pts) == 0 {
			b.Fatalf("no points parsed from %q", line)
		}
		pts[0].Key()
	}
}

// TestPoint wraps a models.Point but also makes available the raw
// arguments to the Point.
//
Expand Down Expand Up @@ -725,7 +734,7 @@ func TestParsePointUnescapeTags(t *testing.T) {
},
time.Unix(0, 0)))

// backslash literal in tag value
// escaped backslash literal in tag value
test(t, `cpu,regions=eas\\t value=1.0`,
NewTestPoint(
"cpu",
Expand All @@ -737,7 +746,7 @@ func TestParsePointUnescapeTags(t *testing.T) {
},
time.Unix(0, 0)))

// backslash literal in tag name
// escaped backslash in tag name
test(t, `cpu,regio\\ns=east value=1.0`,
NewTestPoint(
"cpu",
Expand Down

0 comments on commit 68f96a3

Please sign in to comment.