Skip to content

Commit

Permalink
[Filebeat] Fix panic in decode_cef when recovering from invalid data (#…
Browse files Browse the repository at this point in the history
…30038)

* Fix panic in decode_cef when recovering from invalid data

When recovering from an invalid extension value the escape sequence state was
not cleared. This caused the parser to attempt to unescape the next extension which
resulted in invalid data or a panic.

Fixes #30010

* Encapsulate non-ragel state

Document and encapsulate the non-ragel state variables.

```
$ benchcmp before.txt after.txt
benchmark                   old ns/op     new ns/op     delta
BenchmarkEventUnpack-12     1991          1544          -22.45%

benchmark                   old allocs     new allocs     delta
BenchmarkEventUnpack-12     13             13             +0.00%

benchmark                   old bytes     new bytes     delta
BenchmarkEventUnpack-12     642           642           +0.00%
```

(cherry picked from commit 47b8d02)
  • Loading branch information
andrewkroh authored and mergify-bot committed Jan 27, 2022
1 parent 7be5c87 commit 4134cc0
Show file tree
Hide file tree
Showing 5 changed files with 219 additions and 173 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ https://github.com/elastic/beats/compare/v7.0.0-alpha2...master[Check the HEAD d
- ibmmq: Fixed `@timestamp` not being populated with correct values. {pull}29773[29773]
- Fix using log_group_name_prefix in aws-cloudwatch input. {pull}29695[29695]
- aws-s3: Improve gzip detection to avoid false negatives. {issue}29968[29968]
- decode_cef: Fix panic when recovering from invalid CEF extensions that contain escape characters. {issue}30010[30010]

*Heartbeat*

Expand Down
23 changes: 14 additions & 9 deletions x-pack/filebeat/processors/decode_cef/cef/cef.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,28 +152,31 @@ func (e *Event) Unpack(data string, opts ...Option) error {
return multierr.Combine(errs...)
}

type escapePosition struct {
start, end int
}

// replaceEscapes replaces the escaped characters contained in v with their
// unescaped value.
func replaceEscapes(v string, startOffset int, escapes []int) string {
func replaceEscapes(v string, startOffset int, escapes []escapePosition) string {
if len(escapes) == 0 {
return v
}

// Adjust escape offsets relative to the start offset of v.
for i := 0; i < len(escapes); i++ {
escapes[i] = escapes[i] - startOffset
escapes[i].start = escapes[i].start - startOffset
escapes[i].end = escapes[i].end - startOffset
}

var buf strings.Builder
var end int
var prevEnd int

// Iterate over escapes and replace them.
for i := 0; i < len(escapes); i += 2 {
start := escapes[i]
buf.WriteString(v[end:start])
for _, escape := range escapes {
buf.WriteString(v[prevEnd:escape.start])

end = escapes[i+1]
value := v[start:end]
value := v[escape.start:escape.end]

switch value {
case `\n`:
Expand All @@ -186,8 +189,10 @@ func replaceEscapes(v string, startOffset int, escapes []int) string {
buf.WriteString(value[1:])
}
}

prevEnd = escape.end
}
buf.WriteString(v[end:])
buf.WriteString(v[prevEnd:])

return buf.String()
}
72 changes: 43 additions & 29 deletions x-pack/filebeat/processors/decode_cef/cef/cef.rl
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,31 @@ import (
variable pe pe;
}%%

type cefState struct {
key string // Extension key.
valueStart int // Start index of extension value.
valueEnd int // End index of extension value.
escapes []escapePosition // Array of escapes indices within the current value.
}

func (s *cefState) reset() {
s.key = ""
s.valueStart = 0
s.valueEnd = 0
s.escapes = s.escapes[:0]
}

func (s *cefState) pushEscape(start, end int) {
s.escapes = append(s.escapes, escapePosition{start, end})
}

// unpack unpacks a CEF message.
func (e *Event) unpack(data string) error {
cs, p, pe, eof := 0, 0, len(data), len(data)
mark, mark_slash := 0, 0
var escapes []int

// Extension key.
var extKey string

// Extension value start and end indices.
extValueStart, extValueEnd := 0, 0
// state related to CEF values.
var state cefState

// recoveredErrs are problems with the message that the parser was able to
// recover from (though the parsing might not be "correct").
Expand All @@ -42,62 +56,62 @@ func (e *Event) unpack(data string) error {
mark_slash = p
}
action mark_escape {
escapes = append(escapes, mark_slash, p)
state.pushEscape(mark_slash, p)
}
action version {
e.Version, _ = strconv.Atoi(data[mark:p])
}
action device_vendor {
e.DeviceVendor = replaceEscapes(data[mark:p], mark, escapes)
escapes = escapes[:0]
e.DeviceVendor = replaceEscapes(data[mark:p], mark, state.escapes)
state.reset()
}
action device_product {
e.DeviceProduct = replaceEscapes(data[mark:p], mark, escapes)
escapes = escapes[:0]
e.DeviceProduct = replaceEscapes(data[mark:p], mark, state.escapes)
state.reset()
}
action device_version {
e.DeviceVersion = replaceEscapes(data[mark:p], mark, escapes)
escapes = escapes[:0]
e.DeviceVersion = replaceEscapes(data[mark:p], mark, state.escapes)
state.reset()
}
action device_event_class_id {
e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, escapes)
escapes = escapes[:0]
e.DeviceEventClassID = replaceEscapes(data[mark:p], mark, state.escapes)
state.reset()
}
action name {
e.Name = replaceEscapes(data[mark:p], mark, escapes)
escapes = escapes[:0]
e.Name = replaceEscapes(data[mark:p], mark, state.escapes)
state.reset()
}
action severity {
e.Severity = data[mark:p]
}
action extension_key {
// A new extension key marks the end of the last extension value.
if len(extKey) > 0 && extValueStart <= mark - 1 {
e.pushExtension(extKey, replaceEscapes(data[extValueStart:mark-1], extValueStart, escapes))
extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0]
if len(state.key) > 0 && state.valueStart <= mark - 1 {
e.pushExtension(state.key, replaceEscapes(data[state.valueStart:mark-1], state.valueStart, state.escapes))
state.reset()
}
extKey = data[mark:p]
state.key = data[mark:p]
}
action extension_value_start {
extValueStart = p;
extValueEnd = p
state.valueStart = p;
state.valueEnd = p
}
action extension_value_mark {
extValueEnd = p+1
state.valueEnd = p+1
}
action extension_eof {
// Reaching the EOF marks the end of the final extension value.
if len(extKey) > 0 && extValueStart <= extValueEnd {
e.pushExtension(extKey, replaceEscapes(data[extValueStart:extValueEnd], extValueStart, escapes))
extKey, extValueStart, extValueEnd, escapes = "", 0, 0, escapes[:0]
if len(state.key) > 0 && state.valueStart <= state.valueEnd {
e.pushExtension(state.key, replaceEscapes(data[state.valueStart:state.valueEnd], state.valueStart, state.escapes))
state.reset()
}
}
action extension_err {
recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", extKey, p+1))
recoveredErrs = append(recoveredErrs, fmt.Errorf("malformed value for %s at pos %d", state.key, p+1))
fhold; fnext gobble_extension;
}
action recover_next_extension {
extKey, extValueStart, extValueEnd = "", 0, 0
state.reset()
// Resume processing at p, the start of the next extension key.
p = mark;
fnext extensions;
Expand Down
12 changes: 12 additions & 0 deletions x-pack/filebeat/processors/decode_cef/cef/cef_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,18 @@ func TestEventUnpack(t *testing.T) {
"msg": StringField("Newlines in messages\nare allowed.\r\nAnd so are carriage feeds\\newlines\\=."),
}, e.Extensions)
})

t.Run("error recovery with escape", func(t *testing.T) {
// Ensure no panic or regression of https://github.com/elastic/beats/issues/30010.
// key1 contains an escape, but then an invalid non-escaped =.
// This triggers the error recovery to try to read the next key.
var e Event
err := e.Unpack(`CEF:0|||||||key1=\\hi= key2=a`)
assert.Error(t, err)
assert.Equal(t, map[string]*Field{
"key2": UndocumentedField("a"),
}, e.Extensions)
})
}

func TestEventUnpackWithFullExtensionNames(t *testing.T) {
Expand Down
Loading

0 comments on commit 4134cc0

Please sign in to comment.