From 1f3b14fd1c860e0e30550b95eed8d38293ccde03 Mon Sep 17 00:00:00 2001 From: urso Date: Wed, 25 Jan 2017 17:26:18 +0100 Subject: [PATCH 1/6] Use matchers in processor conditionals --- libbeat/common/match/matcher.go | 43 ++++++ libbeat/processors/condition.go | 214 ++++++++++++++------------- libbeat/processors/condition_test.go | 55 +++++++ libbeat/processors/config.go | 1 + 4 files changed, 212 insertions(+), 101 deletions(-) diff --git a/libbeat/common/match/matcher.go b/libbeat/common/match/matcher.go index b848e1981350..c9577a2bc167 100644 --- a/libbeat/common/match/matcher.go +++ b/libbeat/common/match/matcher.go @@ -37,6 +37,15 @@ func MustCompileExact(pattern string) ExactMatcher { return m } +// CompileString matches a substring only, the input is not interpreted as +// regular expression +func CompileString(in string) (Matcher, error) { + if in == "" { + return Matcher{(*emptyStringMatcher)(nil)}, nil + } + return Matcher{&substringMatcher{in, []byte(in)}}, nil +} + // Compile regular expression to string matcher. 
String matcher by default uses // regular expressions as provided by regexp library, but tries to optimize some // common cases, replacing expensive patterns with cheaper custom implementations @@ -93,6 +102,22 @@ func (m *Matcher) Unpack(s string) error { return nil } +func (m *Matcher) MatchAnyString(strs []string) bool { + return matchAnyStrings(m.stringMatcher, strs) +} + +func (m *Matcher) MatchAllStrings(strs []string) bool { + return matchAllStrings(m.stringMatcher, strs) +} + +func (m *ExactMatcher) MatchAnyString(strs []string) bool { + return matchAnyStrings(m.stringMatcher, strs) +} + +func (m *ExactMatcher) MatchAllStrings(strs []string) bool { + return matchAllStrings(m.stringMatcher, strs) +} + func (m *ExactMatcher) Unpack(s string) error { tmp, err := CompileExact(s) if err != nil { @@ -102,3 +127,21 @@ func (m *ExactMatcher) Unpack(s string) error { *m = tmp return nil } + +func matchAnyStrings(m stringMatcher, strs []string) bool { + for _, s := range strs { + if m.MatchString(s) { + return true + } + } + return false +} + +func matchAllStrings(m stringMatcher, strs []string) bool { + for _, s := range strs { + if !m.MatchString(s) { + return false + } + } + return true +} diff --git a/libbeat/processors/condition.go b/libbeat/processors/condition.go index a500c7276626..4e26fd1c5ccb 100644 --- a/libbeat/processors/condition.go +++ b/libbeat/processors/condition.go @@ -1,13 +1,14 @@ package processors import ( + "errors" "fmt" "reflect" - "regexp" "strconv" "strings" "github.com/elastic/beats/libbeat/common" + "github.com/elastic/beats/libbeat/common/match" "github.com/elastic/beats/libbeat/logp" ) @@ -25,8 +26,9 @@ type EqualsValue struct { type Condition struct { equals map[string]EqualsValue - contains map[string]string - regexp map[string]*regexp.Regexp + matches map[string]match.Matcher + contains map[string]match.Matcher + regexp map[string]match.Matcher rangexp map[string]RangeValue or []Condition and []Condition @@ -52,7 +54,6 @@ func 
NewConditional( } func NewCondition(config *ConditionConfig) (*Condition, error) { - c := Condition{} if config == nil { @@ -60,52 +61,48 @@ func NewCondition(config *ConditionConfig) (*Condition, error) { return nil, nil } - if config.Equals != nil { - if err := c.setEquals(config.Equals); err != nil { - return nil, err - } - } else if config.Contains != nil { - if err := c.setContains(config.Contains); err != nil { - return nil, err - } - } else if config.Regexp != nil { - if err := c.setRegexp(config.Regexp); err != nil { - return nil, err - } - } else if config.Range != nil { - if err := c.setRange(config.Range); err != nil { - return nil, err - } - } else if len(config.OR) > 0 { - for _, condConfig := range config.OR { - cond, err := NewCondition(&condConfig) - if err != nil { - return nil, err - } - c.or = append(c.or, *cond) - } - } else if len(config.AND) > 0 { - for _, condConfig := range config.AND { - cond, err := NewCondition(&condConfig) - if err != nil { - return nil, err - } - c.and = append(c.and, *cond) - } - } else if config.NOT != nil { - cond, err := NewCondition(config.NOT) - if err != nil { - return nil, err - } - c.not = cond - } else { - return nil, fmt.Errorf("missing condition") + var err error + switch { + case config.Equals != nil: + err = c.setEquals(config.Equals) + case config.Contains != nil: + err = c.setContains(config.Contains) + case config.Regexp != nil: + err = c.setRegexp(config.Regexp) + case config.Match != nil: + err = c.setMatches(config.Match) + case config.Range != nil: + err = c.setRange(config.Range) + case len(config.OR) > 0: + c.or, err = NewConditionList(config.OR) + case len(config.AND) > 0: + c.and, err = NewConditionList(config.AND) + case config.NOT != nil: + c.not, err = NewCondition(config.NOT) + default: + err = errors.New("missing condition") + } + if err != nil { + return nil, err } logp.Debug("processors", "New condition %s", c) return &c, nil } +func NewConditionList(config []ConditionConfig) 
([]Condition, error) { + out := make([]Condition, len(config)) + for i, condConfig := range config { + cond, err := NewCondition(&condConfig) + if err != nil { + return nil, err + } + + out[i] = *cond + } + return out, nil +} + func (c *Condition) setEquals(cfg *ConditionFields) error { c.equals = map[string]EqualsValue{} @@ -128,12 +125,18 @@ func (c *Condition) setEquals(cfg *ConditionFields) error { func (c *Condition) setContains(cfg *ConditionFields) error { - c.contains = map[string]string{} + if c.matches == nil { + c.matches = map[string]match.Matcher{} + } for field, value := range cfg.fields { switch v := value.(type) { case string: - c.contains[field] = v + m, err := match.CompileString(v) + if err != nil { + return err + } + c.matches[field] = m default: return fmt.Errorf("unexpected type %T of %v", value, value) } @@ -142,24 +145,37 @@ func (c *Condition) setContains(cfg *ConditionFields) error { return nil } -func (c *Condition) setRegexp(cfg *ConditionFields) error { +func (c *Condition) setRegexp(cfg *ConditionFields) (err error) { + c.regexp, err = compileMatches(cfg.fields) + return +} - var err error +func (c *Condition) setMatches(cfg *ConditionFields) (err error) { + c.matches, err = compileMatches(cfg.fields) + return +} + +func compileMatches(fields map[string]interface{}) (map[string]match.Matcher, error) { + if len(fields) == 0 { + return nil, nil + } + + out := map[string]match.Matcher{} + for field, value := range fields { + var err error - c.regexp = map[string]*regexp.Regexp{} - for field, value := range cfg.fields { switch v := value.(type) { case string: - c.regexp[field], err = regexp.Compile(v) + out[field], err = match.Compile(v) if err != nil { - return err + return nil, err } default: - return fmt.Errorf("unexpected type %T of %v", value, value) + return nil, fmt.Errorf("unexpected type %T of %v", value, value) } } - return nil + return out, nil } func (c *Condition) setRange(cfg *ConditionFields) error { @@ -222,20 +238,11 @@ 
func (c *Condition) Check(event common.MapStr) bool { return c.checkNOT(event) } - if !c.checkEquals(event) { - return false - } - if !c.checkContains(event) { - return false - } - if !c.checkRegexp(event) { - return false - } - if !c.checkRange(event) { - return false - } - - return true + return c.checkEquals(event) && + c.checkContains(event) && + c.checkRegexp(event) && + c.checkMatches(event) && + c.checkRange(event) } func (c *Condition) checkEquals(event common.MapStr) bool { @@ -269,55 +276,57 @@ func (c *Condition) checkEquals(event common.MapStr) bool { } func (c *Condition) checkContains(event common.MapStr) bool { -outer: - for field, equalValue := range c.contains { + return checkMatchers("contains", c.contains, event) +} + +func (c *Condition) checkRegexp(event common.MapStr) bool { + return checkMatchers("regexp", c.regexp, event) +} + +func (c *Condition) checkMatches(event common.MapStr) bool { + return checkMatchers("match", c.matches, event) +} + +func checkMatchers( + typ string, + matchers map[string]match.Matcher, + event common.MapStr, +) bool { + if matchers == nil { + return true + } + + for field, matcher := range matchers { value, err := event.GetValue(field) if err != nil { return false } - switch value.(type) { + switch v := value.(type) { case string: - if !strings.Contains(value.(string), equalValue) { + if !matcher.MatchString(v) { return false } + case []string: - for _, s := range value.([]string) { - if strings.Contains(s, equalValue) { - continue outer - } + if !matcher.MatchAnyString(v) { + return false } - return false - default: - logp.Warn("unexpected type %T in contains condition as it accepts only strings.", value) - return false - } - } - - return true -} - -func (c *Condition) checkRegexp(event common.MapStr) bool { - - for field, equalValue := range c.regexp { - value, err := event.GetValue(field) - if err != nil { - return false - } + default: + str, err := extractString(value) + if err != nil { + logp.Warn("unexpected 
type %T in %v condition as it accepts only strings.", value, typ) + return false + } - sValue, err := extractString(value) - if err != nil { - logp.Warn("unexpected type %T in regexp condition as it accepts only strings. ", value) - return false - } - if !equalValue.MatchString(sValue) { - return false + if !matcher.MatchString(str) { + return false + } } } return true - } func (c *Condition) checkRange(event common.MapStr) bool { @@ -426,6 +435,9 @@ func (c Condition) String() string { if len(c.regexp) > 0 { s = s + fmt.Sprintf("regexp: %v", c.regexp) } + if len(c.matches) > 0 { + s = s + fmt.Sprintf("match: %v", c.matches) + } if len(c.rangexp) > 0 { s = s + fmt.Sprintf("range: %v", c.rangexp) } diff --git a/libbeat/processors/condition_test.go b/libbeat/processors/condition_test.go index a643c489a844..083019946332 100644 --- a/libbeat/processors/condition_test.go +++ b/libbeat/processors/condition_test.go @@ -236,6 +236,61 @@ func TestRegexpCondition(t *testing.T) { assert.False(t, conds[2].Check(event1)) } +func TestMatchCondition(t *testing.T) { + + if testing.Verbose() { + logp.LogInit(logp.LOG_DEBUG, "", false, true, []string{"*"}) + } + + configs := []ConditionConfig{ + { + Match: &ConditionFields{fields: map[string]interface{}{ + "source": "apache2/error.*", + }}, + }, + + { + Match: &ConditionFields{fields: map[string]interface{}{ + "source": "apache2/access.*", + }}, + }, + + { + Match: &ConditionFields{fields: map[string]interface{}{ + "source": "apache2/error.*", + "message": "[client 1.2.3.4]", + }}, + }, + } + + conds := GetConditions(t, configs) + + event := common.MapStr{ + "@timestamp": "2016-04-14T20:41:06.258Z", + "message": `[Fri Dec 16 01:46:23 2005] [error] [client 1.2.3.4] Directory index forbidden by rule: /home/test/`, + "source": "/var/log/apache2/error.log", + "type": "log", + "input_type": "log", + "offset": 30, + } + + event1 := common.MapStr{ + "@timestamp": "2016-04-14T20:41:06.258Z", + "message": `127.0.0.1 - - [28/Jul/2006:10:27:32 
-0300] "GET /hidden/ HTTP/1.0" 404 7218`, + "source": "/var/log/apache2/access.log", + "type": "log", + "input_type": "log", + "offset": 30, + } + + assert.True(t, conds[0].Check(event)) + assert.False(t, conds[1].Check(event)) + assert.True(t, conds[2].Check(event)) + + assert.True(t, conds[1].Check(event1)) + assert.False(t, conds[2].Check(event1)) +} + func TestRangeCondition(t *testing.T) { if testing.Verbose() { diff --git a/libbeat/processors/config.go b/libbeat/processors/config.go index 721d23c5696c..987c9d9ffac2 100644 --- a/libbeat/processors/config.go +++ b/libbeat/processors/config.go @@ -12,6 +12,7 @@ type ConditionConfig struct { Equals *ConditionFields `config:"equals"` Contains *ConditionFields `config:"contains"` Regexp *ConditionFields `config:"regexp"` + Match *ConditionFields `config:"match"` Range *ConditionFields `config:"range"` OR []ConditionConfig `config:"or"` AND []ConditionConfig `config:"and"` From 893c0c1100a86eb7f5e56c0fa86c65bbb5245869 Mon Sep 17 00:00:00 2001 From: urso Date: Wed, 25 Jan 2017 17:38:43 +0100 Subject: [PATCH 2/6] filebeat include/exclude lines/files using match.Matcher --- filebeat/harvester/config.go | 6 ++--- filebeat/harvester/log.go | 4 ++-- filebeat/harvester/log_test.go | 22 +++++++---------- filebeat/harvester/util.go | 12 ++++------ filebeat/harvester/util_test.go | 23 +++++++----------- filebeat/prospector/config.go | 24 +++++++++---------- filebeat/prospector/prospector_log.go | 2 +- .../prospector/prospector_log_other_test.go | 8 +++---- .../prospector/prospector_log_windows_test.go | 6 ++--- filebeat/prospector/prospector_test.go | 8 +++---- 10 files changed, 51 insertions(+), 64 deletions(-) diff --git a/filebeat/harvester/config.go b/filebeat/harvester/config.go index d1baaf9a6385..c4c8068fe48c 100644 --- a/filebeat/harvester/config.go +++ b/filebeat/harvester/config.go @@ -2,12 +2,12 @@ package harvester import ( "fmt" - "regexp" "time" cfg "github.com/elastic/beats/filebeat/config" 
"github.com/elastic/beats/filebeat/harvester/reader" "github.com/elastic/beats/libbeat/common" + "github.com/elastic/beats/libbeat/common/match" "github.com/dustin/go-humanize" "github.com/elastic/beats/libbeat/logp" @@ -47,8 +47,8 @@ type harvesterConfig struct { CloseEOF bool `config:"close_eof"` CloseTimeout time.Duration `config:"close_timeout" validate:"min=0"` ForceCloseFiles bool `config:"force_close_files"` - ExcludeLines []*regexp.Regexp `config:"exclude_lines"` - IncludeLines []*regexp.Regexp `config:"include_lines"` + ExcludeLines []match.Matcher `config:"exclude_lines"` + IncludeLines []match.Matcher `config:"include_lines"` MaxBytes int `config:"max_bytes" validate:"min=0,nonzero"` Multiline *reader.MultilineConfig `config:"multiline"` JSON *reader.JSONConfig `config:"json"` diff --git a/filebeat/harvester/log.go b/filebeat/harvester/log.go index a47bf67809c8..7b8a4a2e7750 100644 --- a/filebeat/harvester/log.go +++ b/filebeat/harvester/log.go @@ -163,14 +163,14 @@ func (h *Harvester) sendEvent(event *input.Event) bool { // the include_lines and exclude_lines options. 
func (h *Harvester) shouldExportLine(line string) bool { if len(h.config.IncludeLines) > 0 { - if !MatchAnyRegexps(h.config.IncludeLines, line) { + if !MatchAny(h.config.IncludeLines, line) { // drop line logp.Debug("harvester", "Drop line as it does not match any of the include patterns %s", line) return false } } if len(h.config.ExcludeLines) > 0 { - if MatchAnyRegexps(h.config.ExcludeLines, line) { + if MatchAny(h.config.ExcludeLines, line) { // drop line logp.Debug("harvester", "Drop line as it does match one of the exclude patterns%s", line) return false diff --git a/filebeat/harvester/log_test.go b/filebeat/harvester/log_test.go index 910d611ac76d..8862b15c8dc2 100644 --- a/filebeat/harvester/log_test.go +++ b/filebeat/harvester/log_test.go @@ -104,29 +104,23 @@ func TestReadLine(t *testing.T) { } func TestExcludeLine(t *testing.T) { - - regexp, err := InitRegexps([]string{"^DBG"}) - + regexp, err := InitMatchers("^DBG") assert.Nil(t, err) - - assert.True(t, MatchAnyRegexps(regexp, "DBG: a debug message")) - assert.False(t, MatchAnyRegexps(regexp, "ERR: an error message")) + assert.True(t, MatchAny(regexp, "DBG: a debug message")) + assert.False(t, MatchAny(regexp, "ERR: an error message")) } func TestIncludeLine(t *testing.T) { - - regexp, err := InitRegexps([]string{"^ERR", "^WARN"}) + regexp, err := InitMatchers("^ERR", "^WARN") assert.Nil(t, err) - - assert.False(t, MatchAnyRegexps(regexp, "DBG: a debug message")) - assert.True(t, MatchAnyRegexps(regexp, "ERR: an error message")) - assert.True(t, MatchAnyRegexps(regexp, "WARNING: a simple warning message")) + assert.False(t, MatchAny(regexp, "DBG: a debug message")) + assert.True(t, MatchAny(regexp, "ERR: an error message")) + assert.True(t, MatchAny(regexp, "WARNING: a simple warning message")) } func TestInitRegexp(t *testing.T) { - - _, err := InitRegexps([]string{"((((("}) + _, err := InitMatchers("(((((") assert.NotNil(t, err) } diff --git a/filebeat/harvester/util.go b/filebeat/harvester/util.go 
index 1cc7d0ad4126..b847f74ad702 100644 --- a/filebeat/harvester/util.go +++ b/filebeat/harvester/util.go @@ -1,15 +1,13 @@ package harvester -import "regexp" +import "github.com/elastic/beats/libbeat/common/match" -// MatchAnyRegexps checks if the text matches any of the regular expressions -func MatchAnyRegexps(regexps []*regexp.Regexp, text string) bool { - - for _, rexp := range regexps { - if rexp.MatchString(text) { +// MatchAny checks if the text matches any of the regular expressions +func MatchAny(matchers []match.Matcher, text string) bool { + for _, m := range matchers { + if m.MatchString(text) { return true } } - return false } diff --git a/filebeat/harvester/util_test.go b/filebeat/harvester/util_test.go index ec3ca77f36c6..7900c4541a5a 100644 --- a/filebeat/harvester/util_test.go +++ b/filebeat/harvester/util_test.go @@ -3,20 +3,20 @@ package harvester import ( - "regexp" "testing" - "github.com/elastic/beats/libbeat/logp" "github.com/stretchr/testify/assert" -) -// InitRegexps initializes a list of compiled regular expressions. -func InitRegexps(exprs []string) ([]*regexp.Regexp, error) { + "github.com/elastic/beats/libbeat/common/match" + "github.com/elastic/beats/libbeat/logp" +) - result := []*regexp.Regexp{} +// InitMatchers initializes a list of compiled regular expressions. 
+func InitMatchers(exprs ...string) ([]match.Matcher, error) { + result := []match.Matcher{} for _, exp := range exprs { - rexp, err := regexp.Compile(exp) + rexp, err := match.Compile(exp) if err != nil { logp.Err("Fail to compile the regexp %s: %s", exp, err) return nil, err @@ -27,13 +27,8 @@ func InitRegexps(exprs []string) ([]*regexp.Regexp, error) { } func TestMatchAnyRegexps(t *testing.T) { - - patterns := []string{"\\.gz$"} - - regexps, err := InitRegexps(patterns) - + matchers, err := InitMatchers("\\.gz$") assert.Nil(t, err) - - assert.Equal(t, MatchAnyRegexps(regexps, "/var/log/log.gz"), true) + assert.Equal(t, MatchAny(matchers, "/var/log/log.gz"), true) } diff --git a/filebeat/prospector/config.go b/filebeat/prospector/config.go index 1c0e8156319d..7bcc88eb99ae 100644 --- a/filebeat/prospector/config.go +++ b/filebeat/prospector/config.go @@ -2,10 +2,10 @@ package prospector import ( "fmt" - "regexp" "time" cfg "github.com/elastic/beats/filebeat/config" + "github.com/elastic/beats/libbeat/common/match" ) var ( @@ -23,17 +23,17 @@ var ( ) type prospectorConfig struct { - Enabled bool `config:"enabled"` - ExcludeFiles []*regexp.Regexp `config:"exclude_files"` - IgnoreOlder time.Duration `config:"ignore_older"` - Paths []string `config:"paths"` - ScanFrequency time.Duration `config:"scan_frequency" validate:"min=0,nonzero"` - InputType string `config:"input_type"` - CleanInactive time.Duration `config:"clean_inactive" validate:"min=0"` - CleanRemoved bool `config:"clean_removed"` - HarvesterLimit uint64 `config:"harvester_limit" validate:"min=0"` - Symlinks bool `config:"symlinks"` - TailFiles bool `config:"tail_files"` + Enabled bool `config:"enabled"` + ExcludeFiles []match.Matcher `config:"exclude_files"` + IgnoreOlder time.Duration `config:"ignore_older"` + Paths []string `config:"paths"` + ScanFrequency time.Duration `config:"scan_frequency" validate:"min=0,nonzero"` + InputType string `config:"input_type"` + CleanInactive time.Duration 
`config:"clean_inactive" validate:"min=0"` + CleanRemoved bool `config:"clean_removed"` + HarvesterLimit uint64 `config:"harvester_limit" validate:"min=0"` + Symlinks bool `config:"symlinks"` + TailFiles bool `config:"tail_files"` } func (config *prospectorConfig) Validate() error { diff --git a/filebeat/prospector/prospector_log.go b/filebeat/prospector/prospector_log.go index b4ae09fc7e54..ff4b62a5c611 100644 --- a/filebeat/prospector/prospector_log.go +++ b/filebeat/prospector/prospector_log.go @@ -342,7 +342,7 @@ func (p *ProspectorLog) handleIgnoreOlder(lastState, newState file.State) error // isFileExcluded checks if the given path should be excluded func (p *ProspectorLog) isFileExcluded(file string) bool { patterns := p.config.ExcludeFiles - return len(patterns) > 0 && harvester.MatchAnyRegexps(patterns, file) + return len(patterns) > 0 && harvester.MatchAny(patterns, file) } // isIgnoreOlder checks if the given state reached ignore_older diff --git a/filebeat/prospector/prospector_log_other_test.go b/filebeat/prospector/prospector_log_other_test.go index 2dd3bba582ba..0f7e434edf5e 100644 --- a/filebeat/prospector/prospector_log_other_test.go +++ b/filebeat/prospector/prospector_log_other_test.go @@ -3,11 +3,11 @@ package prospector import ( - "regexp" "testing" "github.com/elastic/beats/filebeat/input" "github.com/elastic/beats/filebeat/input/file" + "github.com/elastic/beats/libbeat/common/match" "github.com/stretchr/testify/assert" ) @@ -15,7 +15,7 @@ import ( var matchTests = []struct { file string paths []string - excludeFiles []*regexp.Regexp + excludeFiles []match.Matcher result bool }{ { @@ -45,13 +45,13 @@ var matchTests = []struct { { "test/test.log", []string{"test/*"}, - []*regexp.Regexp{regexp.MustCompile("test.log")}, + []match.Matcher{match.MustCompile("test.log")}, false, }, { "test/test.log", []string{"test/*"}, - []*regexp.Regexp{regexp.MustCompile("test2.log")}, + []match.Matcher{match.MustCompile("test2.log")}, true, }, } diff --git 
a/filebeat/prospector/prospector_log_windows_test.go b/filebeat/prospector/prospector_log_windows_test.go index a86c4095478a..213d35a11afa 100644 --- a/filebeat/prospector/prospector_log_windows_test.go +++ b/filebeat/prospector/prospector_log_windows_test.go @@ -1,18 +1,18 @@ -// +build windows +// +build !integration package prospector import ( - "regexp" "testing" + "github.com/elastic/beats/libbeat/common/match" "github.com/stretchr/testify/assert" ) var matchTestsWindows = []struct { file string paths []string - excludeFiles []*regexp.Regexp + excludeFiles []match.Matcher result bool }{ { diff --git a/filebeat/prospector/prospector_test.go b/filebeat/prospector/prospector_test.go index f4ed5460def9..d4e957957644 100644 --- a/filebeat/prospector/prospector_test.go +++ b/filebeat/prospector/prospector_test.go @@ -3,12 +3,12 @@ package prospector import ( - "regexp" "testing" - "github.com/elastic/beats/filebeat/input/file" - "github.com/stretchr/testify/assert" + + "github.com/elastic/beats/filebeat/input/file" + "github.com/elastic/beats/libbeat/common/match" ) func TestProspectorInitInputTypeLogError(t *testing.T) { @@ -28,7 +28,7 @@ func TestProspectorFileExclude(t *testing.T) { prospector := Prospector{ config: prospectorConfig{ - ExcludeFiles: []*regexp.Regexp{regexp.MustCompile(`\.gz$`)}, + ExcludeFiles: []match.Matcher{match.MustCompile(`\.gz$`)}, }, } From b3271a5042f0826ab50679931ddc43f1a35eee97 Mon Sep 17 00:00:00 2001 From: urso Date: Wed, 25 Jan 2017 17:45:46 +0100 Subject: [PATCH 3/6] Update filebeat multiline to use match.Matcher --- filebeat/harvester/reader/multiline.go | 17 +++++++++-------- filebeat/harvester/reader/multiline_config.go | 5 +++-- filebeat/harvester/reader/multiline_test.go | 12 ++++++------ 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/filebeat/harvester/reader/multiline.go b/filebeat/harvester/reader/multiline.go index 661286c4dee3..edc50d4d522e 100644 --- a/filebeat/harvester/reader/multiline.go +++ 
b/filebeat/harvester/reader/multiline.go @@ -3,8 +3,9 @@ package reader import ( "errors" "fmt" - "regexp" "time" + + "github.com/elastic/beats/libbeat/common/match" ) // MultiLine reader combining multiple line events into one multi-line event. @@ -55,7 +56,7 @@ func NewMultiline( maxBytes int, config *MultilineConfig, ) (*Multiline, error) { - types := map[string]func(*regexp.Regexp) (matcher, error){ + types := map[string]func(match.Matcher) (matcher, error){ "before": beforeMatcher, "after": afterMatcher, } @@ -280,14 +281,14 @@ func (mlr *Multiline) setState(next func(mlr *Multiline) (Message, error)) { // matchers -func afterMatcher(regex *regexp.Regexp) (matcher, error) { - return genPatternMatcher(regex, func(last, current []byte) []byte { +func afterMatcher(pat match.Matcher) (matcher, error) { + return genPatternMatcher(pat, func(last, current []byte) []byte { return current }) } -func beforeMatcher(regex *regexp.Regexp) (matcher, error) { - return genPatternMatcher(regex, func(last, current []byte) []byte { +func beforeMatcher(pat match.Matcher) (matcher, error) { + return genPatternMatcher(pat, func(last, current []byte) []byte { return last }) } @@ -299,12 +300,12 @@ func negatedMatcher(m matcher) matcher { } func genPatternMatcher( - regex *regexp.Regexp, + pat match.Matcher, sel func(last, current []byte) []byte, ) (matcher, error) { matcher := func(last, current []byte) bool { line := sel(last, current) - return regex.Match(line) + return pat.Match(line) } return matcher, nil } diff --git a/filebeat/harvester/reader/multiline_config.go b/filebeat/harvester/reader/multiline_config.go index 5e52e4269380..783b24386b88 100644 --- a/filebeat/harvester/reader/multiline_config.go +++ b/filebeat/harvester/reader/multiline_config.go @@ -2,15 +2,16 @@ package reader import ( "fmt" - "regexp" "time" + + "github.com/elastic/beats/libbeat/common/match" ) type MultilineConfig struct { Negate bool `config:"negate"` Match string `config:"match" validate:"required"` 
MaxLines *int `config:"max_lines"` - Pattern *regexp.Regexp `config:"pattern"` + Pattern match.Matcher `config:"pattern"` Timeout *time.Duration `config:"timeout" validate:"positive"` } diff --git a/filebeat/harvester/reader/multiline_test.go b/filebeat/harvester/reader/multiline_test.go index 5042031823ba..16434f30bd21 100644 --- a/filebeat/harvester/reader/multiline_test.go +++ b/filebeat/harvester/reader/multiline_test.go @@ -6,12 +6,12 @@ import ( "bytes" "errors" "os" - "regexp" "strings" "testing" "time" "github.com/elastic/beats/filebeat/harvester/encoding" + "github.com/elastic/beats/libbeat/common/match" "github.com/stretchr/testify/assert" ) @@ -26,7 +26,7 @@ func (p bufferSource) Continuable() bool { return false } func TestMultilineAfterOK(t *testing.T) { testMultilineOK(t, MultilineConfig{ - Pattern: regexp.MustCompile(`^[ \t] +`), // next line is indented by spaces + Pattern: match.MustCompile(`^[ \t] +`), // next line is indented by spaces Match: "after", }, 2, @@ -38,7 +38,7 @@ func TestMultilineAfterOK(t *testing.T) { func TestMultilineBeforeOK(t *testing.T) { testMultilineOK(t, MultilineConfig{ - Pattern: regexp.MustCompile(`\\$`), // previous line ends with \ + Pattern: match.MustCompile(`\\$`), // previous line ends with \ Match: "before", }, 2, @@ -50,7 +50,7 @@ func TestMultilineBeforeOK(t *testing.T) { func TestMultilineAfterNegateOK(t *testing.T) { testMultilineOK(t, MultilineConfig{ - Pattern: regexp.MustCompile(`^-`), // first line starts with '-' at beginning of line + Pattern: match.MustCompile(`^-`), // first line starts with '-' at beginning of line Negate: true, Match: "after", }, @@ -63,7 +63,7 @@ func TestMultilineAfterNegateOK(t *testing.T) { func TestMultilineBeforeNegateOK(t *testing.T) { testMultilineOK(t, MultilineConfig{ - Pattern: regexp.MustCompile(`;$`), // last line ends with ';' + Pattern: match.MustCompile(`;$`), // last line ends with ';' Negate: true, Match: "before", }, @@ -76,7 +76,7 @@ func 
TestMultilineBeforeNegateOK(t *testing.T) { func TestMultilineBeforeNegateOKWithEmptyLine(t *testing.T) { testMultilineOK(t, MultilineConfig{ - Pattern: regexp.MustCompile(`;$`), // last line ends with ';' + Pattern: match.MustCompile(`;$`), // last line ends with ';' Negate: true, Match: "before", }, From 450f9988b6ec468fa396b1791819a4efcc6851ec Mon Sep 17 00:00:00 2001 From: urso Date: Wed, 25 Jan 2017 17:49:44 +0100 Subject: [PATCH 4/6] metricbeat system module whitelist using new string matcher --- metricbeat/module/system/process/helper.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/metricbeat/module/system/process/helper.go b/metricbeat/module/system/process/helper.go index adce7f353ffd..e71d0c8d2a36 100644 --- a/metricbeat/module/system/process/helper.go +++ b/metricbeat/module/system/process/helper.go @@ -5,12 +5,12 @@ package process import ( "fmt" "os" - "regexp" "runtime" "strings" "time" "github.com/elastic/beats/libbeat/common" + "github.com/elastic/beats/libbeat/common/match" "github.com/elastic/beats/libbeat/logp" "github.com/elastic/beats/metricbeat/module/system" "github.com/elastic/beats/metricbeat/module/system/memory" @@ -40,8 +40,8 @@ type ProcStats struct { CpuTicks bool EnvWhitelist []string - procRegexps []*regexp.Regexp // List of regular expressions used to whitelist processes. - envRegexps []*regexp.Regexp // List of regular expressions used to whitelist env vars. + procRegexps []match.Matcher // List of regular expressions used to whitelist processes. + envRegexps []match.Matcher // List of regular expressions used to whitelist env vars. 
} // newProcess creates a new Process object and initializes it with process @@ -297,18 +297,18 @@ func (procStats *ProcStats) InitProcStats() error { return nil } - procStats.procRegexps = []*regexp.Regexp{} + procStats.procRegexps = []match.Matcher{} for _, pattern := range procStats.Procs { - reg, err := regexp.Compile(pattern) + reg, err := match.Compile(pattern) if err != nil { return fmt.Errorf("Failed to compile regexp [%s]: %v", pattern, err) } procStats.procRegexps = append(procStats.procRegexps, reg) } - procStats.envRegexps = make([]*regexp.Regexp, 0, len(procStats.EnvWhitelist)) + procStats.envRegexps = make([]match.Matcher, 0, len(procStats.EnvWhitelist)) for _, pattern := range procStats.EnvWhitelist { - reg, err := regexp.Compile(pattern) + reg, err := match.Compile(pattern) if err != nil { return fmt.Errorf("failed to compile env whitelist regexp [%v]: %v", pattern, err) } From 0e54571e40519017d4dff55831cf3881d27924c2 Mon Sep 17 00:00:00 2001 From: urso Date: Thu, 26 Jan 2017 16:34:46 +0100 Subject: [PATCH 5/6] remove `match` conditional --- libbeat/processors/condition.go | 95 +++++++--------------------- libbeat/processors/condition_test.go | 55 ---------------- libbeat/processors/config.go | 1 - 3 files changed, 22 insertions(+), 129 deletions(-) diff --git a/libbeat/processors/condition.go b/libbeat/processors/condition.go index 4e26fd1c5ccb..d3e7932ccc90 100644 --- a/libbeat/processors/condition.go +++ b/libbeat/processors/condition.go @@ -25,14 +25,15 @@ type EqualsValue struct { } type Condition struct { - equals map[string]EqualsValue - matches map[string]match.Matcher - contains map[string]match.Matcher - regexp map[string]match.Matcher - rangexp map[string]RangeValue - or []Condition - and []Condition - not *Condition + equals map[string]EqualsValue + matches struct { + name string + filters map[string]match.Matcher + } + rangexp map[string]RangeValue + or []Condition + and []Condition + not *Condition } type WhenProcessor struct { @@ 
-66,11 +67,11 @@ func NewCondition(config *ConditionConfig) (*Condition, error) { case config.Equals != nil: err = c.setEquals(config.Equals) case config.Contains != nil: - err = c.setContains(config.Contains) + c.matches.name = "contains" + c.matches.filters, err = compileMatches(config.Contains.fields, match.CompileString) case config.Regexp != nil: - err = c.setRegexp(config.Regexp) - case config.Match != nil: - err = c.setMatches(config.Match) + c.matches.name = "regexp" + c.matches.filters, err = compileMatches(config.Regexp.fields, match.Compile) case config.Range != nil: err = c.setRange(config.Range) case len(config.OR) > 0: @@ -123,39 +124,10 @@ func (c *Condition) setEquals(cfg *ConditionFields) error { return nil } -func (c *Condition) setContains(cfg *ConditionFields) error { - - if c.matches == nil { - c.matches = map[string]match.Matcher{} - } - - for field, value := range cfg.fields { - switch v := value.(type) { - case string: - m, err := match.CompileString(v) - if err != nil { - return err - } - c.matches[field] = m - default: - return fmt.Errorf("unexpected type %T of %v", value, value) - } - } - - return nil -} - -func (c *Condition) setRegexp(cfg *ConditionFields) (err error) { - c.regexp, err = compileMatches(cfg.fields) - return -} - -func (c *Condition) setMatches(cfg *ConditionFields) (err error) { - c.matches, err = compileMatches(cfg.fields) - return -} - -func compileMatches(fields map[string]interface{}) (map[string]match.Matcher, error) { +func compileMatches( + fields map[string]interface{}, + compile func(string) (match.Matcher, error), +) (map[string]match.Matcher, error) { if len(fields) == 0 { return nil, nil } @@ -166,7 +138,7 @@ func compileMatches(fields map[string]interface{}) (map[string]match.Matcher, er switch v := value.(type) { case string: - out[field], err = match.Compile(v) + out[field], err = compile(v) if err != nil { return nil, err } @@ -239,8 +211,6 @@ func (c *Condition) Check(event common.MapStr) bool { } return 
c.checkEquals(event) && - c.checkContains(event) && - c.checkRegexp(event) && c.checkMatches(event) && c.checkRange(event) } @@ -275,23 +245,8 @@ func (c *Condition) checkEquals(event common.MapStr) bool { } -func (c *Condition) checkContains(event common.MapStr) bool { - return checkMatchers("contains", c.contains, event) -} - -func (c *Condition) checkRegexp(event common.MapStr) bool { - return checkMatchers("regexp", c.regexp, event) -} - func (c *Condition) checkMatches(event common.MapStr) bool { - return checkMatchers("match", c.matches, event) -} - -func checkMatchers( - typ string, - matchers map[string]match.Matcher, - event common.MapStr, -) bool { + matchers := c.matches.filters if matchers == nil { return true } @@ -316,7 +271,7 @@ func checkMatchers( default: str, err := extractString(value) if err != nil { - logp.Warn("unexpected type %T in %v condition as it accepts only strings.", value, typ) + logp.Warn("unexpected type %T in %v condition as it accepts only strings.", value, c.matches.name) return false } @@ -429,14 +384,8 @@ func (c Condition) String() string { if len(c.equals) > 0 { s = s + fmt.Sprintf("equals: %v", c.equals) } - if len(c.contains) > 0 { - s = s + fmt.Sprintf("contains: %v", c.contains) - } - if len(c.regexp) > 0 { - s = s + fmt.Sprintf("regexp: %v", c.regexp) - } - if len(c.matches) > 0 { - s = s + fmt.Sprintf("match: %v", c.matches) + if len(c.matches.filters) > 0 { + s = s + fmt.Sprintf("%v: %v", c.matches.name, c.matches.filters) } if len(c.rangexp) > 0 { s = s + fmt.Sprintf("range: %v", c.rangexp) diff --git a/libbeat/processors/condition_test.go b/libbeat/processors/condition_test.go index 083019946332..a643c489a844 100644 --- a/libbeat/processors/condition_test.go +++ b/libbeat/processors/condition_test.go @@ -236,61 +236,6 @@ func TestRegexpCondition(t *testing.T) { assert.False(t, conds[2].Check(event1)) } -func TestMatchCondition(t *testing.T) { - - if testing.Verbose() { - logp.LogInit(logp.LOG_DEBUG, "", false, true, 
[]string{"*"}) - } - - configs := []ConditionConfig{ - { - Match: &ConditionFields{fields: map[string]interface{}{ - "source": "apache2/error.*", - }}, - }, - - { - Match: &ConditionFields{fields: map[string]interface{}{ - "source": "apache2/access.*", - }}, - }, - - { - Match: &ConditionFields{fields: map[string]interface{}{ - "source": "apache2/error.*", - "message": "[client 1.2.3.4]", - }}, - }, - } - - conds := GetConditions(t, configs) - - event := common.MapStr{ - "@timestamp": "2016-04-14T20:41:06.258Z", - "message": `[Fri Dec 16 01:46:23 2005] [error] [client 1.2.3.4] Directory index forbidden by rule: /home/test/`, - "source": "/var/log/apache2/error.log", - "type": "log", - "input_type": "log", - "offset": 30, - } - - event1 := common.MapStr{ - "@timestamp": "2016-04-14T20:41:06.258Z", - "message": `127.0.0.1 - - [28/Jul/2006:10:27:32 -0300] "GET /hidden/ HTTP/1.0" 404 7218`, - "source": "/var/log/apache2/access.log", - "type": "log", - "input_type": "log", - "offset": 30, - } - - assert.True(t, conds[0].Check(event)) - assert.False(t, conds[1].Check(event)) - assert.True(t, conds[2].Check(event)) - - assert.True(t, conds[1].Check(event1)) - assert.False(t, conds[2].Check(event1)) -} - func TestRangeCondition(t *testing.T) { if testing.Verbose() { diff --git a/libbeat/processors/config.go b/libbeat/processors/config.go index 987c9d9ffac2..721d23c5696c 100644 --- a/libbeat/processors/config.go +++ b/libbeat/processors/config.go @@ -12,7 +12,6 @@ type ConditionConfig struct { Equals *ConditionFields `config:"equals"` Contains *ConditionFields `config:"contains"` Regexp *ConditionFields `config:"regexp"` - Match *ConditionFields `config:"match"` Range *ConditionFields `config:"range"` OR []ConditionConfig `config:"or"` AND []ConditionConfig `config:"and"` From 6b71a780889fc1f71a54a770b8d783df7f452c0e Mon Sep 17 00:00:00 2001 From: urso Date: Thu, 26 Jan 2017 16:53:17 +0100 Subject: [PATCH 6/6] Update changelog --- CHANGELOG.asciidoc | 3 +++ 1 file changed, 
3 insertions(+) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index 7972fd954be8..678caecde7ca 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -64,6 +64,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff] - Files created by Beats (logs, registry, file output) will have 0600 permissions. {pull}3387[3387]. - RPM/deb packages will now install the config file with 0600 permissions. {pull}3382[3382] - Add the option to pass custom HTTP headers to the Elasticsearch output. {pull}3400[3400] +- Unify `regexp` and `contains` conditionals so that both support arrays of strings and convert numbers to strings if required. {pull}3469[3469] *Metricbeat* @@ -87,6 +88,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff] - Kafka consumer groups metricset. {pull}3240[3240] - Add dynamic configuration reloading for modules. {pull}3281[3281] - Add docker health metricset {pull}3357[3357] +- System module uses new matchers for white-listing processes. {pull}3469[3469] *Packetbeat* @@ -96,6 +98,7 @@ https://github.com/elastic/beats/compare/v5.1.1...master[Check the HEAD diff] - Add enabled config option to prospectors. {pull}3157[3157] - Add target option for decoded_json_field. {pull}3169[3169] - Add the `pipeline` config option at the prospector level, for configuring the Ingest Node pipeline ID. {pull}3433[3433] +- Update regular expressions used for matching file names or lines (multiline, include/exclude functionality) to new matchers, improving performance of simple string matches. {pull}3469[3469] *Winlogbeat*