From b0bda28597ab3a60a38b5fb2eb31eca31d6af01d Mon Sep 17 00:00:00 2001 From: jsirianni Date: Mon, 28 Mar 2022 15:22:29 -0400 Subject: [PATCH 1/3] port key value parser from stanza --- go.mod | 2 + go.sum | 4 + operator/parser/keyvalue/config_test.go | 122 +++++ operator/parser/keyvalue/keyvalue.go | 164 ++++++ operator/parser/keyvalue/keyvalue_test.go | 508 ++++++++++++++++++ .../parser/keyvalue/testdata/default.yaml | 1 + .../parser/keyvalue/testdata/delimiter.yaml | 2 + .../keyvalue/testdata/on_error_drop.yaml | 2 + .../keyvalue/testdata/pair_delimiter.yaml | 2 + .../keyvalue/testdata/parse_from_simple.yaml | 2 + .../keyvalue/testdata/parse_to_simple.yaml | 2 + .../parser/keyvalue/testdata/preserve_to.yaml | 2 + .../parser/keyvalue/testdata/severity.yaml | 8 + .../parser/keyvalue/testdata/timestamp.yaml | 5 + 14 files changed, 826 insertions(+) create mode 100644 operator/parser/keyvalue/config_test.go create mode 100644 operator/parser/keyvalue/keyvalue.go create mode 100644 operator/parser/keyvalue/keyvalue_test.go create mode 100644 operator/parser/keyvalue/testdata/default.yaml create mode 100644 operator/parser/keyvalue/testdata/delimiter.yaml create mode 100644 operator/parser/keyvalue/testdata/on_error_drop.yaml create mode 100644 operator/parser/keyvalue/testdata/pair_delimiter.yaml create mode 100644 operator/parser/keyvalue/testdata/parse_from_simple.yaml create mode 100644 operator/parser/keyvalue/testdata/parse_to_simple.yaml create mode 100644 operator/parser/keyvalue/testdata/preserve_to.yaml create mode 100644 operator/parser/keyvalue/testdata/severity.yaml create mode 100644 operator/parser/keyvalue/testdata/timestamp.yaml diff --git a/go.mod b/go.mod index 6c352bc4..4b9dbbac 100644 --- a/go.mod +++ b/go.mod @@ -24,6 +24,7 @@ require ( ) require ( + github.com/hashicorp/go-multierror v1.1.1 github.com/influxdata/go-syslog/v3 v3.0.1-0.20210608084020-ac565dc76ba6 go.uber.org/multierr v1.8.0 ) @@ -39,6 +40,7 @@ require ( github.com/google/go-cmp v0.5.7 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/googleapis/gnostic v0.5.6 // indirect + github.com/hashicorp/errwrap v1.0.0 // indirect github.com/kr/pretty v0.3.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect diff --git a/go.sum b/go.sum index e5f3afca..423d646f 100644 --- a/go.sum +++ b/go.sum @@ -181,6 +181,10 @@ github.com/googleapis/gnostic v0.5.5/go.mod h1:7+EbHbldMins07ALC74bsA81Ovc97Dwqy github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= diff --git a/operator/parser/keyvalue/config_test.go b/operator/parser/keyvalue/config_test.go new file mode 100644 index 00000000..7a3f8086 --- /dev/null +++ b/operator/parser/keyvalue/config_test.go @@ -0,0 +1,122 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package keyvalue + +import ( + "testing" + + "github.com/open-telemetry/opentelemetry-log-collection/entry" + "github.com/open-telemetry/opentelemetry-log-collection/operator/helper" + "github.com/open-telemetry/opentelemetry-log-collection/operator/helper/operatortest" +) + +func TestKVParserConfig(t *testing.T) { + cases := []operatortest.ConfigUnmarshalTest{ + { + Name: "default", + Expect: defaultCfg(), + }, + { + Name: "parse_from_simple", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + cfg.ParseFrom = entry.NewBodyField("from") + return cfg + }(), + }, + { + Name: "parse_to_simple", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + cfg.ParseTo = entry.NewBodyField("log") + return cfg + }(), + }, + { + Name: "on_error_drop", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + cfg.OnError = "drop" + return cfg + }(), + }, + { + Name: "timestamp", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + parseField := entry.NewBodyField("timestamp_field") + newTime := helper.TimeParser{ + LayoutType: "strptime", + Layout: "%Y-%m-%d", + ParseFrom: &parseField, + } + cfg.TimeParser = &newTime + return cfg + }(), + }, + { + Name: "severity", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + parseField := entry.NewBodyField("severity_field") + severityField := helper.NewSeverityParserConfig() + severityField.ParseFrom = &parseField + mapping := map[interface{}]interface{}{ + "critical": "5xx", + "error": "4xx", + "info": "3xx", + "debug": "2xx", + } + severityField.Mapping = mapping + cfg.SeverityParserConfig = &severityField + return cfg + }(), + }, + { + Name: "preserve_to", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + preserve := entry.NewBodyField("aField") + cfg.PreserveTo = &preserve + return cfg + }(), + }, + { + Name: "delimiter", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + cfg.Delimiter = ";" + return cfg + }(), + }, + { + Name: "pair_delimiter", + Expect: func() *KVParserConfig { + cfg := defaultCfg() + cfg.PairDelimiter = ";" + return cfg + }(), + }, + } + + for _, tc := range cases { + t.Run(tc.Name, func(t *testing.T) { + tc.Run(t, defaultCfg()) + }) + } +} + +func defaultCfg() *KVParserConfig { + return NewKVParserConfig("key_value_parser") +} diff --git a/operator/parser/keyvalue/keyvalue.go b/operator/parser/keyvalue/keyvalue.go new file mode 100644 index 00000000..90ed89db --- /dev/null +++ b/operator/parser/keyvalue/keyvalue.go @@ -0,0 +1,164 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package keyvalue + +import ( + "context" + "errors" + "fmt" + "strings" + + "github.com/hashicorp/go-multierror" + "go.uber.org/zap" + + "github.com/open-telemetry/opentelemetry-log-collection/entry" + "github.com/open-telemetry/opentelemetry-log-collection/operator" + "github.com/open-telemetry/opentelemetry-log-collection/operator/helper" +) + +func init() { + operator.Register("key_value_parser", func() operator.Builder { return NewKVParserConfig("") }) +} + +// NewKVParserConfig creates a new key value parser config with default values +func NewKVParserConfig(operatorID string) *KVParserConfig { + return &KVParserConfig{ + ParserConfig: helper.NewParserConfig(operatorID, "key_value_parser"), + Delimiter: "=", + } +} + +// KVParserConfig is the configuration of a key value parser operator. +type KVParserConfig struct { + helper.ParserConfig `mapstructure:",squash" yaml:",inline"` + + Delimiter string `mapstructure:"delimiter" yaml:"delimiter"` + PairDelimiter string `mapstructure:"pair_delimiter" yaml:"pair_delimiter"` +} + +// Build will build a key value parser operator. +func (c KVParserConfig) Build(logger *zap.SugaredLogger) (operator.Operator, error) { + parserOperator, err := c.ParserConfig.Build(logger) + if err != nil { + return nil, err + } + + if c.Delimiter == c.PairDelimiter { + return nil, errors.New("delimiter and pair_delimiter cannot be the same value") + } + + if len(c.Delimiter) == 0 { + return nil, errors.New("delimiter is a required parameter") + } + + // split on whitespace by default, if pair delimiter is set, use + // strings.Split() + pairSplitFunc := splitStringByWhitespace + if c.PairDelimiter != "" { + pairSplitFunc = func(input string) []string { + return strings.Split(input, c.PairDelimiter) + } + } + + return &KVParser{ + ParserOperator: parserOperator, + delimiter: c.Delimiter, + pairSplitFunc: pairSplitFunc, + }, nil +} + +// KVParser is an operator that parses key value pairs. +type KVParser struct { + helper.ParserOperator + delimiter string + pairSplitFunc func(input string) []string +} + +// Process will parse an entry for key value pairs. +func (kv *KVParser) Process(ctx context.Context, entry *entry.Entry) error { + return kv.ParserOperator.ProcessWith(ctx, entry, kv.parse) +} + +// parse will parse a value as key values. +func (kv *KVParser) parse(value interface{}) (interface{}, error) { + switch m := value.(type) { + case string: + return kv.parser(m, kv.delimiter) + default: + return nil, fmt.Errorf("type %T cannot be parsed as key value pairs", value) + } +} + +func (kv *KVParser) parser(input string, delimiter string) (map[string]interface{}, error) { + if len(input) == 0 { + return nil, fmt.Errorf("parse from field %s is empty", kv.ParseFrom.String()) + } + + parsed := make(map[string]interface{}) + + var err error + for _, raw := range kv.pairSplitFunc(input) { + m := strings.Split(raw, delimiter) + if len(m) != 2 { + e := fmt.Errorf("expected '%s' to split by '%s' into two items, got %d", raw, delimiter, len(m)) + err = multierror.Append(err, e) + continue + } + + key := cleanString(m[0]) + value := cleanString(m[1]) + + parsed[key] = value + } + + return parsed, err +} + +// split on whitespace and preserve quoted text +func splitStringByWhitespace(input string) []string { + quoted := false + raw := strings.FieldsFunc(input, func(r rune) bool { + if r == '"' || r == '\'' { + quoted = !quoted + } + return !quoted && r == ' ' + }) + return raw +} + +// remove surrounding quotes and trim leading and trailing space +func cleanString(input string) string { + if len(input) < 1 { + return input + } + + if input[0] == '"' { + input = input[1:] + } + + if input[len(input)-1] == '"' { + input = input[:len(input)-1] + } + + if input[0] == '\'' { + input = input[1:] + } + + if input[len(input)-1] == '\'' { + input = input[:len(input)-1] + } + + return strings.TrimSpace(input) +} diff --git a/operator/parser/keyvalue/keyvalue_test.go b/operator/parser/keyvalue/keyvalue_test.go new file mode 100644 index 00000000..2401d049 --- /dev/null +++ b/operator/parser/keyvalue/keyvalue_test.go @@ -0,0 +1,508 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package keyvalue + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/open-telemetry/opentelemetry-log-collection/entry" + "github.com/open-telemetry/opentelemetry-log-collection/operator" + "github.com/open-telemetry/opentelemetry-log-collection/testutil" +) + +func newTestParser(t *testing.T) *KVParser { + config := NewKVParserConfig("test") + op, err := config.Build(testutil.Logger(t)) + require.NoError(t, err) + return op.(*KVParser) +} + +func TestInit(t *testing.T) { + builder, ok := operator.DefaultRegistry.Lookup("key_value_parser") + require.True(t, ok, "expected key_value_parser to be registered") + require.Equal(t, "key_value_parser", builder().Type()) +} + +func TestKVParserConfigBuild(t *testing.T) { + config := NewKVParserConfig("test") + op, err := config.Build(testutil.Logger(t)) + require.NoError(t, err) + require.IsType(t, &KVParser{}, op) +} + +func TestKVParserConfigBuildFailure(t *testing.T) { + config := NewKVParserConfig("test") + config.OnError = "invalid_on_error" + _, err := config.Build(testutil.Logger(t)) + require.Error(t, err) + require.Contains(t, err.Error(), "invalid `on_error` field") +} + +func TestBuild(t *testing.T) { + basicConfig := func() *KVParserConfig { + cfg := NewKVParserConfig("test_operator_id") + return cfg + } + + cases := []struct { + name string + input *KVParserConfig + expectErr bool + }{ + { + "default", + func() *KVParserConfig { + cfg := basicConfig() + return cfg + }(), + false, + }, + { + "delimiter", + func() *KVParserConfig { + cfg := basicConfig() + cfg.Delimiter = "/" + return cfg + }(), + false, + }, + { + "missing-delimiter", + func() *KVParserConfig { + cfg := basicConfig() + cfg.Delimiter = "" + return cfg + }(), + true, + }, + { + "pair-delimiter", + func() *KVParserConfig { + cfg := basicConfig() + cfg.PairDelimiter = "|" + return cfg + }(), + false, + }, + { + "same-delimiter-and-pair-delimiter", + func() *KVParserConfig { + cfg := basicConfig() + cfg.Delimiter = "|" + cfg.PairDelimiter = cfg.Delimiter + return cfg + }(), + true, + }, + { + "unset-delimiter", + func() *KVParserConfig { + cfg := basicConfig() + cfg.Delimiter = "" + cfg.PairDelimiter = "!" + return cfg + }(), + true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := tc.input + _, err := cfg.Build(testutil.Logger(t)) + if tc.expectErr { + require.Error(t, err) + return + } + require.NoError(t, err) + }) + } +} + +func TestKVParserStringFailure(t *testing.T) { + parser := newTestParser(t) + _, err := parser.parse("invalid") + require.Error(t, err) + require.Contains(t, err.Error(), fmt.Sprintf("expected '%s' to split by '%s' into two items, got", "invalid", parser.delimiter)) +} + +func TestKVParserInvalidType(t *testing.T) { + parser := newTestParser(t) + _, err := parser.parse([]int{}) + require.Error(t, err) + require.Contains(t, err.Error(), "type []int cannot be parsed as key value pairs") +} + +func TestKVImplementations(t *testing.T) { + require.Implements(t, (*operator.Operator)(nil), new(KVParser)) +} + +func TestKVParser(t *testing.T) { + cases := []struct { + name string + configure func(*KVParserConfig) + inputBody interface{} + outputBody interface{} + expectError bool + expectBuildErr bool + }{ + { + "simple", + func(kv *KVParserConfig) {}, + "name=stanza age=2", + map[string]interface{}{ + "name": "stanza", + "age": "2", + }, + false, + false, + }, + { + "parse-from", + func(kv *KVParserConfig) { + kv.ParseFrom = entry.NewBodyField("test") + }, + map[string]interface{}{ + "test": "name=otel age=2", + }, + map[string]interface{}{ + "name": "otel", + "age": "2", + }, + false, + false, + }, + { + "parse-to", + func(kv *KVParserConfig) { + kv.ParseTo = entry.NewBodyField("test") + }, + "name=stanza age=10", + map[string]interface{}{ + "test": map[string]interface{}{ + "name": "stanza", + "age": "10", + }, + }, + false, + false, + }, + { + "preserve-to", + func(kv *KVParserConfig) { + preserveTo := entry.NewBodyField("test") + kv.PreserveTo = &preserveTo + }, + "name=stanza age=10", + map[string]interface{}{ + "name": "stanza", + "age": "10", + "test": "name=stanza age=10", + }, + false, + false, + }, + { + "from-to-preserve", + func(kv *KVParserConfig) { + kv.ParseFrom = entry.NewBodyField("from") + kv.ParseTo = entry.NewBodyField("to") + orig := entry.NewBodyField("orig") + kv.PreserveTo = &orig + }, + map[string]interface{}{ + "from": "name=stanza age=10", + }, + map[string]interface{}{ + "to": map[string]interface{}{ + "name": "stanza", + "age": "10", + }, + "orig": "name=stanza age=10", + }, + false, + false, + }, + { + "user-agent", + func(kv *KVParserConfig) {}, + `requestClientApplication="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0"`, + map[string]interface{}{ + "requestClientApplication": `Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0`, + }, + false, + false, + }, + { + "double-quotes-removed", + func(kv *KVParserConfig) {}, + "name=\"stanza\" age=2", + map[string]interface{}{ + "name": "stanza", + "age": "2", + }, + false, + false, + }, + { + "single-quotes-removed", + func(kv *KVParserConfig) {}, + "description='stanza deployment number 5' x=y", + map[string]interface{}{ + "description": "stanza deployment number 5", + "x": "y", + }, + false, + false, + }, + { + "double-quotes-spaces-removed", + func(kv *KVParserConfig) {}, + `name=" stanza " age=2`, + map[string]interface{}{ + "name": "stanza", + "age": "2", + }, + false, + false, + }, + { + "leading-and-trailing-space", + func(kv *KVParserConfig) {}, + `" name "=" stanza " age=2`, + map[string]interface{}{ + "name": "stanza", + "age": "2", + }, + false, + false, + }, + { + "delimiter", + func(kv *KVParserConfig) { + kv.Delimiter = "|" + kv.ParseFrom = entry.NewBodyField("testfield") + kv.ParseTo = entry.NewBodyField("testparsed") + }, + map[string]interface{}{ + "testfield": `name|" stanza " age|2 key|value`, + }, + map[string]interface{}{ + "testparsed": map[string]interface{}{ + "name": "stanza", + "age": "2", + "key": "value", + }, + }, + false, + false, + }, + { + "double-delimiter", + func(kv *KVParserConfig) { + kv.Delimiter = "==" + }, + `name==" stanza " age==2 key==value`, + map[string]interface{}{ + "name": "stanza", + "age": "2", + "key": "value", + }, + false, + false, + }, + { + "pair-delimiter", + func(kv *KVParserConfig) { + kv.PairDelimiter = "|" + }, + `name=stanza|age=2 | key=value`, + map[string]interface{}{ + "name": "stanza", + "age": "2", + "key": "value", + }, + false, + false, + }, + { + "large", + func(kv *KVParserConfig) {}, + "name=stanza age=1 job=\"software engineering\" location=\"grand rapids michigan\" src=\"10.3.3.76\" dst=172.217.0.10 protocol=udp sport=57112 dport=443 translated_src_ip=96.63.176.3 translated_port=57112", + map[string]interface{}{ + "age": "1", + "dport": "443", + "dst": "172.217.0.10", + "job": "software engineering", + "location": "grand rapids michigan", + "name": "stanza", + "protocol": "udp", + "sport": "57112", + "src": "10.3.3.76", + "translated_port": "57112", + "translated_src_ip": "96.63.176.3", + }, + false, + false, + }, + { + "dell-sonic-wall", + func(kv *KVParserConfig) {}, + `id=LVM_Sonicwall sn=22255555 time="2021-09-22 16:30:31" fw=14.165.177.10 pri=6 c=1024 gcat=2 m=97 msg="Web site hit" srcMac=6c:0b:84:3f:fa:63 src=192.168.50.2:52006:X0 srcZone=LAN natSrc=14.165.177.10:58457 dstMac=08:b2:58:46:30:54 dst=15.159.150.83:443:X1 dstZone=WAN natDst=15.159.150.83:443 proto=tcp/https sent=1422 rcvd=5993 rule="6 (LAN->WAN)" app=48 dstname=example.space.dev.com arg=/ code=27 Category="Information Technology/Computers" note="Policy: a0, Info: 888 " n=3412158`, + map[string]interface{}{ + "id": "LVM_Sonicwall", + "sn": "22255555", + "time": "2021-09-22 16:30:31", + "fw": "14.165.177.10", + "pri": "6", + "c": "1024", + "gcat": "2", + "m": "97", + "msg": "Web site hit", + "srcMac": "6c:0b:84:3f:fa:63", + "src": "192.168.50.2:52006:X0", + "srcZone": "LAN", + "natSrc": "14.165.177.10:58457", + "dstMac": "08:b2:58:46:30:54", + "dst": "15.159.150.83:443:X1", + "dstZone": "WAN", + "natDst": "15.159.150.83:443", + "proto": "tcp/https", + "sent": "1422", + "rcvd": "5993", + "rule": "6 (LAN->WAN)", + "app": "48", + "dstname": "example.space.dev.com", + "arg": "/", + "code": "27", + "Category": "Information Technology/Computers", + "note": "Policy: a0, Info: 888", + "n": "3412158", + }, + false, + false, + }, + { + "missing-delimiter", + func(kv *KVParserConfig) {}, + `test text`, + nil, + true, + false, + }, + { + "invalid-pair", + func(kv *KVParserConfig) {}, + `test=text=abc`, + map[string]interface{}{}, + true, + false, + }, + { + "empty-input", + func(kv *KVParserConfig) {}, + "", + nil, + true, + false, + }, + { + "same-delimiter-and-pair-delimiter", + func(kv *KVParserConfig) { + kv.Delimiter = "!" + kv.PairDelimiter = kv.Delimiter + }, + "a=b c=d", + nil, + false, + true, + }, + { + "unset-delimiter", + func(kv *KVParserConfig) { + kv.Delimiter = "" + kv.PairDelimiter = "!" + }, + "a=b c=d", + nil, + false, + true, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + cfg := NewKVParserConfig("test") + cfg.OutputIDs = []string{"fake"} + tc.configure(cfg) + + op, err := cfg.Build(testutil.Logger(t)) + if tc.expectBuildErr { + require.Error(t, err) + return + } + require.NoError(t, err) + + fake := testutil.NewFakeOutput(t) + op.SetOutputs([]operator.Operator{fake}) + + entry := entry.New() + entry.Body = tc.inputBody + err = op.Process(context.Background(), entry) + if tc.expectError { + require.Error(t, err) + return + } + require.NoError(t, err) + fake.ExpectBody(t, tc.outputBody) + }) + } +} + +func TestSplitStringByWhitespace(t *testing.T) { + cases := []struct { + name string + intput string + output []string + }{ + { + "simple", + "k=v a=b x=\" y \" job=\"software engineering\"", + []string{ + "k=v", + "a=b", + "x=\" y \"", + "job=\"software engineering\"", + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + require.Equal(t, tc.output, splitStringByWhitespace(tc.intput)) + }) + } +} + +func TestCleanString(t *testing.T) { + t.Run("empty", func(t *testing.T) { + x := "" + require.Equal(t, x, cleanString(x)) + }) +} diff --git a/operator/parser/keyvalue/testdata/default.yaml b/operator/parser/keyvalue/testdata/default.yaml new file mode 100644 index 00000000..ef7be79c --- /dev/null +++ b/operator/parser/keyvalue/testdata/default.yaml @@ -0,0 +1 @@ + type: key_value_parser diff --git a/operator/parser/keyvalue/testdata/delimiter.yaml b/operator/parser/keyvalue/testdata/delimiter.yaml new file mode 100644 index 00000000..357313e4 --- /dev/null +++ b/operator/parser/keyvalue/testdata/delimiter.yaml @@ -0,0 +1,2 @@ +type: key_value_parser +delimiter: ";" diff --git a/operator/parser/keyvalue/testdata/on_error_drop.yaml b/operator/parser/keyvalue/testdata/on_error_drop.yaml new file mode 100644 index 00000000..570c4189 --- /dev/null +++ b/operator/parser/keyvalue/testdata/on_error_drop.yaml @@ -0,0 +1,2 @@ +type: key_value_parser +on_error: drop diff --git a/operator/parser/keyvalue/testdata/pair_delimiter.yaml b/operator/parser/keyvalue/testdata/pair_delimiter.yaml new file mode 100644 index 00000000..c298f87d --- /dev/null +++ b/operator/parser/keyvalue/testdata/pair_delimiter.yaml @@ -0,0 +1,2 @@ +type: key_value_parser +pair_delimiter: ";" diff --git a/operator/parser/keyvalue/testdata/parse_from_simple.yaml b/operator/parser/keyvalue/testdata/parse_from_simple.yaml new file mode 100644 index 00000000..d48cbb8e --- /dev/null +++ b/operator/parser/keyvalue/testdata/parse_from_simple.yaml @@ -0,0 +1,2 @@ +type: key_value_parser +parse_from: body.from diff --git a/operator/parser/keyvalue/testdata/parse_to_simple.yaml b/operator/parser/keyvalue/testdata/parse_to_simple.yaml new file mode 100644 index 00000000..68bf2265 --- /dev/null +++ b/operator/parser/keyvalue/testdata/parse_to_simple.yaml @@ -0,0 +1,2 @@ +type: key_value_parser +parse_to: body.log diff --git a/operator/parser/keyvalue/testdata/preserve_to.yaml b/operator/parser/keyvalue/testdata/preserve_to.yaml new file mode 100644 index 00000000..f037dd99 --- /dev/null +++ b/operator/parser/keyvalue/testdata/preserve_to.yaml @@ -0,0 +1,2 @@ +type: key_value_parser +preserve_to: body.aField diff --git a/operator/parser/keyvalue/testdata/severity.yaml b/operator/parser/keyvalue/testdata/severity.yaml new file mode 100644 index 00000000..8670cd42 --- /dev/null +++ b/operator/parser/keyvalue/testdata/severity.yaml @@ -0,0 +1,8 @@ +type: key_value_parser +severity: + parse_from: body.severity_field + mapping: + critical: 5xx + error: 4xx + info: 3xx + debug: 2xx diff --git a/operator/parser/keyvalue/testdata/timestamp.yaml b/operator/parser/keyvalue/testdata/timestamp.yaml new file mode 100644 index 00000000..50d7bf3b --- /dev/null +++ b/operator/parser/keyvalue/testdata/timestamp.yaml @@ -0,0 +1,5 @@ +type: key_value_parser +timestamp: + parse_from: body.timestamp_field + layout_type: strptime + layout: '%Y-%m-%d' From 4e85f4b615e1c4dd589e4bd9dcf2fedeec936d71 Mon Sep 17 00:00:00 2001 From: jsirianni Date: Mon, 28 Mar 2022 15:24:37 -0400 Subject: [PATCH 2/3] key value parser doc --- docs/operators/key_value_parser.md | 178 +++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 docs/operators/key_value_parser.md diff --git a/docs/operators/key_value_parser.md b/docs/operators/key_value_parser.md new file mode 100644 index 00000000..f9aebd65 --- /dev/null +++ b/docs/operators/key_value_parser.md @@ -0,0 +1,178 @@ +## `key_value_parser` operator + +The `key_value_parser` operator parses the string-type field selected by `parse_from` into key value pairs. All values are of type string. + +### Configuration Fields + +| Field | Default | Description | +| --- | --- | --- | +| `id` | `key_value_parser` | A unique identifier for the operator. | +| `delimiter` | `=` | The delimiter used for splitting a value into a key value pair. | +| `pair_delimiter` | | The delimiter used for seperating key value pairs, defaults to whitespace. | +| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries. | +| `parse_from` | `body` | A [field](/docs/types/field.md) that indicates the field to be parsed into key value pairs. | +| `parse_to` | `body` | A [field](/docs/types/field.md) that indicates the field to be parsed as into key value pairs. | +| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md). | +| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](/docs/types/on_error.md). | +| `if` | | An [expression](/docs/types/expression.md) that, when set, will be evaluated to determine whether this operator should be used for the given entry. This allows you to do easy conditional parsing without branching logic with routers. | +| `timestamp` | `nil` | An optional [timestamp](/docs/types/timestamp.md) block which will parse a timestamp field before passing the entry to the output operator. | +| `severity` | `nil` | An optional [severity](/docs/types/severity.md) block which will parse a severity field before passing the entry to the output operator. | + + +### Example Configurations + +#### Parse the field `message` into key value pairs + +Configuration: +```yaml +- type: key_value_parser + parse_from: message +``` + + + + + + + +
Input body Output body
+ +```json +{ + "timestamp": "", + "body": { + "message": "name=stanza" + } +} +``` + + + +```json +{ + "timestamp": "", + "body": { + "name": "stanza" + } +} +``` + +
+ +#### Parse the field `message` into key value pairs, using a non default delimiter + +Configuration: +```yaml +- type: key_value_parser + parse_from: message + delimiter: ":" +``` + + + + + +
Input body Output body
+ +```json +{ + "timestamp": "", + "body": { + "message": "name:stanza" + } +} +``` + + + +```json +{ + "timestamp": "", + "body": { + "name": "stanza" + } +} +``` + +#### Parse the field `message` into key value pairs, using a non default pair delimiter + +Configuration: +```yaml +- type: key_value_parser + parse_from: message + pair_delimiter: "!" +``` + + + + + + + +
Input body Output body
+ +```json +{ + "timestamp": "", + "body": { + "message": "name=stanza ! org=otel ! group=dev" + } +} +``` + + + +```json +{ + "timestamp": "", + "body": { + "name": "stanza", + "org": "otel", + "group": "dev" + } +} +``` + +
+ +#### Parse the field `message` as key value pairs, and parse the timestamp + +Configuration: +```yaml +- type: key_value_parser + parse_from: message + timestamp: + parse_from: seconds_since_epoch + layout_type: epoch + layout: s +``` + + + + + + + +
Input body Output body
+ +```json +{ + "timestamp": "", + "body": { + "message": "name=stanza seconds_since_epoch=1136214245" + } +} +``` + + + +```json +{ + "timestamp": "2006-01-02T15:04:05-07:00", + "body": { + "name": "stanza" + } +} +``` + +
From c182e231fe8b13169cf0e9c2076db74634d48580 Mon Sep 17 00:00:00 2001 From: jsirianni Date: Wed, 30 Mar 2022 13:16:51 -0400 Subject: [PATCH 3/3] pr feadback. replaced clean string function with call to strings.TrimSpace + strings.Trim --- operator/parser/keyvalue/keyvalue.go | 33 +++-------------------- operator/parser/keyvalue/keyvalue_test.go | 7 ----- 2 files changed, 4 insertions(+), 36 deletions(-) diff --git a/operator/parser/keyvalue/keyvalue.go b/operator/parser/keyvalue/keyvalue.go index 90ed89db..734c4a01 100644 --- a/operator/parser/keyvalue/keyvalue.go +++ b/operator/parser/keyvalue/keyvalue.go @@ -59,7 +59,7 @@ func (c KVParserConfig) Build(logger *zap.SugaredLogger) (operator.Operator, err return nil, errors.New("delimiter and pair_delimiter cannot be the same value") } - if len(c.Delimiter) == 0 { + if c.Delimiter == "" { return nil, errors.New("delimiter is a required parameter") } @@ -102,7 +102,7 @@ func (kv *KVParser) parse(value interface{}) (interface{}, error) { } func (kv *KVParser) parser(input string, delimiter string) (map[string]interface{}, error) { - if len(input) == 0 { + if input == "" { return nil, fmt.Errorf("parse from field %s is empty", kv.ParseFrom.String()) } @@ -117,8 +117,8 @@ func (kv *KVParser) parser(input string, delimiter string) (map[string]interface continue } - key := cleanString(m[0]) - value := cleanString(m[1]) + key := strings.TrimSpace(strings.Trim(m[0], "\"'")) + value := strings.TrimSpace(strings.Trim(m[1], "\"'")) parsed[key] = value } @@ -137,28 +137,3 @@ func splitStringByWhitespace(input string) []string { }) return raw } - -// remove surrounding quotes and trim leading and trailing space -func cleanString(input string) string { - if len(input) < 1 { - return input - } - - if input[0] == '"' { - input = input[1:] - } - - if input[len(input)-1] == '"' { - input = input[:len(input)-1] - } - - if input[0] == '\'' { - input = input[1:] - } - - if input[len(input)-1] == '\'' { - input = input[:len(input)-1] - } - - return strings.TrimSpace(input) -} diff --git a/operator/parser/keyvalue/keyvalue_test.go b/operator/parser/keyvalue/keyvalue_test.go index 2401d049..d01c78ec 100644 --- a/operator/parser/keyvalue/keyvalue_test.go +++ b/operator/parser/keyvalue/keyvalue_test.go @@ -499,10 +499,3 @@ func TestSplitStringByWhitespace(t *testing.T) { }) } } - -func TestCleanString(t *testing.T) { - t.Run("empty", func(t *testing.T) { - x := "" - require.Equal(t, x, cleanString(x)) - }) -}