Commit
Implement the Dissect Tokenizer from `logstash-filter-dissect` [1]

This tokenizer lets you define string patterns and extract the relevant information. It also allows some string manipulation when extracting keys.

Example tokenizer:

```yaml
tokenizer: "%{at} - [%{machine}] %{code} - %{message}"
message: "10/10/2017 - [wopr] 1 - oh fire fire!"
result:
  at: "10/10/2017"
  machine: "wopr"
  code: "1"
  message: "oh fire fire!"
```

```yaml
tokenizer: "%{?key} %{&key}"
message: "hello world"
result:
  hello: "world"
```

Example configuration:

```yaml
processors:
  - dissect:
      tokenizer: "%{key1} - %{key2}"
      field: "message"
      target_prefix: "extracted"
```

Dissect supports a few more features (the append and greedy modifiers are sketched below):

- Indirect fields
- Append
- Skip fields
- Greedy padding for CSV files

[1]: https://github.com/logstash-plugins/logstash-filter-dissect
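To illustrate the append (`+`) and greedy (`->`) modifiers from the feature list above, here is a sketch in the same format as the examples in the commit message; the field names and log line are illustrative, and it assumes append joins values with the default join string (a single space) while `->` lets the delimiter absorb the run of padding:

```yaml
tokenizer: "%{ts} %{+ts} %{level->} %{message}"
message: "2017-10-10 17:04:55 INFO    connection refused"
result:
  ts: "2017-10-10 17:04:55"
  level: "INFO"
  message: "connection refused"
```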
Showing 23 changed files with 1,426 additions and 0 deletions.
@@ -0,0 +1 @@

```
dissect_tests.json
```
@@ -0,0 +1,25 @@

```go
package dissect

type config struct {
	Tokenizer    *tokenizer `config:"tokenizer"`
	Field        string     `config:"field"`
	TargetPrefix string     `config:"target_prefix"`
}

var defaultConfig = config{
	Field:        "message",
	TargetPrefix: "dissect",
}

// tokenizer adds validation at the unpack level for this specific field.
type tokenizer = Dissector

// Unpack a tokenizer into a dissector; this triggers the normal validation of the dissector.
func (t *tokenizer) Unpack(v string) error {
	d, err := New(v)
	if err != nil {
		return err
	}
	*t = *d
	return nil
}
```
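The `Unpack` method above works because go-ucfg calls a field type's `Unpack(string) error` method while unpacking, so the tokenizer string is validated as the configuration loads. A minimal self-contained sketch of that same pattern, assuming only `libbeat/common`; the `celsius` type and `demoConfig` struct are illustrative, not part of this commit:

```go
package main

import (
	"fmt"
	"strconv"

	"github.com/elastic/beats/libbeat/common"
)

// celsius stands in for the tokenizer type: go-ucfg sees that
// *celsius implements Unpack(string) error and calls it during
// unpacking, so an invalid value fails at config-load time.
type celsius float64

func (c *celsius) Unpack(v string) error {
	f, err := strconv.ParseFloat(v, 64)
	if err != nil {
		return fmt.Errorf("invalid temperature %q: %v", v, err)
	}
	*c = celsius(f)
	return nil
}

type demoConfig struct {
	Temp *celsius `config:"temp"`
}

func main() {
	c, err := common.NewConfigFrom(map[string]interface{}{"temp": "21.5"})
	if err != nil {
		panic(err)
	}

	cfg := demoConfig{}
	if err := c.Unpack(&cfg); err != nil {
		fmt.Println("unpack failed:", err) // reported for malformed values
		return
	}
	fmt.Println(*cfg.Temp) // 21.5
}
```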
@@ -0,0 +1,43 @@

```go
package dissect

import (
	"testing"

	"github.com/stretchr/testify/assert"

	"github.com/elastic/beats/libbeat/common"
)

func TestTokenizerType(t *testing.T) {
	t.Run("valid", func(t *testing.T) {
		c, err := common.NewConfigFrom(map[string]interface{}{
			"tokenizer": "%{value1}",
			"field":     "message",
		})
		if !assert.NoError(t, err) {
			return
		}

		cfg := config{}
		err = c.Unpack(&cfg)
		if !assert.NoError(t, err) {
			return
		}
	})

	t.Run("invalid", func(t *testing.T) {
		c, err := common.NewConfigFrom(map[string]interface{}{
			"tokenizer": "%value1}",
			"field":     "message",
		})
		if !assert.NoError(t, err) {
			return
		}

		cfg := config{}
		err = c.Unpack(&cfg)
		if !assert.Error(t, err) {
			return
		}
	})
}
```
@@ -0,0 +1,32 @@

```go
package dissect

import (
	"errors"
	"regexp"
)

var (
	// delimiterRE tokenizes the tokenizer string into a walkable list of
	// extracted delimiter + key pairs. It turns the string:
	//   `%{key}, %{key/2}`
	// into:
	//   [["", "key"], [", ", "key/2"]]
	delimiterRE = regexp.MustCompile("(?s)(.*?)%\\{([^}]*?)}")
	suffixRE    = regexp.MustCompile("(.+?)(/(\\d{1,2}))?(->)?$")

	skipFieldPrefix      = "?"
	appendFieldPrefix    = "+"
	indirectFieldPrefix  = "&"
	appendIndirectPrefix = "+&"
	indirectAppendPrefix = "&+"
	greedySuffix         = "->"

	defaultJoinString = " "

	errParsingFailure            = errors.New("parsing failure")
	errInvalidTokenizer          = errors.New("invalid dissect tokenizer")
	errEmpty                     = errors.New("empty string provided")
	errMixedPrefixIndirectAppend = errors.New("mixed prefix `&+`")
	errMixedPrefixAppendIndirect = errors.New("mixed prefix `+&`")
	errEmptyKey                  = errors.New("empty key")
)
```
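For a quick look at what these two expressions actually capture, here is a small standalone program, not part of the commit, that runs them over a pattern taken from the commit message:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	delimiterRE := regexp.MustCompile(`(?s)(.*?)%\{([^}]*?)}`)
	suffixRE := regexp.MustCompile(`(.+?)(/(\d{1,2}))?(->)?$`)

	// Walk a tokenizer string into (delimiter, key) pairs.
	for _, m := range delimiterRE.FindAllStringSubmatch("%{at} - [%{machine}]", -1) {
		fmt.Printf("delimiter=%q key=%q\n", m[1], m[2])
	}
	// Output:
	// delimiter="" key="at"
	// delimiter=" - [" key="machine"

	// Split a key into its name, ordinal (for append), and greedy suffix.
	m := suffixRE.FindStringSubmatch("key/2->")
	fmt.Printf("name=%q ordinal=%q greedy=%q\n", m[1], m[3], m[4])
	// Output:
	// name="key" ordinal="2" greedy="->"
}
```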