From 10778e2ada59c7fb7b3add6f8193285307b46c62 Mon Sep 17 00:00:00 2001 From: Dan Jaglowski Date: Thu, 16 Sep 2021 15:47:08 -0400 Subject: [PATCH 1/2] Refactor file finding into a testable struct --- operator/builtin/input/file/config.go | 7 +- operator/builtin/input/file/config_test.go | 2 +- operator/builtin/input/file/file.go | 37 +---- operator/builtin/input/file/file_test.go | 43 ------ operator/builtin/input/file/finder.go | 52 +++++++ operator/builtin/input/file/finder_test.go | 162 +++++++++++++++++++++ operator/builtin/input/file/util_test.go | 11 -- 7 files changed, 222 insertions(+), 92 deletions(-) create mode 100644 operator/builtin/input/file/finder.go create mode 100644 operator/builtin/input/file/finder_test.go diff --git a/operator/builtin/input/file/config.go b/operator/builtin/input/file/config.go index 64cfd1be..4d8e1f31 100644 --- a/operator/builtin/input/file/config.go +++ b/operator/builtin/input/file/config.go @@ -55,9 +55,7 @@ func NewInputConfig(operatorID string) *InputConfig { // InputConfig is the configuration of a file input operator type InputConfig struct { helper.InputConfig `mapstructure:",squash" yaml:",inline"` - - Include []string `mapstructure:"include,omitempty" json:"include,omitempty" yaml:"include,omitempty"` - Exclude []string `mapstructure:"exclude,omitempty" json:"exclude,omitempty" yaml:"exclude,omitempty"` + Finder `mapstructure:",squash" yaml:",inline"` PollInterval helper.Duration `mapstructure:"poll_interval,omitempty" json:"poll_interval,omitempty" yaml:"poll_interval,omitempty"` IncludeFileName bool `mapstructure:"include_file_name,omitempty" json:"include_file_name,omitempty" yaml:"include_file_name,omitempty"` @@ -156,8 +154,7 @@ func (c InputConfig) Build(context operator.BuildContext) ([]operator.Operator, op := &InputOperator{ InputOperator: inputOperator, - Include: c.Include, - Exclude: c.Exclude, + finder: c.Finder, PollInterval: c.PollInterval.Raw(), FilePathField: filePathField, FileNameField: fileNameField, diff --git a/operator/builtin/input/file/config_test.go b/operator/builtin/input/file/config_test.go index 98125e21..9af05fd7 100644 --- a/operator/builtin/input/file/config_test.go +++ b/operator/builtin/input/file/config_test.go @@ -539,7 +539,7 @@ func TestBuild(t *testing.T) { require.NoError, func(t *testing.T, f *InputOperator) { require.Equal(t, f.OutputOperators[0], fakeOutput) - require.Equal(t, f.Include, []string{"/var/log/testpath.*"}) + require.Equal(t, f.finder.Include, []string{"/var/log/testpath.*"}) require.Equal(t, f.FilePathField, entry.NewNilField()) require.Equal(t, f.FileNameField, entry.NewAttributeField("file.name")) require.Equal(t, f.PollInterval, 10*time.Millisecond) diff --git a/operator/builtin/input/file/file.go b/operator/builtin/input/file/file.go index a6b10bc5..7c32a36a 100644 --- a/operator/builtin/input/file/file.go +++ b/operator/builtin/input/file/file.go @@ -20,11 +20,9 @@ import ( "encoding/json" "fmt" "os" - "path/filepath" "sync" "time" - "github.com/bmatcuk/doublestar/v3" "go.uber.org/zap" "github.com/open-telemetry/opentelemetry-log-collection/entry" @@ -36,8 +34,7 @@ import ( type InputOperator struct { helper.InputOperator - Include []string - Exclude []string + finder Finder FilePathField entry.Field FileNameField entry.Field FilePathResolvedField entry.Field @@ -137,9 +134,11 @@ func (f *InputOperator) poll(ctx context.Context) { } // Get the list of paths on disk - matches = getMatches(f.Include, f.Exclude) + matches = f.finder.FindFiles() if f.firstCheck && len(matches) == 0 { - f.Warnw("no files match the configured include patterns", "include", f.Include) + f.Warnw("no files match the configured include patterns", + "include", f.finder.Include, + "exclude", f.finder.Exclude) } else if len(matches) > f.maxBatchFiles { matches, f.queuedMatches = matches[:f.maxBatchFiles], matches[f.maxBatchFiles:] } @@ -192,32 +191,6 @@ OUTER: f.syncLastPollFiles(ctx) } -// getMatches gets a list of paths given an array of glob patterns to include and exclude -func getMatches(includes, excludes []string) []string { - all := make([]string, 0, len(includes)) - for _, include := range includes { - matches, _ := filepath.Glob(include) // compile error checked in build - INCLUDE: - for _, match := range matches { - for _, exclude := range excludes { - if itMatches, _ := doublestar.PathMatch(exclude, match); itMatches { - continue INCLUDE - } - } - - for _, existing := range all { - if existing == match { - continue INCLUDE - } - } - - all = append(all, match) - } - } - - return all -} - // makeReaders takes a list of paths, then creates readers from each of those paths, // discarding any that have a duplicate fingerprint to other files that have already // been read this polling interval diff --git a/operator/builtin/input/file/file_test.go b/operator/builtin/input/file/file_test.go index 6cf01cff..e4464323 100644 --- a/operator/builtin/input/file/file_test.go +++ b/operator/builtin/input/file/file_test.go @@ -929,46 +929,3 @@ func TestEncodings(t *testing.T) { }) } } - -// TestExclude tests that a log file will be excluded if it matches the -// glob specified in the operator. -func TestExclude(t *testing.T) { - tempDir := testutil.NewTempDir(t) - paths := writeTempFiles(tempDir, []string{"include.log", "exclude.log"}) - - includes := []string{filepath.Join(tempDir, "*")} - excludes := []string{filepath.Join(tempDir, "*exclude.log")} - - matches := getMatches(includes, excludes) - require.ElementsMatch(t, matches, paths[:1]) -} -func TestExcludeEmpty(t *testing.T) { - tempDir := testutil.NewTempDir(t) - paths := writeTempFiles(tempDir, []string{"include.log", "exclude.log"}) - - includes := []string{filepath.Join(tempDir, "*")} - excludes := []string{} - - matches := getMatches(includes, excludes) - require.ElementsMatch(t, matches, paths) -} -func TestExcludeMany(t *testing.T) { - tempDir := testutil.NewTempDir(t) - paths := writeTempFiles(tempDir, []string{"a1.log", "a2.log", "b1.log", "b2.log"}) - - includes := []string{filepath.Join(tempDir, "*")} - excludes := []string{filepath.Join(tempDir, "a*.log"), filepath.Join(tempDir, "*2.log")} - - matches := getMatches(includes, excludes) - require.ElementsMatch(t, matches, paths[2:3]) -} -func TestExcludeDuplicates(t *testing.T) { - tempDir := testutil.NewTempDir(t) - paths := writeTempFiles(tempDir, []string{"a1.log", "a2.log", "b1.log", "b2.log"}) - - includes := []string{filepath.Join(tempDir, "*1*"), filepath.Join(tempDir, "a*")} - excludes := []string{filepath.Join(tempDir, "a*.log"), filepath.Join(tempDir, "*2.log")} - - matches := getMatches(includes, excludes) - require.ElementsMatch(t, matches, paths[2:3]) -} diff --git a/operator/builtin/input/file/finder.go b/operator/builtin/input/file/finder.go new file mode 100644 index 00000000..4dc7d8c2 --- /dev/null +++ b/operator/builtin/input/file/finder.go @@ -0,0 +1,52 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "path/filepath" + + "github.com/bmatcuk/doublestar/v3" +) + +type Finder struct { + Include []string `mapstructure:"include,omitempty" json:"include,omitempty" yaml:"include,omitempty"` + Exclude []string `mapstructure:"exclude,omitempty" json:"exclude,omitempty" yaml:"exclude,omitempty"` +} + +// FindFiles gets a list of paths given an array of glob patterns to include and exclude +func (f Finder) FindFiles() []string { + all := make([]string, 0, len(f.Include)) + for _, include := range f.Include { + matches, _ := filepath.Glob(include) // compile error checked in build + INCLUDE: + for _, match := range matches { + for _, exclude := range f.Exclude { + if itMatches, _ := doublestar.PathMatch(exclude, match); itMatches { + continue INCLUDE + } + } + + for _, existing := range all { + if existing == match { + continue INCLUDE + } + } + + all = append(all, match) + } + } + + return all +} diff --git a/operator/builtin/input/file/finder_test.go b/operator/builtin/input/file/finder_test.go new file mode 100644 index 00000000..0a1d744f --- /dev/null +++ b/operator/builtin/input/file/finder_test.go @@ -0,0 +1,162 @@ +// Copyright The OpenTelemetry Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package file + +import ( + "io/ioutil" + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/open-telemetry/opentelemetry-log-collection/testutil" +) + +func TestFinder(t *testing.T) { + t.Parallel() + cases := []struct { + name string + files []string + include []string + exclude []string + expected []string + }{ + { + name: "IncludeOne", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"a1.log"}, + exclude: []string{}, + expected: []string{"a1.log"}, + }, + { + name: "IncludeNone", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"c*.log"}, + exclude: []string{}, + expected: []string{}, + }, + { + name: "IncludeAll", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"*"}, + exclude: []string{}, + expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + }, + { + name: "IncludeLogs", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"*.log"}, + exclude: []string{}, + expected: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + }, + { + name: "IncludeA", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"a*.log"}, + exclude: []string{}, + expected: []string{"a1.log", "a2.log"}, + }, + { + name: "Include2s", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"*2.log"}, + exclude: []string{}, + expected: []string{"a2.log", "b2.log"}, + }, + { + name: "Exclude", + files: []string{"include.log", "exclude.log"}, + include: []string{"*"}, + exclude: []string{"exclude.log"}, + expected: []string{"include.log"}, + }, + // { + // name: "ExcludeEmpty", + // files: []string{"include.log", "exclude.log"}, + // include: []string{"*"}, + // exclude: []string{"exclude.log"}, + // expected: []string{"include.log", "exclude.log"}, + // }, + { + name: "ExcludeMany", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"*"}, + exclude: []string{"a*.log", "*2.log"}, + expected: []string{"b1.log"}, + }, + { + name: "ExcludeDuplicates", + files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, + include: []string{"*1*", "a*"}, + exclude: []string{"a*.log", "*2.log"}, + expected: []string{"b1.log"}, + }, + { + name: "IncludeMultipleDirectories", + files: []string{"a/1.log", "a/2.log", "b/1.log", "b/2.log"}, + include: []string{"a/*.log", "b/*.log"}, + exclude: []string{}, + expected: []string{"a/1.log", "a/2.log", "b/1.log", "b/2.log"}, + }, + { + name: "IncludeMultipleDirectoriesVaryingDepth", + files: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, + include: []string{"*.log", "a/*.log", "a/b/*.log", "c/*.log"}, + exclude: []string{}, + expected: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, + }, + { + name: "DoubleStarSameDepth", + files: []string{"a/1.log", "b/1.log", "c/1.log"}, + include: []string{"**/*.log"}, + exclude: []string{}, + expected: []string{"a/1.log", "b/1.log", "c/1.log"}, + }, + // { + // name: "DoubleStarVaryingDepth", + // files: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, + // include: []string{"**/*.log"}, + // exclude: []string{}, + // expected: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, + // }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + tempDir := testutil.NewTempDir(t) + files := absPath(tempDir, tc.files) + include := absPath(tempDir, tc.include) + exclude := absPath(tempDir, tc.exclude) + expected := absPath(tempDir, tc.expected) + + for _, f := range files { + os.MkdirAll(filepath.Dir(f), 0755) + ioutil.WriteFile(f, []byte(filepath.Base(f)), 0755) + } + + finder := Finder{include, exclude} + require.ElementsMatch(t, finder.FindFiles(), expected) + }) + } +} + +func absPath(tempDir string, files []string) []string { + absFiles := make([]string, 0, len(files)) + for _, f := range files { + absFiles = append(absFiles, filepath.Join(tempDir, f)) + } + return absFiles +} diff --git a/operator/builtin/input/file/util_test.go b/operator/builtin/input/file/util_test.go index 2e8dfb07..9e4ead36 100644 --- a/operator/builtin/input/file/util_test.go +++ b/operator/builtin/input/file/util_test.go @@ -186,14 +186,3 @@ func expectNoMessagesUntil(t *testing.T, c chan *entry.Entry, d time.Duration) { case <-time.After(d): } } - -// writes file with the specified file names and returns their full paths in order -func writeTempFiles(tempDir string, names []string) []string { - result := make([]string, 0, len(names)) - for _, name := range names { - path := filepath.Join(tempDir, name) - ioutil.WriteFile(path, []byte(name), 0755) - result = append(result, path) - } - return result -} From 2c3defd984283d2208f53c326a7fd4a50862c299 Mon Sep 17 00:00:00 2001 From: Dan Jaglowski Date: Thu, 16 Sep 2021 15:56:42 -0400 Subject: [PATCH 2/2] Use doublestar for globbing --- internal/tools/go.sum | 1 + operator/builtin/input/file/finder.go | 4 +--- operator/builtin/input/file/finder_test.go | 21 +++++++-------------- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/internal/tools/go.sum b/internal/tools/go.sum index 08676db3..fe9a38b2 100644 --- a/internal/tools/go.sum +++ b/internal/tools/go.sum @@ -283,6 +283,7 @@ github.com/google/trillian v1.3.11/go.mod h1:0tPraVHrSDkA3BO6vKX67zgLXs6SsOAbHEi github.com/google/uuid v0.0.0-20161128191214-064e2069ce9c/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.2.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= diff --git a/operator/builtin/input/file/finder.go b/operator/builtin/input/file/finder.go index 4dc7d8c2..3fb260c6 100644 --- a/operator/builtin/input/file/finder.go +++ b/operator/builtin/input/file/finder.go @@ -15,8 +15,6 @@ package file import ( - "path/filepath" - "github.com/bmatcuk/doublestar/v3" ) @@ -29,7 +27,7 @@ type Finder struct { func (f Finder) FindFiles() []string { all := make([]string, 0, len(f.Include)) for _, include := range f.Include { - matches, _ := filepath.Glob(include) // compile error checked in build + matches, _ := doublestar.Glob(include) // compile error checked in build INCLUDE: for _, match := range matches { for _, exclude := range f.Exclude { diff --git a/operator/builtin/input/file/finder_test.go b/operator/builtin/input/file/finder_test.go index 0a1d744f..615ca396 100644 --- a/operator/builtin/input/file/finder_test.go +++ b/operator/builtin/input/file/finder_test.go @@ -83,13 +83,6 @@ func TestFinder(t *testing.T) { exclude: []string{"exclude.log"}, expected: []string{"include.log"}, }, - // { - // name: "ExcludeEmpty", - // files: []string{"include.log", "exclude.log"}, - // include: []string{"*"}, - // exclude: []string{"exclude.log"}, - // expected: []string{"include.log", "exclude.log"}, - // }, { name: "ExcludeMany", files: []string{"a1.log", "a2.log", "b1.log", "b2.log"}, @@ -125,13 +118,13 @@ func TestFinder(t *testing.T) { exclude: []string{}, expected: []string{"a/1.log", "b/1.log", "c/1.log"}, }, - // { - // name: "DoubleStarVaryingDepth", - // files: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, - // include: []string{"**/*.log"}, - // exclude: []string{}, - // expected: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, - // }, + { + name: "DoubleStarVaryingDepth", + files: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, + include: []string{"**/*.log"}, + exclude: []string{}, + expected: []string{"1.log", "a/1.log", "a/b/1.log", "c/1.log"}, + }, } for _, tc := range cases {