diff --git a/.stylist.yml b/.stylist.yml index bb50d22..8ffb212 100644 --- a/.stylist.yml +++ b/.stylist.yml @@ -1,7 +1,3 @@ -excludes: - - dist/** - - .cspellcache - - coverage.out output: show_context: true show_url: true diff --git a/Makefile b/Makefile index bec2512..fb4dcbb 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ BUILD_DIR := ./dist DST_DIR := /usr/local/bin CURRENT_SHA = $(shell git rev-parse --short HEAD) -CURRENT_TS = $(shell date -I seconds) +CURRENT_TS = $(shell date -Iseconds) ##@ App diff --git a/cspell.yml b/cspell.yml index c7a4821..10d549a 100644 --- a/cspell.yml +++ b/cspell.yml @@ -39,6 +39,7 @@ words: - gopls - goreleaser - gosec + - gostub - hadolint - infof - iostreams diff --git a/go.mod b/go.mod index 146d911..745a808 100644 --- a/go.mod +++ b/go.mod @@ -6,9 +6,11 @@ require ( github.com/alecthomas/chroma/v2 v2.12.0 github.com/bmatcuk/doublestar/v4 v4.6.1 github.com/deckarep/golang-set/v2 v2.5.0 + github.com/denormal/go-gitignore v0.0.0-20180930084346-ae8ad1d07817 github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 github.com/imdario/mergo v1.0.0 github.com/owenrumney/go-sarif/v2 v2.3.0 + github.com/prashantv/gostub v1.1.0 github.com/sirupsen/logrus v1.9.3 github.com/sourcegraph/go-diff v0.7.0 github.com/spf13/cobra v1.8.0 @@ -28,6 +30,7 @@ require ( github.com/briandowns/spinner v1.23.0 // indirect github.com/caarlos0/env/v8 v8.0.0 // indirect github.com/creasty/defaults v1.7.0 // indirect + github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 // indirect github.com/davecgh/go-spew v1.1.1 // indirect github.com/dlclark/regexp2 v1.10.0 // indirect github.com/fatih/color v1.16.0 // indirect diff --git a/go.sum b/go.sum index 821c358..c16a519 100644 --- a/go.sum +++ b/go.sum @@ -25,11 +25,15 @@ github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI= github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= github.com/creasty/defaults v1.7.0 h1:eNdqZvc5B509z18lD8yc212CAqJNvfT1Jq6L8WowdBA= github.com/creasty/defaults v1.7.0/go.mod h1:iGzKe6pbEHnpMPtfDXZEr0NVxWnPTjb1bbDy08fPzYM= +github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 h1:y5HC9v93H5EPKqaS1UYVg1uYah5Xf51mBfIoWehClUQ= +github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9hchkHSWYkEqJwUGisez3G1QY8Ryz0sdWrLPMGjLk= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/deckarep/golang-set/v2 v2.5.0 h1:hn6cEZtQ0h3J8kFrHR/NrzyOoTnjgW1+FmNJzQ7y/sA= github.com/deckarep/golang-set/v2 v2.5.0/go.mod h1:VAky9rY/yGXJOLEDv3OMci+7wtDpOF4IN+y82NBOac4= +github.com/denormal/go-gitignore v0.0.0-20180930084346-ae8ad1d07817 h1:0nsrg//Dc7xC74H/TZ5sYR8uk4UQRNjsw8zejqH5a4Q= +github.com/denormal/go-gitignore v0.0.0-20180930084346-ae8ad1d07817/go.mod h1:C/+sI4IFnEpCn6VQ3GIPEp+FrQnQw+YQP3+n+GdGq7o= github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= @@ -97,6 +101,7 @@ github.com/owenrumney/go-sarif/v2 v2.3.0/go.mod h1:MSqMMx9WqlBSY7pXoOZWgEsVB4FDN github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prashantv/gostub v1.1.0 h1:BTyx3RfQjRHnUWaGF9oQos79AlQ5k8WNktv7VGvVH4g= +github.com/prashantv/gostub v1.1.0/go.mod h1:A5zLQHz7ieHGG7is6LLXLz7I8+3LZzsrV0P1IAHhP5U= github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o= diff --git a/internal/stylist/config.go b/internal/stylist/config.go index 35cc6e4..39fb708 100644 --- a/internal/stylist/config.go +++ b/internal/stylist/config.go @@ -17,7 +17,7 @@ type Config struct { LogLevel LogLevel `yaml:"log_level,omitempty" default:"warn"` Output OutputConfig `yaml:"output,omitempty"` - Excludes []string `yaml:"excludes,omitempty" default:"[\".git\", \"node_modules\"]"` + Excludes []string `yaml:"excludes,omitempty"` Processors []*Processor `yaml:"processors,omitempty"` } diff --git a/internal/stylist/path_ignorer.go b/internal/stylist/path_ignorer.go new file mode 100644 index 0000000..d3833e6 --- /dev/null +++ b/internal/stylist/path_ignorer.go @@ -0,0 +1,68 @@ +package stylist + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/bmatcuk/doublestar/v4" + "github.com/denormal/go-gitignore" + + "github.com/twelvelabs/stylist/internal/fsutils" +) + +var ( + newGitIgnoreParser = gitignore.NewFromFile +) + +// NewPathIgnorer returns a new path ignorer. +func NewPathIgnorer(gitIgnorePath string, patterns []string) (*PathIgnorer, error) { + cleanPatterns := []string{} + for _, pattern := range patterns { + p := strings.ReplaceAll(pattern, "\\", "/") + if !doublestar.ValidatePattern(p) { + return nil, doublestar.ErrBadPattern + } + cleanPatterns = append(cleanPatterns, p) + } + + pi := &PathIgnorer{ + patterns: cleanPatterns, + } + if gitIgnorePath != "" && fsutils.PathExists(gitIgnorePath) { + gitIgnore, err := newGitIgnoreParser(gitIgnorePath) + if err != nil { + return nil, fmt.Errorf("gitignore parse: %w", err) + } + pi.gitIgnore = gitIgnore + pi.gitIgnorePath = gitIgnorePath + } + return pi, nil +} + +// PathIgnorer is responsible for ignoring paths during the index process. +type PathIgnorer struct { + gitIgnore gitignore.GitIgnore + gitIgnorePath string + patterns []string +} + +// ShouldIgnore returns true if path should be ignored. +func (pi *PathIgnorer) ShouldIgnore(path string, isDir bool) bool { + if pi.gitIgnore != nil { + // Note: Specifically using `.Relative` for speed. + // The higher level GitIgnore methods (Ignore, Match) + // stat the path to figure out isDir. + if match := pi.gitIgnore.Relative(path, isDir); match != nil { + if match.Ignore() { + return true + } + } + } + for _, pattern := range pi.patterns { + if ok, _ := doublestar.Match(pattern, filepath.ToSlash(path)); ok { + return true + } + } + return false +} diff --git a/internal/stylist/path_ignorer_test.go b/internal/stylist/path_ignorer_test.go new file mode 100644 index 0000000..2ea8941 --- /dev/null +++ b/internal/stylist/path_ignorer_test.go @@ -0,0 +1,98 @@ +package stylist + +import ( + "errors" + "testing" + + "github.com/prashantv/gostub" + "github.com/stretchr/testify/require" +) + +func TestNewPathIgnorer(t *testing.T) { + require := require.New(t) + + ignorer, err := NewPathIgnorer("", nil) + require.NoError(err) + require.Empty(ignorer.gitIgnore) + require.Empty(ignorer.gitIgnorePath) + require.Empty(ignorer.patterns) + + ignorer, err = NewPathIgnorer("testdata/.gitignore", []string{ + `\Documents\**\*.bat`, + }) + require.NoError(err) + require.NotNil(ignorer.gitIgnore) + require.Equal("testdata/.gitignore", ignorer.gitIgnorePath) + require.Equal([]string{ + "/Documents/**/*.bat", + }, ignorer.patterns) +} + +func TestNewPathIgnorer_WhenGitIgnoreParseError(t *testing.T) { + stubs := gostub.StubFunc(&newGitIgnoreParser, nil, errors.New("boom")) + defer stubs.Reset() + + require := require.New(t) + + ignorer, err := NewPathIgnorer("testdata/.gitignore", nil) + require.ErrorContains(err, "boom") + require.Nil(ignorer) +} + +func TestNewPathIgnorer_WhenPatternValidateError(t *testing.T) { + require := require.New(t) + + ignorer, err := NewPathIgnorer("", []string{ + `*{{*}`, + }) + require.ErrorContains(err, "error in pattern") + require.Nil(ignorer) +} + +func TestPathIgnorer_ShouldIgnore(t *testing.T) { + tests := []struct { + path string + dir bool + expected bool + }{ + { + path: "example.json", + expected: false, + }, + { + path: "example.yaml", + expected: true, + }, + { + path: "ignored_file.txt", + expected: true, + }, + { + path: "ignored_dir", + dir: true, + expected: true, + }, + { + path: "ignored_dir", + dir: false, + expected: false, + }, + { + path: "ignored_dir/allowed.txt", + expected: false, + }, + } + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + require := require.New(t) + + ignorer, err := NewPathIgnorer("testdata/.gitignore", []string{ + "**/*.{yaml,yml}", + }) + require.NoError(err) + + actual := ignorer.ShouldIgnore(tt.path, tt.dir) + require.Equal(tt.expected, actual) + }) + } +} diff --git a/internal/stylist/path_indexer.go b/internal/stylist/path_indexer.go index 8285216..a666fb3 100644 --- a/internal/stylist/path_indexer.go +++ b/internal/stylist/path_indexer.go @@ -1,6 +1,7 @@ package stylist import ( + "context" "errors" "io/fs" "os" @@ -9,6 +10,7 @@ import ( "github.com/bmatcuk/doublestar/v4" mapset "github.com/deckarep/golang-set/v2" + "github.com/sirupsen/logrus" ) var ( @@ -59,6 +61,9 @@ type PathIndexer struct { // Paths grouped by wildcard pattern. PathsByInclude map[string]PathSet + + ignorer *PathIgnorer + logger *logrus.Logger } // Cardinality returns the total number of patterns @@ -71,51 +76,77 @@ func (pi *PathIndexer) Cardinality() int { // to a list of paths and attempts to add them to the index. // Paths will only be added to the index if they match // the types and/or patterns registered with the indexer. -func (pi *PathIndexer) Index(pathSpecs ...string) error { - for _, pathSpec := range pathSpecs { - if err := pi.indexPathSpec(pathSpec); err != nil { - return err - } +func (pi *PathIndexer) Index(ctx context.Context, pathSpecs ...string) error { + ignorer, err := NewPathIgnorer(".gitignore", pi.Excludes.ToSlice()) + if err != nil { + return err } - return nil -} + pi.ignorer = ignorer + pi.logger = AppLogger(ctx) -// indexPathSpec dispatches to the appropriate index method for the given type. -func (pi *PathIndexer) indexPathSpec(pathSpec string) error { - if strings.ContainsAny(pathSpec, patternChars) { - return pi.indexPattern(pathSpec) - } - info, err := os.Stat(pathSpec) + files, dirs, patterns, err := pi.partitionPathSpecs(pathSpecs) if err != nil { return err } - if info.IsDir() { - return pi.indexDir(pathSpec) + + for _, f := range files { + if err := pi.indexPath(f); err != nil { + return err + } + } + for _, d := range dirs { + if err := pi.indexDir(d); err != nil { + return err + } + } + if len(patterns) > 0 { + if err := pi.indexPatterns(patterns); err != nil { + return err + } } - return pi.indexPath(pathSpec) + return nil } -// indexPattern walks every path in pattern and calls indexWalkedPath(). -func (pi *PathIndexer) indexPattern(pattern string) error { - pattern = filepath.ToSlash(filepath.Clean(pattern)) +func (pi *PathIndexer) partitionPathSpecs(pathSpecs []string) ( + []string, []string, []string, error, +) { + var fileSpecs []string + var dirSpecs []string + var patternSpecs []string + + for _, pathSpec := range pathSpecs { + if strings.ContainsAny(pathSpec, patternChars) { + pattern := filepath.ToSlash(filepath.Clean(pathSpec)) + + base, _ := doublestar.SplitPattern(pattern) + if _, err := os.Lstat(base); err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil, nil, doublestar.ErrPatternNotExist + } + return nil, nil, nil, err + } - // Replicate the pattern validation logic from doublestar's Glob* functions. - base, _ := doublestar.SplitPattern(pattern) - if _, err := os.Lstat(base); err != nil { - if errors.Is(err, os.ErrNotExist) { - return doublestar.ErrPatternNotExist + patternSpecs = append(patternSpecs, pattern) + continue + } + + info, err := os.Lstat(pathSpec) + if err != nil { + return nil, nil, nil, err + } + if info.IsDir() { + dirSpecs = append(dirSpecs, pathSpec) + } else { + fileSpecs = append(fileSpecs, pathSpec) } - return err } - // Note: We're intentionally using `filepath.WalkDir` (vs. using doublestar's - // Glob or GlobWalk functions) because this allows us to skip excluded dirs - // (see logic in indexWalkedPath). - // For most projects this ends up being way faster, especially projects - // containing large cache or node_modules directories. - // Skipping those dirs can cut the time in half. + return fileSpecs, dirSpecs, patternSpecs, nil +} + +func (pi *PathIndexer) indexPatterns(patterns []string) error { return filepath.WalkDir(".", func(path string, d fs.DirEntry, err error) error { - return pi.indexWalkedPath(path, d, err, pattern) + return pi.indexWalkedPath(path, d, err, patterns...) }) } @@ -139,11 +170,7 @@ func (pi *PathIndexer) indexWalkedPath( // and if so, return `fs.SkipDir`. This is a sentinel that `filepath.WalkDir` // uses to determine whether to recurse into subdirectories. if d.IsDir() { - ok, err := pi.exclude(path) - if err != nil { - return err - } - if ok { + if pi.exclude(path, true) { return fs.SkipDir } return nil @@ -174,11 +201,7 @@ func (pi *PathIndexer) indexWalkedPath( // indexPath adds path to the index if it matches. func (pi *PathIndexer) indexPath(path string) error { // Check if it's excluded - ok, err := pi.exclude(path) - if err != nil { - return err - } - if ok { + if pi.exclude(path, false) { return nil } @@ -195,17 +218,12 @@ func (pi *PathIndexer) indexPath(path string) error { } // exclude returns true if path should be excluded. -func (pi *PathIndexer) exclude(path string) (bool, error) { - for exclude := range pi.Excludes.Iter() { - ok, err := matchPattern(exclude, path) - if err != nil { - return false, err // doublestar parse error - } - if ok { - return true, nil - } +func (pi *PathIndexer) exclude(path string, isDir bool) bool { + if pi.ignorer.ShouldIgnore(path, isDir) { + pi.logger.Debugf("Index ignore dir=%v path=%s", isDir, path) + return true } - return false, nil + return false } // match returns the patterns that match path. diff --git a/internal/stylist/path_indexer_test.go b/internal/stylist/path_indexer_test.go index cf79db2..7ba844c 100644 --- a/internal/stylist/path_indexer_test.go +++ b/internal/stylist/path_indexer_test.go @@ -1,6 +1,7 @@ package stylist import ( + "context" "testing" "github.com/stretchr/testify/assert" @@ -157,7 +158,10 @@ func TestPathIndexer_Index(t *testing.T) { } for _, tt := range tests { t.Run(tt.desc, func(t *testing.T) { - err := tt.indexer.Index(tt.pathSpec) + app := NewTestApp() + ctx := app.InitContext(context.Background()) + + err := tt.indexer.Index(ctx, tt.pathSpec) if tt.err == "" { assert.NoError(t, err) diff --git a/internal/stylist/pipeline.go b/internal/stylist/pipeline.go index 7ac08be..bf43acc 100644 --- a/internal/stylist/pipeline.go +++ b/internal/stylist/pipeline.go @@ -35,10 +35,11 @@ func (p *Pipeline) Index(ctx context.Context, pathSpecs []string) error { logger := AppLogger(ctx) // Aggregate each processor's include patterns - includes := []string{} + includeSet := NewPathSet() for _, processor := range p.processors { - includes = append(includes, processor.Includes...) + includeSet.Append(processor.Includes...) } + includes := includeSet.ToSlice() startedAt := time.Now() logger.Debugf( @@ -47,13 +48,12 @@ func (p *Pipeline) Index(ctx context.Context, pathSpecs []string) error { p.excludes, ) - // TODO: support passing Context to the indexer // Create an index of paths (resolved from the path specs), // matching any of the include patterns used by our processors. // Doing this once is _much_ faster than once per-processor, // especially when dealing w/ very large projects and many processors or patterns. indexer := NewPathIndexer(includes, p.excludes) - if err := indexer.Index(pathSpecs...); err != nil { + if err := indexer.Index(ctx, pathSpecs...); err != nil { return err } diff --git a/internal/stylist/testdata/.gitignore b/internal/stylist/testdata/.gitignore new file mode 100644 index 0000000..7f88baf --- /dev/null +++ b/internal/stylist/testdata/.gitignore @@ -0,0 +1,4 @@ +ignored_file.* + +ignored_dir/ +!ignored_dir/allowed.txt