diff --git a/main.go b/main.go
index a24f6fae7d0e..a21e28a26f60 100644
--- a/main.go
+++ b/main.go
@@ -60,6 +60,7 @@ var (
 	gitScanURI          = gitScan.Arg("uri", "Git repository URL. https://, file://, or ssh:// schema expected.").Required().String()
 	gitScanIncludePaths = gitScan.Flag("include-paths", "Path to file with newline separated regexes for files to include in scan.").Short('i').String()
 	gitScanExcludePaths = gitScan.Flag("exclude-paths", "Path to file with newline separated regexes for files to exclude in scan.").Short('x').String()
+	gitScanExcludeGlobs = gitScan.Flag("exclude-globs", "Comma separated list of globs to exclude in scan. This option filters at the `git log` level, resulting in faster scans.").String()
 	gitScanSinceCommit  = gitScan.Flag("since-commit", "Commit to start scan from.").String()
 	gitScanBranch       = gitScan.Flag("branch", "Branch to scan.").String()
 	gitScanMaxDepth     = gitScan.Flag("max-depth", "Maximum depth of commits to scan.").Int()
@@ -301,13 +302,22 @@ func run(state overseer.State) {
 		if remote {
 			defer os.RemoveAll(repoPath)
 		}
+		// Split the comma separated globs, trimming each entry and dropping
+		// blank ones so an empty pattern is never handed to git as a pathspec.
+		var excludedGlobs []string
+		for _, glob := range strings.Split(*gitScanExcludeGlobs, ",") {
+			if glob = strings.TrimSpace(glob); glob != "" {
+				excludedGlobs = append(excludedGlobs, glob)
+			}
+		}
 
 		cfg := sources.GitConfig{
-			RepoPath: repoPath,
-			HeadRef:  *gitScanBranch,
-			BaseRef:  *gitScanSinceCommit,
-			MaxDepth: *gitScanMaxDepth,
-			Filter:   filter,
+			RepoPath:     repoPath,
+			HeadRef:      *gitScanBranch,
+			BaseRef:      *gitScanSinceCommit,
+			MaxDepth:     *gitScanMaxDepth,
+			Filter:       filter,
+			ExcludeGlobs: excludedGlobs,
 		}
 		if err = e.ScanGit(ctx, cfg); err != nil {
 			logFatal(err, "Failed to scan Git.")
diff --git a/pkg/engine/git.go b/pkg/engine/git.go
index ac4db85271bc..410d1e7d751e 100644
--- a/pkg/engine/git.go
+++ b/pkg/engine/git.go
@@ -36,6 +36,9 @@ func (e *Engine) ScanGit(ctx context.Context, c sources.GitConfig) error {
 	if c.HeadRef != "" {
 		opts = append(opts, git.ScanOptionHeadCommit(c.HeadRef))
 	}
+	if len(c.ExcludeGlobs) > 0 {
+		opts = append(opts, git.ScanOptionExcludeGlobs(c.ExcludeGlobs))
+	}
 	scanOptions := git.NewScanOptions(opts...)
 
 	gitSource := git.NewGit(sourcespb.SourceType_SOURCE_TYPE_GIT, 0, 0, "trufflehog - git", true, runtime.NumCPU(),
diff --git a/pkg/gitparse/gitparse.go b/pkg/gitparse/gitparse.go
index 95a28e195f1b..429064e2917c 100644
--- a/pkg/gitparse/gitparse.go
+++ b/pkg/gitparse/gitparse.go
@@ -116,7 +116,7 @@ func (c1 *Commit) Equal(c2 *Commit) bool {
 }
 
 // RepoPath parses the output of the `git log` command for the `source` path.
-func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool) (chan Commit, error) {
+func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool, excludedGlobs []string) (chan Commit, error) {
 	args := []string{"-C", source, "log", "-p", "-U5", "--full-history", "--date=format:%a %b %d %H:%M:%S %Y %z"}
 	if abbreviatedLog {
 		args = append(args, "--diff-filter=AM")
@@ -126,9 +126,16 @@ func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbre
 	} else {
 		args = append(args, "--all")
 	}
+	// Emit the `--` separator and the `.` pathspec only once: anything after
+	// the first `--` is read as a pathspec, including a repeated `--`.
+	if len(excludedGlobs) > 0 {
+		args = append(args, "--", ".")
+		for _, glob := range excludedGlobs {
+			args = append(args, fmt.Sprintf(":(exclude)%s", glob))
+		}
+	}
 
 	cmd := exec.Command("git", args...)
-
 	absPath, err := filepath.Abs(source)
 	if err == nil {
 		cmd.Env = append(cmd.Env, fmt.Sprintf("GIT_DIR=%s", filepath.Join(absPath, ".git")))
diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go
index 2b9a25fd36da..8337b37c4bc5 100644
--- a/pkg/sources/git/git.go
+++ b/pkg/sources/git/git.go
@@ -338,7 +338,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
 		return err
 	}
 
-	commitChan, err := gitparse.NewParser().RepoPath(ctx, path, scanOptions.HeadHash, scanOptions.BaseHash == "")
+	commitChan, err := gitparse.NewParser().RepoPath(ctx, path, scanOptions.HeadHash, scanOptions.BaseHash == "", scanOptions.ExcludeGlobs)
 	if err != nil {
 		return err
 	}
diff --git a/pkg/sources/git/scan_options.go b/pkg/sources/git/scan_options.go
index d68c140586aa..09572a6fb408 100644
--- a/pkg/sources/git/scan_options.go
+++ b/pkg/sources/git/scan_options.go
@@ -6,11 +6,12 @@ import (
 )
 
 type ScanOptions struct {
-	Filter     *common.Filter
-	BaseHash   string // When scanning a git.Log, this is the oldest/first commit.
-	HeadHash   string
-	MaxDepth   int64
-	LogOptions *git.LogOptions
+	Filter       *common.Filter
+	BaseHash     string // When scanning a git.Log, this is the oldest/first commit.
+	HeadHash     string
+	MaxDepth     int64
+	ExcludeGlobs []string
+	LogOptions   *git.LogOptions
 }
 
 type ScanOption func(*ScanOptions)
@@ -39,6 +40,13 @@ func ScanOptionMaxDepth(maxDepth int64) ScanOption {
 	}
 }
 
+// ScanOptionExcludeGlobs returns a ScanOption that stores globs in ScanOptions.ExcludeGlobs.
+func ScanOptionExcludeGlobs(globs []string) ScanOption {
+	return func(scanOptions *ScanOptions) {
+		scanOptions.ExcludeGlobs = globs
+	}
+}
+
 func ScanOptionLogOptions(logOptions *git.LogOptions) ScanOption {
 	return func(scanOptions *ScanOptions) {
 		scanOptions.LogOptions = logOptions
diff --git a/pkg/sources/sources.go b/pkg/sources/sources.go
index aa41abaff56b..19b2a5de6517 100644
--- a/pkg/sources/sources.go
+++ b/pkg/sources/sources.go
@@ -83,6 +83,9 @@ type GitConfig struct {
 	MaxDepth int
 	// Filter is the filter to use to scan the source.
 	Filter *common.Filter
+	// ExcludeGlobs is a list of globs to exclude from the scan.
+	// This differs from the Filter exclusions as ExcludeGlobs is applied at the `git log -p` level.
+	ExcludeGlobs []string
 }
 
 // GithubConfig defines the optional configuration for a github source.