From 4f6ee2bcf191931331020daea8bf6c50d17e9290 Mon Sep 17 00:00:00 2001 From: Gawan Schroeder <34353307+gawansch@users.noreply.github.com> Date: Fri, 16 Sep 2022 15:18:31 +0200 Subject: [PATCH] Add baseline (#975) * Add baseline * Update doc, add error, move baseline to detect namespace, ignore findings instead of reactively filter them out * Update detect/detect.go Co-authored-by: Zachary Rice * Update IsNew function (no check on tags - omit finger print check) * Update README.md Co-authored-by: Zachary Rice * Update examples in readme to make it ensure it's clear that a baseline is indeed a gitleaks report * Fix test - updated tags doesn't make a finding new * Add missing err assignment * Allow scanner to continue without baseline if file is malformed * Fix typo in comment * Fix control flow err. (Real life testing) * Fix wording * Auto-ignore baseline path --- README.md | 18 ++++ cmd/detect.go | 9 ++ cmd/root.go | 1 + detect/baseline.go | 58 +++++++++++++ detect/baseline_test.go | 137 +++++++++++++++++++++++++++++++ detect/detect.go | 25 +++++- testdata/baseline/baseline.csv | 2 + testdata/baseline/baseline.json | 40 +++++++++ testdata/baseline/baseline.sarif | 6 ++ 9 files changed, 295 insertions(+), 1 deletion(-) create mode 100644 detect/baseline.go create mode 100644 detect/baseline_test.go create mode 100644 testdata/baseline/baseline.csv create mode 100644 testdata/baseline/baseline.json create mode 100644 testdata/baseline/baseline.sarif diff --git a/README.md b/README.md index 5f35dca47..e9ced3160 100644 --- a/README.md +++ b/README.md @@ -156,6 +156,7 @@ Flags: --redact redact secrets from logs and stdout -f, --report-format string output format (json, csv, sarif) -r, --report-path string report file + -b, --baseline-path path to a previously generated report with known issues that gitleaks should ignore -s, --source string path to source (git repo, directory, file) -v, --verbose show verbose output from scan @@ -190,6 +191,23 @@ as a pre-commit. 
**NOTE**: the `protect` command can only be used on git repos, running `protect` on files or directories will result in an error message. +### Creating a baseline + +When scanning large repositories or repositories with a long history, it can be convenient to use a baseline. When using a baseline, +gitleaks will ignore any old findings that are present in the baseline. A baseline can be any gitleaks report. To create a gitleaks report, run gitleaks with the `--report-path` parameter. + +``` +gitleaks detect --report-path gitleaks-report.json # This will save the report in a file called gitleaks-report.json +``` + +Once a baseline is created it can be applied when running the detect command again: + +``` +gitleaks detect --baseline-path gitleaks-report.json --report-path findings.json +``` + +After running the detect command with the --baseline-path parameter, report output (findings.json) will only contain new issues. + ### Verify Findings You can verify a finding found by gitleaks using a `git log` command. diff --git a/cmd/detect.go b/cmd/detect.go index d92b26d41..05694d158 100644 --- a/cmd/detect.go +++ b/cmd/detect.go @@ -75,6 +75,15 @@ func runDetect(cmd *cobra.Command, args []string) { detector.AddGitleaksIgnore(filepath.Join(source, ".gitleaksignore")) } + // ignore findings from the baseline (an existing report in json format generated earlier) + baselinePath, _ := cmd.Flags().GetString("baseline-path") + if baselinePath != "" { + err = detector.AddBaseline(baselinePath) + if err != nil { + log.Error().Msgf("Could not load baseline. 
The path must point of a gitleaks report generated using the default format: %s", err) + } + } + // set exit code exitCode, err := cmd.Flags().GetInt("exit-code") if err != nil { diff --git a/cmd/root.go b/cmd/root.go index 450934a31..19cd721bb 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -42,6 +42,7 @@ func init() { rootCmd.PersistentFlags().StringP("source", "s", ".", "path to source (default: $PWD)") rootCmd.PersistentFlags().StringP("report-path", "r", "", "report file") rootCmd.PersistentFlags().StringP("report-format", "f", "json", "output format (json, csv, sarif)") + rootCmd.PersistentFlags().StringP("baseline-path", "b", "", "path to baseline with issues that can be ignored") rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (trace, debug, info, warn, error, fatal)") rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan") rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout") diff --git a/detect/baseline.go b/detect/baseline.go new file mode 100644 index 000000000..9e14cbc51 --- /dev/null +++ b/detect/baseline.go @@ -0,0 +1,58 @@ +package detect + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "os" + + "github.com/zricethezav/gitleaks/v8/report" +) + +func IsNew(finding report.Finding, baseline []report.Finding) bool { + // Explicitly testing each property as it gives significantly better performance in comparison to cmp.Equal(). 
Drawback is that + // the code requires maintenance if/when the Finding struct changes + for _, b := range baseline { + + if finding.Author == b.Author && + finding.Commit == b.Commit && + finding.Date == b.Date && + finding.Description == b.Description && + finding.Email == b.Email && + finding.EndColumn == b.EndColumn && + finding.EndLine == b.EndLine && + finding.Entropy == b.Entropy && + finding.File == b.File && + // Omit checking finding.Fingerprint - if the format of the fingerprint changes, the users will see unexpected behaviour + finding.Match == b.Match && + finding.Message == b.Message && + finding.RuleID == b.RuleID && + finding.Secret == b.Secret && + finding.StartColumn == b.StartColumn && + finding.StartLine == b.StartLine { + return false + } + } + return true +} + +func LoadBaseline(baselinePath string) ([]report.Finding, error) { + var previousFindings []report.Finding + jsonFile, err := os.Open(baselinePath) + if err != nil { + return nil, fmt.Errorf("could not open %s", baselinePath) + } + + bytes, err := ioutil.ReadAll(jsonFile) + jsonFile.Close() + if err != nil { + return nil, fmt.Errorf("could not read data from the file %s", baselinePath) + } + + err = json.Unmarshal(bytes, &previousFindings) + if err != nil { + return nil, fmt.Errorf("the format of the file %s is not supported", baselinePath) + } + + return previousFindings, nil +} diff --git a/detect/baseline_test.go b/detect/baseline_test.go new file mode 100644 index 000000000..eb8983945 --- /dev/null +++ b/detect/baseline_test.go @@ -0,0 +1,137 @@ +package detect + +import ( + "errors" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/zricethezav/gitleaks/v8/report" +) + +func TestIsNew(t *testing.T) { + tests := []struct { + findings report.Finding + baseline []report.Finding + expect bool + }{ + { + findings: report.Finding{ + Author: "a", + Commit: "0000", + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "0000", + }, + }, + expect: false, + }, + { + 
findings: report.Finding{ + Author: "a", + Commit: "0000", + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "0002", + }, + }, + expect: true, + }, + { + findings: report.Finding{ + Author: "a", + Commit: "0000", + Tags: []string{"a", "b"}, + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "0000", + Tags: []string{"a", "c"}, + }, + }, + expect: false, // Updated tags doesn't make it a new finding + }, + } + for _, test := range tests { + assert.Equal(t, test.expect, IsNew(test.findings, test.baseline)) + } +} + +func TestFileLoadBaseline(t *testing.T) { + tests := []struct { + Filename string + ExpectedError error + }{ + { + Filename: "../testdata/baseline/baseline.csv", + ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.csv is not supported"), + }, + { + Filename: "../testdata/baseline/baseline.sarif", + ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.sarif is not supported"), + }, + { + Filename: "../testdata/baseline/notfound.json", + ExpectedError: errors.New("could not open ../testdata/baseline/notfound.json"), + }, + } + + for _, test := range tests { + _, err := LoadBaseline(test.Filename) + assert.Equal(t, test.ExpectedError.Error(), err.Error()) + } +} + +func TestIgnoreIssuesInBaseline(t *testing.T) { + tests := []struct { + findings []report.Finding + baseline []report.Finding + expectCount int + }{ + { + findings: []report.Finding{ + { + Author: "a", + Commit: "5", + }, + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "5", + }, + }, + expectCount: 0, + }, + { + findings: []report.Finding{ + { + Author: "a", + Commit: "5", + Fingerprint: "a", + }, + }, + baseline: []report.Finding{ + { + Author: "a", + Commit: "5", + Fingerprint: "b", + }, + }, + expectCount: 0, + }, + } + + for _, test := range tests { + d, _ := NewDetectorDefaultConfig() + d.baseline = test.baseline + for _, finding := range test.findings { + d.addFinding(finding) + } + 
assert.Equal(t, test.expectCount, len(d.findings)) + } +} diff --git a/detect/detect.go b/detect/detect.go index 32dc51030..6962b4a6c 100644 --- a/detect/detect.go +++ b/detect/detect.go @@ -66,6 +66,12 @@ type Detector struct { // matching given a set of words (keywords from the rules in the config) prefilter ahocorasick.AhoCorasick + // a list of known findings that should be ignored + baseline []report.Finding + + // path to baseline + baselinePath string + // gitleaksIgnore gitleaksIgnore map[string]bool } @@ -145,6 +151,18 @@ func (d *Detector) AddGitleaksIgnore(gitleaksIgnorePath string) error { return nil } +func (d *Detector) AddBaseline(baselinePath string) error { + if baselinePath != "" { + baseline, err := LoadBaseline(baselinePath) + if err != nil { + return err + } + d.baseline = baseline + } + d.baselinePath = baselinePath + return nil +} + // DetectBytes scans the given bytes and returns a list of findings func (d *Detector) DetectBytes(content []byte) []report.Finding { return d.DetectString(string(content)) @@ -424,7 +442,7 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding { // check if filepath is allowed if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) || - fragment.FilePath == d.Config.Path) { + fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) { return findings } @@ -473,6 +491,11 @@ func (d *Detector) addFinding(finding report.Finding) { return } + if d.baseline != nil && !IsNew(finding, d.baseline) { + log.Debug().Msgf("baseline duplicate -- ignoring finding with Fingerprint %s", finding.Fingerprint) + return + } + d.findingMutex.Lock() d.findings = append(d.findings, finding) if d.Verbose { diff --git a/testdata/baseline/baseline.csv b/testdata/baseline/baseline.csv new file mode 100644 index 000000000..d3f953727 --- /dev/null +++ b/testdata/baseline/baseline.csv @@ -0,0 +1,2 @@ 
+RuleID,Commit,File,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint +1,b,c,f,s,m,s,e,s,e,a,m,f,r,f \ No newline at end of file diff --git a/testdata/baseline/baseline.json b/testdata/baseline/baseline.json new file mode 100644 index 000000000..3a4c5427f --- /dev/null +++ b/testdata/baseline/baseline.json @@ -0,0 +1,40 @@ +[ + { + "Description": "PyPI upload token", + "StartLine": 32, + "EndLine": 32, + "StartColumn": 21, + "EndColumn": 106, + "Match": "************************", + "Secret": "************************", + "File": "detect/detect_test.go", + "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2", + "Entropy": 1.9606875, + "Author": "****", + "Email": "****", + "Date": "2022-03-07T14:33:06Z", + "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test", + "Tags": [], + "RuleID": "pypi-upload-token", + "Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:32" + }, + { + "Description": "PyPI upload token", + "StartLine": 33, + "EndLine": 33, + "StartColumn": 21, + "EndColumn": 106, + "Match": "************************", + "Secret": "************************", + "File": "detect/detect_test.go", + "Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2", + "Entropy": 1.9606875, + "Author": "****", + "Email": "****", + "Date": "2022-03-07T14:33:06Z", + "Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test", + "Tags": [], + "RuleID": "pypi-upload-token", + "Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:33" + } +] diff --git a/testdata/baseline/baseline.sarif b/testdata/baseline/baseline.sarif new file mode 100644 index 000000000..bbaaf0149 --- /dev/null +++ b/testdata/baseline/baseline.sarif @@ -0,0 +1,6 @@ +{ + "$schema": 
"https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json", + "version": "2.1.0", + "runs": [ + ] +}