Skip to content

Commit

Permalink
Add baseline (gitleaks#975)
Browse files Browse the repository at this point in the history
* Add baseline

* Update doc, add error, move baseline to detect namespace, ignore findings instead of reactively filter them out

* Update detect/detect.go

Co-authored-by: Zachary Rice <zricezrice@gmail.com>

* Update IsNew function (no check on tags - omit finger print check)

* Update README.md

Co-authored-by: Zachary Rice <zricezrice@gmail.com>

* Update examples in readme to make it ensure it's clear that a baseline is indeed a gitleaks report

* Fix test - updated tags doesn't make a finding new

* Add missing err assignment

* Allow scanner to continue without baseline if file is malformed

* Fix typo in comment

* Fix control flow err. (Real life testing)

* Fix wording

* Auto-ignore baseline path
  • Loading branch information
gawansch authored Sep 16, 2022
1 parent 6202053 commit 4f6ee2b
Show file tree
Hide file tree
Showing 9 changed files with 295 additions and 1 deletion.
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ Flags:
--redact redact secrets from logs and stdout
-f, --report-format string output format (json, csv, sarif)
-r, --report-path string report file
-b, --baseline-path path to a previously generated report with known issues that gitleaks should ignore
-s, --source string path to source (git repo, directory, file)
-v, --verbose show verbose output from scan
Expand Down Expand Up @@ -190,6 +191,23 @@ as a pre-commit.

**NOTE**: the `protect` command can only be used on git repos, running `protect` on files or directories will result in an error message.

### Creating a baseline

When scanning large repositories or repositories with a long history, it can be convenient to use a baseline. When using a baseline,
gitleaks will ignore any old findings that are present in the baseline. A baseline can be any gitleaks report. To create a gitleaks report, run gitleaks with the `--report-path` parameter.

```
gitleaks detect --report-path gitleaks-report.json # This will save the report in a file called gitleaks-report.json
```

Once a baseline is created, it can be applied when running the detect command again:

```
gitleaks detect --baseline-path gitleaks-report.json --report-path findings.json
```

After running the detect command with the --baseline-path parameter, report output (findings.json) will only contain new issues.

### Verify Findings

You can verify a finding found by gitleaks using a `git log` command.
Expand Down
9 changes: 9 additions & 0 deletions cmd/detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,15 @@ func runDetect(cmd *cobra.Command, args []string) {
detector.AddGitleaksIgnore(filepath.Join(source, ".gitleaksignore"))
}

// ignore findings from the baseline (an existing report in json format generated earlier)
baselinePath, _ := cmd.Flags().GetString("baseline-path")
if baselinePath != "" {
err = detector.AddBaseline(baselinePath)
if err != nil {
log.Error().Msgf("Could not load baseline. The path must point of a gitleaks report generated using the default format: %s", err)
}
}

// set exit code
exitCode, err := cmd.Flags().GetInt("exit-code")
if err != nil {
Expand Down
1 change: 1 addition & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ func init() {
rootCmd.PersistentFlags().StringP("source", "s", ".", "path to source (default: $PWD)")
rootCmd.PersistentFlags().StringP("report-path", "r", "", "report file")
rootCmd.PersistentFlags().StringP("report-format", "f", "json", "output format (json, csv, sarif)")
rootCmd.PersistentFlags().StringP("baseline-path", "b", "", "path to baseline with issues that can be ignored")
rootCmd.PersistentFlags().StringP("log-level", "l", "info", "log level (trace, debug, info, warn, error, fatal)")
rootCmd.PersistentFlags().BoolP("verbose", "v", false, "show verbose output from scan")
rootCmd.PersistentFlags().Bool("redact", false, "redact secrets from logs and stdout")
Expand Down
58 changes: 58 additions & 0 deletions detect/baseline.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package detect

import (
"encoding/json"
"fmt"
"io/ioutil"
"os"

"github.com/zricethezav/gitleaks/v8/report"
)

// IsNew reports whether finding is absent from baseline. It returns false as
// soon as one baseline entry matches finding on every compared field, and
// true when no entry matches (including when baseline is empty).
//
// Tags and Fingerprint are not part of the comparison. Fingerprint is left
// out on purpose: if the fingerprint format ever changes, users would
// otherwise see unexpected behaviour.
func IsNew(finding report.Finding, baseline []report.Finding) bool {
	// Fields are compared one by one because that performs significantly
	// better than cmp.Equal(). The drawback is that this list requires
	// maintenance if/when the Finding struct changes.
	for _, known := range baseline {
		switch {
		case finding.Author != known.Author,
			finding.Commit != known.Commit,
			finding.Date != known.Date,
			finding.Description != known.Description,
			finding.Email != known.Email,
			finding.EndColumn != known.EndColumn,
			finding.EndLine != known.EndLine,
			finding.Entropy != known.Entropy,
			finding.File != known.File,
			finding.Match != known.Match,
			finding.Message != known.Message,
			finding.RuleID != known.RuleID,
			finding.Secret != known.Secret,
			finding.StartColumn != known.StartColumn,
			finding.StartLine != known.StartLine:
			// At least one field differs; try the next baseline entry.
			continue
		}
		// Every compared field is equal: the finding is already known.
		return false
	}
	return true
}

// LoadBaseline reads the file at baselinePath and decodes its content as a
// JSON array of findings.
//
// It returns an error when the file cannot be opened, cannot be read, or
// does not unmarshal into a list of findings. The returned error names the
// failing step and the path only; the underlying cause is not included.
func LoadBaseline(baselinePath string) ([]report.Finding, error) {
	f, err := os.Open(baselinePath)
	if err != nil {
		return nil, fmt.Errorf("could not open %s", baselinePath)
	}
	defer f.Close()

	raw, err := ioutil.ReadAll(f)
	if err != nil {
		return nil, fmt.Errorf("could not read data from the file %s", baselinePath)
	}

	var known []report.Finding
	if err := json.Unmarshal(raw, &known); err != nil {
		return nil, fmt.Errorf("the format of the file %s is not supported", baselinePath)
	}
	return known, nil
}
137 changes: 137 additions & 0 deletions detect/baseline_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
package detect

import (
"errors"
"testing"

"github.com/stretchr/testify/assert"
"github.com/zricethezav/gitleaks/v8/report"
)

// TestIsNew checks IsNew against single-entry baselines: an identical
// finding is not new, a finding with a different commit is new, and a
// finding that differs only in its tags is not new.
func TestIsNew(t *testing.T) {
	type testCase struct {
		findings report.Finding
		baseline []report.Finding
		expect   bool
	}

	// Identical to the only baseline entry.
	identical := testCase{
		findings: report.Finding{Author: "a", Commit: "0000"},
		baseline: []report.Finding{
			{Author: "a", Commit: "0000"},
		},
		expect: false,
	}

	// Same author, different commit.
	otherCommit := testCase{
		findings: report.Finding{Author: "a", Commit: "0000"},
		baseline: []report.Finding{
			{Author: "a", Commit: "0002"},
		},
		expect: true,
	}

	// Updated tags doesn't make it a new finding.
	otherTags := testCase{
		findings: report.Finding{
			Author: "a",
			Commit: "0000",
			Tags:   []string{"a", "b"},
		},
		baseline: []report.Finding{
			{
				Author: "a",
				Commit: "0000",
				Tags:   []string{"a", "c"},
			},
		},
		expect: false,
	}

	cases := []testCase{identical, otherCommit, otherTags}
	for i := range cases {
		got := IsNew(cases[i].findings, cases[i].baseline)
		assert.Equal(t, cases[i].expect, got)
	}
}

// TestFileLoadBaseline checks that LoadBaseline returns the expected error
// for baseline files it cannot use: a CSV report, a SARIF report, and a path
// that does not exist.
func TestFileLoadBaseline(t *testing.T) {
	tests := []struct {
		Filename      string
		ExpectedError error
	}{
		{
			Filename:      "../testdata/baseline/baseline.csv",
			ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.csv is not supported"),
		},
		{
			Filename:      "../testdata/baseline/baseline.sarif",
			ExpectedError: errors.New("the format of the file ../testdata/baseline/baseline.sarif is not supported"),
		},
		{
			Filename:      "../testdata/baseline/notfound.json",
			ExpectedError: errors.New("could not open ../testdata/baseline/notfound.json"),
		},
	}

	for _, test := range tests {
		_, err := LoadBaseline(test.Filename)
		// EqualError reports a regular test failure when err is nil, whereas
		// calling err.Error() directly would panic with a nil dereference if
		// LoadBaseline unexpectedly succeeded.
		assert.EqualError(t, err, test.ExpectedError.Error())
	}
}

// TestIgnoreIssuesInBaseline checks that addFinding drops findings that are
// already present in the detector's baseline. The second case differs from
// its baseline entry only in Fingerprint and is still expected to be dropped.
func TestIgnoreIssuesInBaseline(t *testing.T) {
	tests := []struct {
		findings    []report.Finding
		baseline    []report.Finding
		expectCount int
	}{
		{
			findings: []report.Finding{
				{
					Author: "a",
					Commit: "5",
				},
			},
			baseline: []report.Finding{
				{
					Author: "a",
					Commit: "5",
				},
			},
			expectCount: 0,
		},
		{
			findings: []report.Finding{
				{
					Author:      "a",
					Commit:      "5",
					Fingerprint: "a",
				},
			},
			baseline: []report.Finding{
				{
					Author:      "a",
					Commit:      "5",
					Fingerprint: "b",
				},
			},
			expectCount: 0,
		},
	}

	for _, test := range tests {
		d, err := NewDetectorDefaultConfig()
		if err != nil {
			// Abort with a clear message instead of continuing with a
			// detector that could not be constructed.
			t.Fatalf("could not create detector with default config: %v", err)
		}
		d.baseline = test.baseline
		for _, finding := range test.findings {
			d.addFinding(finding)
		}
		assert.Equal(t, test.expectCount, len(d.findings))
	}
}
25 changes: 24 additions & 1 deletion detect/detect.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,12 @@ type Detector struct {
// matching given a set of words (keywords from the rules in the config)
prefilter ahocorasick.AhoCorasick

// a list of known findings that should be ignored
baseline []report.Finding

// path to baseline
baselinePath string

// gitleaksIgnore
gitleaksIgnore map[string]bool
}
Expand Down Expand Up @@ -145,6 +151,18 @@ func (d *Detector) AddGitleaksIgnore(gitleaksIgnorePath string) error {
return nil
}

// AddBaseline loads the report at baselinePath via LoadBaseline and stores
// its findings, together with the path itself, on the detector.
//
// An empty baselinePath loads nothing: only the stored path is set (to the
// empty string) and nil is returned. If loading fails, the error is returned
// and neither the stored baseline nor the stored path is modified.
func (d *Detector) AddBaseline(baselinePath string) error {
	if baselinePath == "" {
		d.baselinePath = baselinePath
		return nil
	}

	known, err := LoadBaseline(baselinePath)
	if err != nil {
		return err
	}

	d.baseline = known
	d.baselinePath = baselinePath
	return nil
}

// DetectBytes scans the given bytes and returns a list of findings
func (d *Detector) DetectBytes(content []byte) []report.Finding {
return d.DetectString(string(content))
Expand Down Expand Up @@ -424,7 +442,7 @@ func (d *Detector) Detect(fragment Fragment) []report.Finding {

// check if filepath is allowed
if fragment.FilePath != "" && (d.Config.Allowlist.PathAllowed(fragment.FilePath) ||
fragment.FilePath == d.Config.Path) {
fragment.FilePath == d.Config.Path || (d.baselinePath != "" && fragment.FilePath == d.baselinePath)) {
return findings
}

Expand Down Expand Up @@ -473,6 +491,11 @@ func (d *Detector) addFinding(finding report.Finding) {
return
}

if d.baseline != nil && !IsNew(finding, d.baseline) {
log.Debug().Msgf("baseline duplicate -- ignoring finding with Fingerprint %s", finding.Fingerprint)
return
}

d.findingMutex.Lock()
d.findings = append(d.findings, finding)
if d.Verbose {
Expand Down
2 changes: 2 additions & 0 deletions testdata/baseline/baseline.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
RuleID,Commit,File,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint
1,b,c,f,s,m,s,e,s,e,a,m,f,r,f
40 changes: 40 additions & 0 deletions testdata/baseline/baseline.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
[
{
"Description": "PyPI upload token",
"StartLine": 32,
"EndLine": 32,
"StartColumn": 21,
"EndColumn": 106,
"Match": "************************",
"Secret": "************************",
"File": "detect/detect_test.go",
"Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2",
"Entropy": 1.9606875,
"Author": "****",
"Email": "****",
"Date": "2022-03-07T14:33:06Z",
"Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test",
"Tags": [],
"RuleID": "pypi-upload-token",
"Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:32"
},
{
"Description": "PyPI upload token",
"StartLine": 33,
"EndLine": 33,
"StartColumn": 21,
"EndColumn": 106,
"Match": "************************",
"Secret": "************************",
"File": "detect/detect_test.go",
"Commit": "9326f35380636bcbe61e94b0584d1618c4b5c2c2",
"Entropy": 1.9606875,
"Author": "****",
"Email": "****",
"Date": "2022-03-07T14:33:06Z",
"Message": "Escape - character in regex character groups (#802)\n\n* fix char escape\n\n* add test\n\n* fix verbosity in make test",
"Tags": [],
"RuleID": "pypi-upload-token",
"Fingerprint": "9326f35380636bcbe61e94b0584d1618c4b5c2c2:detect/detect_test.go:pypi-upload-token:33"
}
]
6 changes: 6 additions & 0 deletions testdata/baseline/baseline.sarif
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"$schema": "https://schemastore.azurewebsites.net/schemas/json/sarif-2.1.0-rtm.5.json",
"version": "2.1.0",
"runs": [
]
}

0 comments on commit 4f6ee2b

Please sign in to comment.