diff --git a/pkg/engine/git.go b/pkg/engine/git.go index c0bd34d45c4f..1100a56ccc17 100644 --- a/pkg/engine/git.go +++ b/pkg/engine/git.go @@ -4,10 +4,7 @@ import ( "fmt" "runtime" - "github.com/go-errors/errors" gogit "github.com/go-git/go-git/v5" - "github.com/go-git/go-git/v5/plumbing" - "github.com/go-git/go-git/v5/plumbing/object" "github.com/sirupsen/logrus" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -31,53 +28,6 @@ func (e *Engine) ScanGit(ctx context.Context, c sources.Config) error { return fmt.Errorf("could open repo: %s: %w", c.RepoPath, err) } - var baseCommit *object.Commit - if len(c.BaseRef) > 0 { - baseHash := plumbing.NewHash(c.BaseRef) - if !plumbing.IsHash(c.BaseRef) { - base, err := git.TryAdditionalBaseRefs(repo, c.BaseRef) - if err != nil { - return errors.WrapPrefix(err, "unable to resolve base ref", 0) - } else { - c.BaseRef = base.String() - baseCommit, _ = repo.CommitObject(plumbing.NewHash(c.BaseRef)) - } - } else { - baseCommit, err = repo.CommitObject(baseHash) - if err != nil { - return errors.WrapPrefix(err, "unable to resolve base ref", 0) - } - } - } - - var headCommit *object.Commit - if len(c.HeadRef) > 0 { - headHash := plumbing.NewHash(c.HeadRef) - if !plumbing.IsHash(c.HeadRef) { - head, err := git.TryAdditionalBaseRefs(repo, c.HeadRef) - if err != nil { - return errors.WrapPrefix(err, "unable to resolve head ref", 0) - } else { - c.HeadRef = head.String() - headCommit, _ = repo.CommitObject(plumbing.NewHash(c.HeadRef)) - } - } else { - headCommit, err = repo.CommitObject(headHash) - if err != nil { - return errors.WrapPrefix(err, "unable to resolve head ref", 0) - } - } - } - - // If baseCommit is an ancestor of headCommit, update c.BaseRef to be the common ancestor. - if headCommit != nil && baseCommit != nil { - mergeBase, err := headCommit.MergeBase(baseCommit) - if err != nil || len(mergeBase) < 1 { - return errors.WrapPrefix(err, "could not find common base between the given references", 0) - } - c.BaseRef = mergeBase[0].Hash.String() - } - if c.MaxDepth != 0 { opts = append(opts, git.ScanOptionMaxDepth(int64(c.MaxDepth))) } diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index b5cc68313f42..0b56aef23cc0 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -15,6 +15,7 @@ import ( "github.com/go-errors/errors" "github.com/go-git/go-git/v5" "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" "github.com/google/go-github/v42/github" "github.com/rs/zerolog" log "github.com/sirupsen/logrus" @@ -301,6 +302,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string var reachedBase = false log.Debugf("Scanning repo") for commit := range commitChan { + log.Debugf("Scanning commit %s", commit.Hash) if scanOptions.MaxDepth > 0 && depth >= scanOptions.MaxDepth { log.Debugf("reached max depth") break @@ -441,6 +443,9 @@ func (s *Git) ScanUnstaged(ctx context.Context, repo *git.Repository, path strin } func (s *Git) ScanRepo(ctx context.Context, repo *git.Repository, repoPath string, scanOptions *ScanOptions, chunksChan chan *sources.Chunk) error { + if err := normalizeConfig(scanOptions, repo); err != nil { + return err + } start := time.Now().UnixNano() if err := s.ScanCommits(ctx, repo, repoPath, scanOptions, chunksChan); err != nil { return err @@ -453,6 +458,55 @@ func (s *Git) ScanRepo(ctx context.Context, repo *git.Repository, repoPath strin return nil } +func normalizeConfig(scanOptions *ScanOptions, repo *git.Repository) (err error) { + var baseCommit *object.Commit + if len(scanOptions.BaseHash) > 0 { + baseHash := plumbing.NewHash(scanOptions.BaseHash) + if !plumbing.IsHash(scanOptions.BaseHash) { + base, err := TryAdditionalBaseRefs(repo, scanOptions.BaseHash) + if err != nil { + return errors.WrapPrefix(err, "unable to resolve base ref", 0) + } + scanOptions.BaseHash = base.String() + baseCommit, _ = repo.CommitObject(plumbing.NewHash(scanOptions.BaseHash)) + } else { + baseCommit, err = repo.CommitObject(baseHash) + if err != nil { + return errors.WrapPrefix(err, "unable to resolve base ref", 0) + } + } + } + + var headCommit *object.Commit + if len(scanOptions.HeadHash) > 0 { + headHash := plumbing.NewHash(scanOptions.HeadHash) + if !plumbing.IsHash(scanOptions.HeadHash) { + head, err := TryAdditionalBaseRefs(repo, scanOptions.HeadHash) + if err != nil { + return errors.WrapPrefix(err, "unable to resolve head ref", 0) + } + scanOptions.HeadHash = head.String() + headCommit, _ = repo.CommitObject(plumbing.NewHash(scanOptions.HeadHash)) + } else { + headCommit, err = repo.CommitObject(headHash) + if err != nil { + return errors.WrapPrefix(err, "unable to resolve head ref", 0) + } + } + } + + // If baseCommit is an ancestor of headCommit, update c.BaseRef to be the common ancestor. + if headCommit != nil && baseCommit != nil { + mergeBase, err := headCommit.MergeBase(baseCommit) + if err != nil || len(mergeBase) < 1 { + return errors.WrapPrefix(err, "could not find common base between the given references", 0) + } + scanOptions.BaseHash = mergeBase[0].Hash.String() + } + + return nil +} + // GenerateLink crafts a link to the specific file from a commit. This works in most major git providers (Github/Gitlab) func GenerateLink(repo, commit, file string) string { // bitbucket links are commits not commit... diff --git a/pkg/sources/git/git_test.go b/pkg/sources/git/git_test.go index b1ec863a364d..500b748d2522 100644 --- a/pkg/sources/git/git_test.go +++ b/pkg/sources/git/git_test.go @@ -210,19 +210,13 @@ func TestSource_Chunks_Integration(t *testing.T) { name string init init //verified + repoURL string expectedChunkData map[string]*byteCompare + scanOptions ScanOptions }{ { - name: "remote repo, unauthenticated", - init: init{ - name: "test source", - connection: &sourcespb.Git{ - Repositories: []string{"https://github.com/dustin-decker/secretsandstuff.git"}, - Credential: &sourcespb.Git_Unauthenticated{ - Unauthenticated: &credentialspb.Unauthenticated{}, - }, - }, - }, + name: "remote repo, unauthenticated", + repoURL: "https://github.com/dustin-decker/secretsandstuff.git", expectedChunkData: map[string]*byteCompare{ "70001020fab32b1fcf2f1f0e5c66424eae649826-aws": {B: []byte("[default]\naws_access_key_id = AKIAXYZDQCEN4B6JSJQI\naws_secret_access_key = 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\noutput = json\nregion = us-east-2\n")}, "a6f8aa55736d4a85be31a0048a4607396898647a-bump": {B: []byte("\n\nf\n")}, @@ -239,6 +233,40 @@ func TestSource_Chunks_Integration(t *testing.T) { "84e9c75e388ae3e866e121087ea2dd45a71068f2-aws": {B: []byte("\n\nthis is the secret: [Default]\nAccess key Id: AKIAILE3JG6KMS3HZGCA\nSecret Access Key: 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\n\nokay thank you bye\n"), Multi: false}, }, }, + { + name: "remote repo, limited", + repoURL: "https://github.com/dustin-decker/secretsandstuff.git", + expectedChunkData: map[string]*byteCompare{ + "70001020fab32b1fcf2f1f0e5c66424eae649826-aws": {B: []byte("[default]\naws_access_key_id = AKIAXYZDQCEN4B6JSJQI\naws_secret_access_key = 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\noutput = json\nregion = us-east-2\n")}, + "a6f8aa55736d4a85be31a0048a4607396898647a-bump": {B: []byte("\n\nf\n")}, + }, + scanOptions: ScanOptions{ + HeadHash: "70001020fab32b1fcf2f1f0e5c66424eae649826", + BaseHash: "a6f8aa55736d4a85be31a0048a4607396898647a", + }, + }, + { + name: "remote repo, base ahead of head", + repoURL: "https://github.com/dustin-decker/secretsandstuff.git", + expectedChunkData: map[string]*byteCompare{ + "a6f8aa55736d4a85be31a0048a4607396898647a-bump": {B: []byte("\n\nf\n")}, + }, + scanOptions: ScanOptions{ + HeadHash: "a6f8aa55736d4a85be31a0048a4607396898647a", + BaseHash: "70001020fab32b1fcf2f1f0e5c66424eae649826", + }, + }, + { + name: "remote repo, main ahead of branch", + repoURL: "https://github.com/bill-rich/bad-secrets.git", + expectedChunkData: map[string]*byteCompare{ + "547865c6cc0da46622306902b1b66f7e25dd0412-some_branch_file": {B: []byte("[default]\naws_access_key=AKIAYVP4CIPPH5TNP3SW\naws_secret_access_key=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\nregion=us-east-2\noutput=json\n\n#addibng a comment\n")}, + }, + scanOptions: ScanOptions{ + HeadHash: "some_branch", + BaseHash: "master", + }, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -256,7 +284,11 @@ func TestSource_Chunks_Integration(t *testing.T) { chunksCh := make(chan *sources.Chunk, 1) go func() { defer close(chunksCh) - err := s.Chunks(ctx, chunksCh) + repoPath, repo, err := CloneRepoUsingUnauthenticated(tt.repoURL) + if err != nil { + panic(err) + } + err = s.git.ScanRepo(ctx, repo, repoPath, &tt.scanOptions, chunksCh) if err != nil { panic(err) }