diff --git a/pkg/engine/git.go b/pkg/engine/git.go index 67029ac82f78..2907ccf2b4a3 100644 --- a/pkg/engine/git.go +++ b/pkg/engine/git.go @@ -40,24 +40,28 @@ func (e *Engine) ScanGit(ctx context.Context, repoPath, headRef, baseRef string, } } - if headRef != "" { - headHash, err := git.TryAdditionalBaseRefs(repo, headRef) + if headRef == "" { + head, err := repo.Head() if err != nil { - return fmt.Errorf("could not parse revision: %q: %w", headRef, err) + return err } + headRef = head.Hash().String() + } + headHash, err := git.TryAdditionalBaseRefs(repo, headRef) + if err != nil { + return fmt.Errorf("could not parse revision: %q: %w", headRef, err) + } - headCommit, err = repo.CommitObject(*headHash) - if err != nil { - return fmt.Errorf("could not find commit: %q: %w", headRef, err) - } + headCommit, err = repo.CommitObject(*headHash) + if err != nil { + return fmt.Errorf("could not find commit: %q: %w", headRef, err) + } - logrus.WithFields(logrus.Fields{ - "commit": headCommit.Hash.String(), - }).Debug("resolved head reference") + logrus.WithFields(logrus.Fields{ + "commit": headCommit.Hash.String(), + }).Debug("resolved head reference") - logOptions.From = headCommit.Hash - logOptions.All = false - } + logOptions.From = headCommit.Hash gitSource := git.NewGit(sourcespb.SourceType_SOURCE_TYPE_GIT, 0, 0, "local", true, runtime.NumCPU(), func(file, email, commit, timestamp, repository string) *source_metadatapb.MetaData { diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index a6a515e24320..552c3e28beac 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -12,6 +12,7 @@ import ( "runtime" "sort" "strings" + "sync" "time" "github.com/go-errors/errors" @@ -261,18 +262,44 @@ func (s *Git) ScanCommits(repo *git.Repository, scanOptions *ScanOptions, chunks return err } commits := map[int64][]*object.Commit{} + seenMap := map[plumbing.Hash]bool{} depth := int64(0) if scanOptions.BaseCommit != nil { + var head *object.Commit + if scanOptions.HeadCommit != nil { + head = scanOptions.HeadCommit + } + headMap := sync.Map{} + headIter, err := head.Files() + if err == nil { + _ = headIter.ForEach(func(file *object.File) error { + headMap.Store(file.Hash, struct{}{}) + return nil + }) + } + parentCommits := []*object.Commit{} parentHashes := scanOptions.BaseCommit.ParentHashes for _, parentHash := range parentHashes { parentCommit, err := repo.CommitObject(parentHash) if err != nil { log.WithError(err).WithField("parentHash", parentHash.String()).WithField("commit", scanOptions.BaseCommit.Hash.String()).Debug("could not find parent commit") } - dummyMap := map[plumbing.Hash]bool{} - s.scanCommit(repo, parentCommit, &dummyMap, scanOptions, true, chunksChan) + parentCommits = append(parentCommits, parentCommit) + parentIter, err := parentCommit.Files() + if err == nil { + _ = parentIter.ForEach(func(file *object.File) error { + _, ok := headMap.Load(file.Hash) + if ok { + seenMap[file.Hash] = true + } + return nil + }) + } + } + for _, parentCommit := range parentCommits { + s.scanCommit(repo, parentCommit, &seenMap, scanOptions, true, chunksChan) } } @@ -301,8 +328,6 @@ func (s *Git) ScanCommits(repo *git.Repository, scanOptions *ScanOptions, chunks i++ } - seenMap := map[plumbing.Hash]bool{} - // Sort the timestamps sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] }) for _, commitTime := range keys { @@ -473,7 +498,7 @@ func verifyOptions(scanOptions *ScanOptions) error { head := scanOptions.HeadCommit if base != nil && head != nil { if ok, _ := base.IsAncestor(head); !ok { - return fmt.Errorf("unable to scan from requested head to end commit. %s is not an ancestor of %s", base, head) + return fmt.Errorf("unable to scan from requested head to end commit. %s is not an ancestor of %s", base.Hash.String(), head.Hash.String()) } } return nil diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go index 1afd7147f107..215c0e087a08 100644 --- a/pkg/sources/github/github.go +++ b/pkg/sources/github/github.go @@ -399,7 +399,11 @@ func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, or if handled := handleRateLimit(err, res); handled { continue } - if len(someRepos) == 0 || err != nil { + if err != nil { + log.WithError(err).WithField("org", org).Errorf("could not load list repos for org") + break + } + if len(someRepos) == 0 { break } for _, r := range someRepos {