Skip to content

Commit

Permalink
Only scan files that changed from base to head (#68)
Browse files: browse the repository at this point in the history
* Only scan files that changed from base to head

* Acknowledge ignored errors
  • Loading branch information
bill-rich authored Mar 8, 2022
1 parent 2a0ece9 commit 55b839f
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 19 deletions.
30 changes: 17 additions & 13 deletions pkg/engine/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,28 @@ func (e *Engine) ScanGit(ctx context.Context, repoPath, headRef, baseRef string,
}
}

if headRef != "" {
headHash, err := git.TryAdditionalBaseRefs(repo, headRef)
if headRef == "" {
head, err := repo.Head()
if err != nil {
return fmt.Errorf("could not parse revision: %q: %w", headRef, err)
return err
}
headRef = head.Hash().String()
}
headHash, err := git.TryAdditionalBaseRefs(repo, headRef)
if err != nil {
return fmt.Errorf("could not parse revision: %q: %w", headRef, err)
}

headCommit, err = repo.CommitObject(*headHash)
if err != nil {
return fmt.Errorf("could not find commit: %q: %w", headRef, err)
}
headCommit, err = repo.CommitObject(*headHash)
if err != nil {
return fmt.Errorf("could not find commit: %q: %w", headRef, err)
}

logrus.WithFields(logrus.Fields{
"commit": headCommit.Hash.String(),
}).Debug("resolved head reference")
logrus.WithFields(logrus.Fields{
"commit": headCommit.Hash.String(),
}).Debug("resolved head reference")

logOptions.From = headCommit.Hash
logOptions.All = false
}
logOptions.From = headCommit.Hash

gitSource := git.NewGit(sourcespb.SourceType_SOURCE_TYPE_GIT, 0, 0, "local", true, runtime.NumCPU(),
func(file, email, commit, timestamp, repository string) *source_metadatapb.MetaData {
Expand Down
35 changes: 30 additions & 5 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"runtime"
"sort"
"strings"
"sync"
"time"

"github.com/go-errors/errors"
Expand Down Expand Up @@ -261,18 +262,44 @@ func (s *Git) ScanCommits(repo *git.Repository, scanOptions *ScanOptions, chunks
return err
}
commits := map[int64][]*object.Commit{}
seenMap := map[plumbing.Hash]bool{}

depth := int64(0)

if scanOptions.BaseCommit != nil {
var head *object.Commit
if scanOptions.HeadCommit != nil {
head = scanOptions.HeadCommit
}
headMap := sync.Map{}
headIter, err := head.Files()
if err == nil {
_ = headIter.ForEach(func(file *object.File) error {
headMap.Store(file.Hash, struct{}{})
return nil
})
}
parentCommits := []*object.Commit{}
parentHashes := scanOptions.BaseCommit.ParentHashes
for _, parentHash := range parentHashes {
parentCommit, err := repo.CommitObject(parentHash)
if err != nil {
log.WithError(err).WithField("parentHash", parentHash.String()).WithField("commit", scanOptions.BaseCommit.Hash.String()).Debug("could not find parent commit")
}
dummyMap := map[plumbing.Hash]bool{}
s.scanCommit(repo, parentCommit, &dummyMap, scanOptions, true, chunksChan)
parentCommits = append(parentCommits, parentCommit)
parentIter, err := parentCommit.Files()
if err == nil {
_ = parentIter.ForEach(func(file *object.File) error {
_, ok := headMap.Load(file.Hash)
if ok {
seenMap[file.Hash] = true
}
return nil
})
}
}
for _, parentCommit := range parentCommits {
s.scanCommit(repo, parentCommit, &seenMap, scanOptions, true, chunksChan)
}
}

Expand Down Expand Up @@ -301,8 +328,6 @@ func (s *Git) ScanCommits(repo *git.Repository, scanOptions *ScanOptions, chunks
i++
}

seenMap := map[plumbing.Hash]bool{}

// Sort the timestamps
sort.Slice(keys, func(i, j int) bool { return keys[i] < keys[j] })
for _, commitTime := range keys {
Expand Down Expand Up @@ -473,7 +498,7 @@ func verifyOptions(scanOptions *ScanOptions) error {
head := scanOptions.HeadCommit
if base != nil && head != nil {
if ok, _ := base.IsAncestor(head); !ok {
return fmt.Errorf("unable to scan from requested head to end commit. %s is not an ancestor of %s", base, head)
return fmt.Errorf("unable to scan from requested head to end commit. %s is not an ancestor of %s", base.Hash.String(), head.Hash.String())
}
}
return nil
Expand Down
6 changes: 5 additions & 1 deletion pkg/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,11 @@ func (s *Source) addReposByOrg(ctx context.Context, apiClient *github.Client, or
if handled := handleRateLimit(err, res); handled {
continue
}
if len(someRepos) == 0 || err != nil {
if err != nil {
log.WithError(err).WithField("org", org).Errorf("could not load list repos for org")
break
}
if len(someRepos) == 0 {
break
}
for _, r := range someRepos {
Expand Down

0 comments on commit 55b839f

Please sign in to comment.