Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pull gitparse config options out of pkg consts #1072

Merged
merged 2 commits into from
Feb 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 57 additions & 14 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,14 @@ import (
)

const (
Copy link
Collaborator

@ahrav ahrav Feb 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: Can we actually leave the constants and then just use them in your factory function for NewParser, but also make them private? I think they are only ever used in this file.

Ex:

const (
	// defaultDateFormat is the standard date format for git.
	defaultDateFormat = "Mon Jan 02 15:04:05 2006 -0700"

	// maxDiffSize is the maximum size for a diff. Larger diffs will be cut off.
	maxDiffSize = 1 * 1024 * 1024 * 1024 // 1GB
	// maxCommitSize is the maximum size for a commit. Larger commits will be cut off.
	maxCommitSize = maxDiffSize
)
// NewParser creates a GitParse config from options and sets defaults.
func NewParser(options ...Option) *Parser {
	parser := &Parser{
		dateFormat:    defaultDateFormat,
		maxDiffSize:   maxDiffSize,   // 1GB
		maxCommitSize: maxCommitSize, // 1GB
	}
	for _, option := range options {
		option(parser)
	}
	return parser
}

// DateFormat is the standard date format for git.
DateFormat = "Mon Jan 02 15:04:05 2006 -0700"
// defaultDateFormat is the standard date format for git.
defaultDateFormat = "Mon Jan 02 15:04:05 2006 -0700"

// MaxDiffSize is the maximum size for a diff. Larger diffs will be cut off.
MaxDiffSize = 1 * 1024 * 1024 * 1024 // 1GB
// defaultMaxDiffSize is the default maximum size for a diff. Larger diffs will be cut off.
defaultMaxDiffSize = 1 * 1024 * 1024 * 1024 // 1GB

// defaultMaxCommitSize is the default maximum size for a commit. Larger commits will be cut off.
defaultMaxCommitSize = 1 * 1024 * 1024 * 1024 // 1GB
)

// Commit contains commit header info and diffs.
Expand All @@ -42,6 +45,46 @@ type Diff struct {
IsBinary bool
}

// Parser carries the settings that are applied while parsing git output.
// Build one with NewParser so that the defaults are filled in.
type Parser struct {
	// Size limits, configurable through WithMaxDiffSize and WithMaxCommitSize.
	maxDiffSize, maxCommitSize int
	// dateFormat is the time layout used to parse commit dates.
	dateFormat string
}

// WithMaxDiffSize returns an Option that stores maxDiffSize on the Parser.
// Diffs larger than maxDiffSize will be truncated.
func WithMaxDiffSize(maxDiffSize int) Option {
	setLimit := func(p *Parser) {
		p.maxDiffSize = maxDiffSize
	}
	return setLimit
}

// WithMaxCommitSize returns an Option that stores maxCommitSize on the Parser.
// Commits larger than maxCommitSize will be put in the commit channel and
// additional diffs will be added to a new commit.
func WithMaxCommitSize(maxCommitSize int) Option {
	setLimit := func(p *Parser) {
		p.maxCommitSize = maxCommitSize
	}
	return setLimit
}

// Option is a functional option that mutates a Parser. NewParser applies each
// Option, in order, after setting the defaults.
type Option func(*Parser)

// NewParser builds a Parser populated with the package defaults
// (defaultDateFormat, defaultMaxDiffSize, defaultMaxCommitSize) and then
// applies the given options in the order they were passed, so a later option
// overrides an earlier one that sets the same field. Nil options are skipped
// instead of panicking, which lets callers assemble option lists
// conditionally.
func NewParser(options ...Option) *Parser {
	parser := &Parser{
		dateFormat:    defaultDateFormat,
		maxDiffSize:   defaultMaxDiffSize,
		maxCommitSize: defaultMaxCommitSize,
	}
	for _, option := range options {
		if option == nil {
			// Calling a nil func value panics; treat it as "no option".
			continue
		}
		option(parser)
	}
	return parser
}

// Equal compares the content of two Commits to determine if they are the same.
func (c1 *Commit) Equal(c2 *Commit) bool {
switch {
Expand Down Expand Up @@ -75,7 +118,7 @@ func (c1 *Commit) Equal(c2 *Commit) bool {
}

// RepoPath parses the output of the `git log` command for the `source` path.
func RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool) (chan Commit, error) {
func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool) (chan Commit, error) {
args := []string{"-C", source, "log", "-p", "-U5", "--full-history", "--date=format:%a %b %d %H:%M:%S %Y %z"}
if abbreviatedLog {
args = append(args, "--diff-filter=AM")
Expand All @@ -93,11 +136,11 @@ func RepoPath(ctx context.Context, source string, head string, abbreviatedLog bo
cmd.Env = append(cmd.Env, fmt.Sprintf("GIT_DIR=%s", filepath.Join(absPath, ".git")))
}

return executeCommand(ctx, cmd)
return c.executeCommand(ctx, cmd)
}

// Unstaged parses the output of the `git diff` command for the `source` path.
func Unstaged(ctx context.Context, source string) (chan Commit, error) {
func (c *Parser) Unstaged(ctx context.Context, source string) (chan Commit, error) {
args := []string{"-C", source, "diff", "-p", "-U5", "--full-history", "--diff-filter=AM", "--date=format:%a %b %d %H:%M:%S %Y %z", "HEAD"}

cmd := exec.Command("git", args...)
Expand All @@ -107,11 +150,11 @@ func Unstaged(ctx context.Context, source string) (chan Commit, error) {
cmd.Env = append(cmd.Env, fmt.Sprintf("GIT_DIR=%s", filepath.Join(absPath, ".git")))
}

return executeCommand(ctx, cmd)
return c.executeCommand(ctx, cmd)
}

// executeCommand runs an exec.Cmd, reads stdout and stderr, and waits for the Cmd to complete.
func executeCommand(ctx context.Context, cmd *exec.Cmd) (chan Commit, error) {
func (c *Parser) executeCommand(ctx context.Context, cmd *exec.Cmd) (chan Commit, error) {
commitChan := make(chan Commit, 64)

stdOut, err := cmd.StdoutPipe()
Expand All @@ -136,7 +179,7 @@ func executeCommand(ctx context.Context, cmd *exec.Cmd) (chan Commit, error) {
}()

go func() {
FromReader(ctx, stdOut, commitChan)
c.fromReader(ctx, stdOut, commitChan)
if err := cmd.Wait(); err != nil {
log.WithError(err).Debugf("Error waiting for git command to complete.")
}
Expand All @@ -145,7 +188,7 @@ func executeCommand(ctx context.Context, cmd *exec.Cmd) (chan Commit, error) {
return commitChan, nil
}

func FromReader(ctx context.Context, stdOut io.Reader, commitChan chan Commit) {
func (c *Parser) fromReader(ctx context.Context, stdOut io.Reader, commitChan chan Commit) {
outReader := bufio.NewReader(stdOut)
var currentCommit *Commit
var currentDiff *Diff
Expand Down Expand Up @@ -178,7 +221,7 @@ func FromReader(ctx context.Context, stdOut io.Reader, commitChan chan Commit) {
case isAuthorLine(line):
currentCommit.Author = strings.TrimRight(string(line[8:]), "\n")
case isDateLine(line):
date, err := time.Parse(DateFormat, strings.TrimSpace(string(line[6:])))
date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[6:])))
if err != nil {
log.WithError(err).Debug("Could not parse date from git stream.")
}
Expand Down Expand Up @@ -230,8 +273,8 @@ func FromReader(ctx context.Context, stdOut io.Reader, commitChan chan Commit) {
}
}
}
if currentDiff.Content.Len() > MaxDiffSize {
log.Debugf("Diff for %s exceeded MaxDiffSize(%d)", currentDiff.PathB, MaxDiffSize)
if currentDiff.Content.Len() > c.maxDiffSize {
log.Debugf("Diff for %s exceeded MaxDiffSize(%d)", currentDiff.PathB, c.maxDiffSize)
break
}
}
Expand Down
21 changes: 12 additions & 9 deletions pkg/gitparse/gitparse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ func TestBinaryPathParse(t *testing.T) {
func TestSingleCommitSingleDiff(t *testing.T) {
r := bytes.NewReader([]byte(singleCommitSingleDiff))
commitChan := make(chan Commit)
date, _ := time.Parse(DateFormat, "Mon Mar 15 23:27:16 2021 -0700")
parser := NewParser()
date, _ := time.Parse(parser.dateFormat, "Mon Mar 15 23:27:16 2021 -0700")
content := bytes.NewBuffer([]byte(singleCommitSingleDiffDiff))
builder := strings.Builder{}
builder.Write([]byte(singleCommitSingleDiffMessage))
Expand All @@ -108,7 +109,7 @@ func TestSingleCommitSingleDiff(t *testing.T) {
},
}
go func() {
FromReader(context.TODO(), r, commitChan)
parser.fromReader(context.TODO(), r, commitChan)
}()
i := 0
for commit := range commitChan {
Expand All @@ -125,9 +126,10 @@ func TestSingleCommitSingleDiff(t *testing.T) {

func TestMultiCommitContextDiff(t *testing.T) {
r := bytes.NewReader([]byte(singleCommitContextDiff))
parser := NewParser()
commitChan := make(chan Commit)
dateOne, _ := time.Parse(DateFormat, "Mon Mar 15 23:27:16 2021 -0700")
dateTwo, _ := time.Parse(DateFormat, "Wed Dec 12 18:19:21 2018 -0800")
dateOne, _ := time.Parse(parser.dateFormat, "Mon Mar 15 23:27:16 2021 -0700")
dateTwo, _ := time.Parse(parser.dateFormat, "Wed Dec 12 18:19:21 2018 -0800")
diffOneA := bytes.NewBuffer([]byte(singleCommitContextDiffDiffOneA))
diffTwoA := bytes.NewBuffer([]byte(singleCommitContextDiffDiffTwoA))
// diffTwoB := bytes.NewBuffer([]byte(singleCommitContextDiffDiffTwoB))
Expand Down Expand Up @@ -166,7 +168,7 @@ func TestMultiCommitContextDiff(t *testing.T) {
},
}
go func() {
FromReader(context.TODO(), r, commitChan)
NewParser().fromReader(context.TODO(), r, commitChan)
}()
i := 0
for commit := range commitChan {
Expand All @@ -182,9 +184,10 @@ func TestMultiCommitContextDiff(t *testing.T) {
}

func TestMaxDiffSize(t *testing.T) {
parser := NewParser()
bigBytes := bytes.Buffer{}
bigBytes.WriteString(singleCommitSingleDiff)
for i := 0; i <= MaxDiffSize/1024+10; i++ {
for i := 0; i <= parser.maxDiffSize/1024+10; i++ {
bigBytes.WriteString("+")
for n := 0; n < 1024; n++ {
bigBytes.Write([]byte("0"))
Expand All @@ -195,12 +198,12 @@ func TestMaxDiffSize(t *testing.T) {

commitChan := make(chan Commit)
go func() {
FromReader(context.TODO(), bigReader, commitChan)
parser.fromReader(context.TODO(), bigReader, commitChan)
}()

commit := <-commitChan
if commit.Diffs[0].Content.Len() > MaxDiffSize+1024 {
t.Errorf("diff did not match MaxDiffSize. Got: %d, expected (max): %d", commit.Diffs[0].Content.Len(), MaxDiffSize+1024)
if commit.Diffs[0].Content.Len() > parser.maxDiffSize+1024 {
t.Errorf("diff did not match MaxDiffSize. Got: %d, expected (max): %d", commit.Diffs[0].Content.Len(), parser.maxDiffSize+1024)
}

}
Expand Down
4 changes: 2 additions & 2 deletions pkg/sources/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
return err
}

commitChan, err := gitparse.RepoPath(ctx, path, scanOptions.HeadHash, scanOptions.BaseHash == "")
commitChan, err := gitparse.NewParser().RepoPath(ctx, path, scanOptions.HeadHash, scanOptions.BaseHash == "")
if err != nil {
return err
}
Expand Down Expand Up @@ -469,7 +469,7 @@ func (s *Git) ScanUnstaged(ctx context.Context, repo *git.Repository, path strin
// get the URL metadata for reporting (may be empty)
urlMetadata := getSafeRemoteURL(repo, "origin")

commitChan, err := gitparse.Unstaged(ctx, path)
commitChan, err := gitparse.NewParser().Unstaged(ctx, path)
if err != nil {
return err
}
Expand Down