Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: fetch branches before shallow fetch to reduce the total commits collected #7760

Merged
merged 1 commit into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions backend/plugins/gitextractor/impl/impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,11 @@ func (p GitExtractor) PrepareTaskData(taskCtx plugin.TaskContext, options map[st

func (p GitExtractor) Close(taskCtx plugin.TaskContext) errors.Error {
if taskData, ok := taskCtx.GetData().(*parser.GitExtractorTaskData); ok {
if taskData.GitRepo != nil {
if err := taskData.GitRepo.Close(taskCtx.GetContext()); err != nil {
return errors.Convert(err)
if !taskCtx.GetConfigReader().GetBool("GIT_EXTRACTOR_KEEP_REPO") {
if taskData.GitRepo != nil {
if err := taskData.GitRepo.Close(taskCtx.GetContext()); err != nil {
return errors.Convert(err)
}
}
}
}
Expand Down
20 changes: 12 additions & 8 deletions backend/plugins/gitextractor/parser/clone_gitcli.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,13 +124,11 @@ func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir s
// https://stackoverflow.com/questions/23708231/git-shallow-clone-clone-depth-misses-remote-branches

// 1. clone the repo with depth 1
if err := g.execGitCommand(ctx, "clone", taskData.Options.Url, localDir, "--depth=1", "--bare"); err != nil {
cloneArgs := append([]string{"clone", taskData.Options.Url, localDir, "--depth=1", "--bare"}, args...)
if err := g.execGitCommand(ctx, cloneArgs...); err != nil {
return err
}
// 2. set remote for all branches
// if err := g.execGitCommandIn(ctx, localDir, "remote", "set-branches", "origin", "'*'"); err != nil {
// return err
// } // somehow it fails silently on my local machine, don't know why
// 2. configure to fetch all branches from the remote server so we can collect new commits from them
gitConfig, err := os.OpenFile(path.Join(localDir, "config"), os.O_APPEND|os.O_WRONLY, 0644)
if err != nil {
return errors.Default.Wrap(err, "failed to open git config file")
Expand All @@ -139,14 +137,20 @@ func (g *GitcliCloner) execGitCloneCommand(ctx plugin.SubTaskContext, localDir s
if err != nil {
return errors.Default.Wrap(err, "failed to write to git config file")
}
// 3. fetch all new commits from all branches since the given time
args = append([]string{"fetch", "--progress", fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
// 3. fetch all branches with depth=1 so the next step collects fewer commits
// (I don't know why, but it reduced the total number of commits from 18k to 7k on https://gitlab.com/gitlab-org/gitlab-foss.git with the same parameters)
fetchBranchesArgs := append([]string{"fetch", "--depth=1", "origin"}, args...)
if err := g.execGitCommandIn(ctx, localDir, fetchBranchesArgs...); err != nil {
return errors.Default.Wrap(err, "failed to fetch all branches from the remote server")
}
// 4. fetch all new commits from all branches since the given time
args = append([]string{"fetch", fmt.Sprintf("--shallow-since=%s", since.Format(time.RFC3339))}, args...)
if err := g.execGitCommandIn(ctx, localDir, args...); err != nil {
g.logger.Warn(err, "shallow fetch failed")
}
return nil
} else {
args = append([]string{"clone", taskData.Options.Url, localDir, "--progress", "--bare"}, args...)
args = append([]string{"clone", taskData.Options.Url, localDir, "--bare"}, args...)
return g.execGitCommand(ctx, args...)
}
}
Expand Down
Loading