From beca473c3460cf66718e99f0ac609a0e9d93bf5b Mon Sep 17 00:00:00 2001 From: d4x1 <1507509064@qq.com> Date: Tue, 25 Jun 2024 11:20:38 +0800 Subject: [PATCH 1/3] fix(gitextractor): update database store, set it incremental by default --- backend/plugins/gitextractor/parser/taskdata.go | 2 +- backend/plugins/gitextractor/store/database.go | 4 +++- backend/plugins/gitextractor/tasks/repo_cloner.go | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/plugins/gitextractor/parser/taskdata.go b/backend/plugins/gitextractor/parser/taskdata.go index 4b3ab01722c..1b22c98855e 100644 --- a/backend/plugins/gitextractor/parser/taskdata.go +++ b/backend/plugins/gitextractor/parser/taskdata.go @@ -25,7 +25,7 @@ type GitExtractorTaskData struct { Options *GitExtractorOptions ParsedURL *url.URL GitRepo RepoCollector - SkipAllSubtasks bool // siliently skip all tasks without raising error + SkipAllSubtasks bool // silently skip all tasks without raising errors } type GitExtractorApiParams struct { diff --git a/backend/plugins/gitextractor/store/database.go b/backend/plugins/gitextractor/store/database.go index 5e03359f87e..44ec55fc17b 100644 --- a/backend/plugins/gitextractor/store/database.go +++ b/backend/plugins/gitextractor/store/database.go @@ -37,7 +37,8 @@ type Database struct { params string } -func NewDatabase(basicRes context.BasicRes, repoId string) *Database { +func NewDatabase(basicRes context.BasicRes, repoId string, incrementalMode bool) *Database { + database := &Database{ table: "gitextractor", params: repoId, @@ -48,6 +49,7 @@ func NewDatabase(basicRes context.BasicRes, repoId string) *Database { database.table, database.params, ) + database.driver.SetIncrementalMode(incrementalMode) return database } diff --git a/backend/plugins/gitextractor/tasks/repo_cloner.go b/backend/plugins/gitextractor/tasks/repo_cloner.go index a00b447b32f..3b8dd0e1df6 100644 --- a/backend/plugins/gitextractor/tasks/repo_cloner.go +++ b/backend/plugins/gitextractor/tasks/repo_cloner.go @@ -42,7 +42,7 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext) errors.Error { panic("git repo reference not found on context") } op := taskData.Options - storage := store.NewDatabase(subTaskCtx, op.RepoId) + storage := store.NewDatabase(subTaskCtx, op.RepoId, true) var err errors.Error logger := subTaskCtx.GetLogger() From f201440e3e62a650b05e996af54b9d6de8d6832a Mon Sep 17 00:00:00 2001 From: d4x1 <1507509064@qq.com> Date: Tue, 25 Jun 2024 12:01:00 +0800 Subject: [PATCH 2/3] fix(gitextractor): add `SetIncrementalMode` to store --- backend/plugins/gitextractor/models/interface.go | 1 + backend/plugins/gitextractor/parser/clone_gitcli.go | 12 ++++++++++++ backend/plugins/gitextractor/store/csv.go | 4 ++++ backend/plugins/gitextractor/store/database.go | 7 +++++-- backend/plugins/gitextractor/tasks/repo_cloner.go | 6 ++++-- 5 files changed, 26 insertions(+), 4 deletions(-) diff --git a/backend/plugins/gitextractor/models/interface.go b/backend/plugins/gitextractor/models/interface.go index cd176f3b59b..0a228de8341 100644 --- a/backend/plugins/gitextractor/models/interface.go +++ b/backend/plugins/gitextractor/models/interface.go @@ -23,6 +23,7 @@ import ( ) type Store interface { + SetIncrementalMode(bool) RepoCommits(repoCommit *code.RepoCommit) errors.Error Commits(commit *code.Commit) errors.Error Refs(ref *code.Ref) errors.Error diff --git a/backend/plugins/gitextractor/parser/clone_gitcli.go b/backend/plugins/gitextractor/parser/clone_gitcli.go index 69883bf1741..9f2ca7ad528 100644 --- a/backend/plugins/gitextractor/parser/clone_gitcli.go +++ b/backend/plugins/gitextractor/parser/clone_gitcli.go @@ -56,6 +56,17 @@ type CloneRepoConfig struct { NoShallowClone bool } +func (g *GitcliCloner) IsIncremental() bool { + if g != nil && g.stateManager != nil { + if g.stateManager.GetSince() != nil { + return true + } + return g.stateManager.IsIncremental() + } + + return false +} + func (g *GitcliCloner) CloneRepo(ctx plugin.SubTaskContext, localDir string) errors.Error { taskData := ctx.GetData().(*GitExtractorTaskData) var since *time.Time @@ -75,6 +86,7 @@ func (g *GitcliCloner) CloneRepo(ctx plugin.SubTaskContext, localDir string) err } g.stateManager = stateManager since = stateManager.GetSince() + } cmd, err := g.buildCloneCommand(ctx, localDir, since) diff --git a/backend/plugins/gitextractor/store/csv.go b/backend/plugins/gitextractor/store/csv.go index 8701501c5c8..4b05361e942 100644 --- a/backend/plugins/gitextractor/store/csv.go +++ b/backend/plugins/gitextractor/store/csv.go @@ -132,6 +132,10 @@ func NewCsvStore(dir string) (*CsvStore, errors.Error) { return s, nil } +func (c *CsvStore) SetIncrementalMode(incrementalMode bool) { + return +} + func (c *CsvStore) RepoCommits(repoCommit *code.RepoCommit) errors.Error { return c.repoCommitWriter.Write(repoCommit) } diff --git a/backend/plugins/gitextractor/store/database.go b/backend/plugins/gitextractor/store/database.go index 44ec55fc17b..c795179ec1f 100644 --- a/backend/plugins/gitextractor/store/database.go +++ b/backend/plugins/gitextractor/store/database.go @@ -37,7 +37,7 @@ type Database struct { params string } -func NewDatabase(basicRes context.BasicRes, repoId string, incrementalMode bool) *Database { +func NewDatabase(basicRes context.BasicRes, repoId string) *Database { database := &Database{ table: "gitextractor", @@ -49,7 +49,6 @@ func NewDatabase(basicRes context.BasicRes, repoId string, incrementalMode bool) database.table, database.params, ) - database.driver.SetIncrementalMode(incrementalMode) return database } @@ -58,6 +57,10 @@ func (d *Database) updateRawDataFields(rawData *common.RawDataOrigin) { rawData.RawDataParams = d.params } +func (d *Database) SetIncrementalMode(incrementalMode bool) { + d.driver.SetIncrementalMode(incrementalMode) +} + func (d *Database) RepoCommits(repoCommit *code.RepoCommit) errors.Error { batch, err := d.driver.ForType(reflect.TypeOf(repoCommit)) if err != nil { diff --git a/backend/plugins/gitextractor/tasks/repo_cloner.go b/backend/plugins/gitextractor/tasks/repo_cloner.go index 3b8dd0e1df6..45eff7bbac5 100644 --- a/backend/plugins/gitextractor/tasks/repo_cloner.go +++ b/backend/plugins/gitextractor/tasks/repo_cloner.go @@ -42,7 +42,7 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext) errors.Error { panic("git repo reference not found on context") } op := taskData.Options - storage := store.NewDatabase(subTaskCtx, op.RepoId, true) + storage := store.NewDatabase(subTaskCtx, op.RepoId) var err errors.Error logger := subTaskCtx.GetLogger() @@ -62,7 +62,9 @@ func CloneGitRepo(subTaskCtx plugin.SubTaskContext) errors.Error { } return err } - + if repoCloner.IsIncremental() { + storage.SetIncrementalMode(repoCloner.IsIncremental()) + } // We have done comparison experiments for git2go and go-git, and the results show that git2go has better performance. var repoCollector parser.RepoCollector if *taskData.Options.UseGoGit { From 9f300cd09a7374c5f3ac64e3636ce28dc079ed5f Mon Sep 17 00:00:00 2001 From: d4x1 <1507509064@qq.com> Date: Tue, 25 Jun 2024 12:04:18 +0800 Subject: [PATCH 3/3] style(gitextractor): fix ci errors --- backend/plugins/gitextractor/store/csv.go | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/plugins/gitextractor/store/csv.go b/backend/plugins/gitextractor/store/csv.go index 4b05361e942..982485aea6e 100644 --- a/backend/plugins/gitextractor/store/csv.go +++ b/backend/plugins/gitextractor/store/csv.go @@ -133,7 +133,6 @@ func NewCsvStore(dir string) (*CsvStore, errors.Error) { } func (c *CsvStore) SetIncrementalMode(incrementalMode bool) { - return } func (c *CsvStore) RepoCommits(repoCommit *code.RepoCommit) errors.Error {