// getCommitInfoState holds the transient state used while resolving the most
// recent commit that touched each entry of a tree.
type getCommitInfoState struct {
	entries        map[string]*TreeEntry // entries keyed by filepath.Join(treePath, entry name)
	commits        map[string]*Commit    // commits resolved so far, keyed by the same joined path as entries
	lastCommitHash string                // hash most recently read from the log output; empty before the first search
	lastCommit     *Commit               // commit loaded on demand for lastCommitHash; nil until it is requested
	treePath       string                // path of the tree whose entries are being resolved
	headCommit     *Commit               // commit supplied by the caller; its repo is used to run git and load commits
	nextSearchSize int                   // number of commits the next tree-wide log search asks for
}
+func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitInfoState { + entriesByPath := make(map[string]*TreeEntry, len(entries)) + for _, entry := range entries { + entriesByPath[filepath.Join(treePath, entry.Name())] = entry + } + return &getCommitInfoState{ + entries: entriesByPath, + commits: make(map[string]*Commit, len(entriesByPath)), + treePath: treePath, + headCommit: headCommit, + nextSearchSize: 16, + } +} + +// GetCommitsInfo gets information of all commits that are corresponding to these entries func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) { - return tes.GetCommitsInfoWithCustomConcurrency(commit, treePath, 0) + state := initGetCommitInfoState(tes, commit, treePath) + if err := getCommitsInfo(state); err != nil { + return nil, err + } + + commitsInfo := make([][]interface{}, len(tes)) + for i, entry := range tes { + commit = state.commits[filepath.Join(treePath, entry.Name())] + switch entry.Type { + case ObjectCommit: + subModuleURL := "" + if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil { + return nil, err + } else if subModule != nil { + subModuleURL = subModule.URL + } + subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String()) + commitsInfo[i] = []interface{}{entry, subModuleFile} + default: + commitsInfo[i] = []interface{}{entry, commit} + } + } + return commitsInfo, nil } -// GetCommitsInfoWithCustomConcurrency takes advantages of concurrency to speed up getting information -// of all commits that are corresponding to these entries. If the given maxConcurrency is negative or -// equal to zero: the right number of goroutine (concurrency) to use will be chosen related of the -// host CPU. 
-func (tes Entries) GetCommitsInfoWithCustomConcurrency(commit *Commit, treePath string, maxConcurrency int) ([][]interface{}, error) { - if len(tes) == 0 { - return nil, nil +func (state *getCommitInfoState) nextCommit(hash string) { + state.lastCommitHash = hash + state.lastCommit = nil +} + +func (state *getCommitInfoState) commit() (*Commit, error) { + var err error + if state.lastCommit == nil { + state.lastCommit, err = state.headCommit.repo.GetCommit(state.lastCommitHash) } + return state.lastCommit, err +} - if maxConcurrency <= 0 { - maxConcurrency = runtime.NumCPU() +func (state *getCommitInfoState) update(path string) error { + relPath, err := filepath.Rel(state.treePath, path) + if err != nil { + return nil + } + var entryPath string + if index := strings.IndexRune(relPath, '/'); index >= 0 { + entryPath = filepath.Join(state.treePath, relPath[:index]) + } else { + entryPath = path } + if _, ok := state.entries[entryPath]; !ok { + return nil + } else if _, ok := state.commits[entryPath]; ok { + return nil + } + state.commits[entryPath], err = state.commit() + return err +} - // Length of taskChan determines how many goroutines (subprocesses) can run at the same time. - // The length of revChan should be same as taskChan so goroutines whoever finished job can - // exit as early as possible, only store data inside channel. - taskChan := make(chan bool, maxConcurrency) - revChan := make(chan commitInfo, maxConcurrency) - doneChan := make(chan error) - - // Receive loop will exit when it collects same number of data pieces as tree entries. - // It notifies doneChan before exits or notify early with possible error. 
- infoMap := make(map[string][]interface{}, len(tes)) - go func() { - i := 0 - for info := range revChan { - if info.err != nil { - doneChan <- info.err - return - } +func getCommitsInfo(state *getCommitInfoState) error { + for len(state.entries) > len(state.commits) { + if err := getNextCommitInfos(state); err != nil { + return err + } + } + return nil +} - infoMap[info.entryName] = info.infos - i++ - if i == len(tes) { +func getNextCommitInfos(state *getCommitInfoState) error { + logOutput, err := logCommand(state.lastCommitHash, state).RunInDir(state.headCommit.repo.Path) + if err != nil { + return err + } + lines := strings.Split(logOutput, "\n") + i := 0 + for i < len(lines) { + state.nextCommit(lines[i]) + i++ + for ; i < len(lines); i++ { + path := lines[i] + if path == "" { break } + state.update(path) } - doneChan <- nil - }() - - for i := range tes { - // When taskChan is idle (or has empty slots), put operation will not block. - // However when taskChan is full, code will block and wait any running goroutines to finish. - taskChan <- true - - if tes[i].Type != ObjectCommit { - go func(i int) { - cinfo := commitInfo{entryName: tes[i].Name()} - c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name())) - if err != nil { - cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err) - } else { - cinfo.infos = []interface{}{tes[i], c} - } - revChan <- cinfo - <-taskChan // Clear one slot from taskChan to allow new goroutines to start. 
- }(i) - continue + i++ // skip blank line + if len(state.entries) == len(state.commits) { + break } - - // Handle submodule - go func(i int) { - cinfo := commitInfo{entryName: tes[i].Name()} - sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name())) - if err != nil && !IsErrNotExist(err) { - cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err) - revChan <- cinfo - return - } - - smURL := "" - if sm != nil { - smURL = sm.URL - } - - c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name())) - if err != nil { - cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err) - } else { - cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smURL, tes[i].ID.String())} - } - revChan <- cinfo - <-taskChan - }(i) } + return nil +} - if err := <-doneChan; err != nil { - return nil, err +func logCommand(exclusiveStartHash string, state *getCommitInfoState) *Command { + var commitHash string + if len(exclusiveStartHash) == 0 { + commitHash = "HEAD" + } else { + commitHash = exclusiveStartHash + "^" } - - commitsInfo := make([][]interface{}, len(tes)) - for i := 0; i < len(tes); i++ { - commitsInfo[i] = infoMap[tes[i].Name()] + var command *Command + numRemainingEntries := len(state.entries) - len(state.commits) + if numRemainingEntries < 32 { + searchSize := (numRemainingEntries + 1) / 2 + command = NewCommand("log", prettyLogFormat, "--name-only", + "-"+strconv.Itoa(searchSize), commitHash, "--") + for path, entry := range state.entries { + if _, ok := state.commits[entry.Name()]; !ok { + command.AddArguments(path) + } + } + } else { + command = NewCommand("log", prettyLogFormat, "--name-only", + "-"+strconv.Itoa(state.nextSearchSize), commitHash, "--", state.treePath) } - return commitsInfo, nil + state.nextSearchSize += state.nextSearchSize + return command } diff --git a/tree_entry_test.go b/tree_entry_test.go new file mode 100644 index 000000000..a52692e67 --- /dev/null +++ b/tree_entry_test.go 
@@ -0,0 +1,64 @@ +// Copyright 2017 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package git + +import ( + "os" + "path/filepath" + "testing" + "time" +) + +const benchmarkReposDir = "benchmark/repos/" + +func setupGitRepo(url string, name string) (string, error) { + repoDir := filepath.Join(benchmarkReposDir, name) + if _, err := os.Stat(repoDir); err == nil { + return repoDir, nil + } + return repoDir, Clone(url, repoDir, CloneRepoOptions{ + Mirror: false, + Bare: false, + Quiet: true, + Timeout: 5 * time.Minute, + }) +} + +func BenchmarkEntries_GetCommitsInfo(b *testing.B) { + benchmarks := []struct { + url string + name string + }{ + {url: "https://github.com/go-gitea/gitea.git", name: "gitea"}, + {url: "https://github.com/ethantkoenig/manyfiles.git", name: "manyfiles"}, + {url: "https://github.com/moby/moby.git", name: "moby"}, + {url: "https://github.com/golang/go.git", name: "go"}, + {url: "https://github.com/torvalds/linux.git", name: "linux"}, + } + for _, benchmark := range benchmarks { + b.StopTimer() + var commit *Commit + var entries Entries + if repoPath, err := setupGitRepo(benchmark.url, benchmark.name); err != nil { + panic(err) + } else if repo, err := OpenRepository(repoPath); err != nil { + panic(err) + } else if commit, err = repo.GetBranchCommit("master"); err != nil { + panic(err) + } else if entries, err = commit.Tree.ListEntries(); err != nil { + panic(err) + } + entries.Sort() + b.StartTimer() + b.Run(benchmark.name, func(b *testing.B) { + for i := 0; i < b.N; i++ { + _, err := entries.GetCommitsInfo(commit, "") + if err != nil { + panic(err) + } + } + }) + } +}