Skip to content
This repository has been archived by the owner on Apr 12, 2019. It is now read-only.

Commit

Permalink
Faster implementation of GetCommitsInfo
Browse files Browse the repository at this point in the history
  • Loading branch information
ethantkoenig committed May 25, 2017
1 parent f0a094c commit ab9e103
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 95 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ _testmain.go
*.prof

coverage.out

benchmark_repos/
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
IMPORT := code.gitea.io/git

PACKAGES ?= $(shell go list ./... | grep -v /vendor/)
PACKAGES ?= $(shell go list ./... | grep -v /vendor/ | grep -v /benchmark_repos/)
GENERATE ?= code.gitea.io/git

.PHONY: all
Expand All @@ -18,7 +18,7 @@ generate:

.PHONY: fmt
fmt:
find . -name "*.go" -type f -not -path "./vendor/*" | xargs gofmt -s -w
find . -name "*.go" -type f -not -path "./vendor/*" -not -path "./benchmark_repos/*" | xargs gofmt -s -w

.PHONY: vet
vet:
Expand All @@ -35,6 +35,10 @@ lint:
test:
for PKG in $(PACKAGES); do go test -cover -coverprofile $$GOPATH/src/$$PKG/coverage.out $$PKG || exit 1; done;

.PHONY: bench
bench:
go test -run=XXXXXX -benchtime=10s -bench=. || exit 1

.PHONY: build
build:
go build .
221 changes: 128 additions & 93 deletions tree_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
package git

import (
"fmt"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -147,112 +144,150 @@ func (tes Entries) Sort() {
sort.Sort(tes)
}

type commitInfo struct {
entryName string
infos []interface{}
err error
// getCommitInfoState holds the transient state used while resolving, for each
// tree entry, the commit that getCommitsInfo associates with it. It is created
// by initGetCommitInfoState and mutated as `git log` output is scanned.
type getCommitInfoState struct {
// entries maps filepath.Join(treePath, entry.Name()) to the entry itself.
entries map[string]*TreeEntry
// commits maps the same joined entry path (not the bare entry name) to the
// commit recorded for that entry; an entry is "resolved" once it has a key here.
commits map[string]*Commit
// lastCommitHash is the hash most recently passed to nextCommit.
lastCommitHash string
// lastCommit caches the Commit loaded for lastCommitHash; nil until commit()
// loads it, and reset to nil by nextCommit.
lastCommit *Commit
// treePath is the directory path that every entry name is joined onto.
treePath string
// headCommit is the commit handed to GetCommitsInfo; its repo is used to
// load commits and to run git commands.
headCommit *Commit
// nextSearchSize is the number of commits the next `git log` call asks for;
// it starts at 16 and is doubled by logCommand after every call.
nextSearchSize int
}

// GetCommitsInfo takes advantage of concurrency to speed up getting information
// about the commits that correspond to these entries. This method automatically
// chooses the number of goroutines (the concurrency level) based on the host's CPU count.
// initGetCommitInfoState builds the starting search state for entries that
// live under treePath. Every entry is indexed by its path joined onto
// treePath, no commits are resolved yet, and the first search window is 16
// commits.
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitInfoState {
	state := &getCommitInfoState{
		entries:        make(map[string]*TreeEntry, len(entries)),
		commits:        make(map[string]*Commit, len(entries)),
		treePath:       treePath,
		headCommit:     headCommit,
		nextSearchSize: 16,
	}
	for i := range entries {
		key := filepath.Join(treePath, entries[i].Name())
		state.entries[key] = entries[i]
	}
	return state
}

// GetCommitsInfo gets information of all commits that are corresponding to these entries
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
return tes.GetCommitsInfoWithCustomConcurrency(commit, treePath, 0)
state := initGetCommitInfoState(tes, commit, treePath)
if err := getCommitsInfo(state); err != nil {
return nil, err
}

commitsInfo := make([][]interface{}, len(tes))
for i, entry := range tes {
commit = state.commits[filepath.Join(treePath, entry.Name())]
switch entry.Type {
case ObjectCommit:
subModuleURL := ""
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
return nil, err
} else if subModule != nil {
subModuleURL = subModule.URL
}
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
commitsInfo[i] = []interface{}{entry, subModuleFile}
default:
commitsInfo[i] = []interface{}{entry, commit}
}
}
return commitsInfo, nil
}

// GetCommitsInfoWithCustomConcurrency takes advantage of concurrency to speed up getting information
// about the commits that correspond to these entries. If the given maxConcurrency is negative or
// equal to zero, the number of goroutines (the concurrency level) is chosen based on the
// host's CPU count.
func (tes Entries) GetCommitsInfoWithCustomConcurrency(commit *Commit, treePath string, maxConcurrency int) ([][]interface{}, error) {
if len(tes) == 0 {
return nil, nil
// nextCommit moves the scan on to the commit identified by hash and drops the
// cached Commit object, so that commit() reloads it on demand.
func (state *getCommitInfoState) nextCommit(hash string) {
	state.lastCommitHash, state.lastCommit = hash, nil
}

// commit returns the Commit for the hash last given to nextCommit. The object
// is loaded from the head commit's repository on first use and cached in
// state.lastCommit for later calls.
func (state *getCommitInfoState) commit() (*Commit, error) {
	if state.lastCommit != nil {
		return state.lastCommit, nil
	}
	loaded, err := state.headCommit.repo.GetCommit(state.lastCommitHash)
	state.lastCommit = loaded
	return loaded, err
}

if maxConcurrency <= 0 {
maxConcurrency = runtime.NumCPU()
// update records the commit currently being scanned as the commit for the
// tree entry that contains path, unless that entry is unknown or has already
// been resolved.
//
// path is a file path as printed by `git log --name-only`. It is reduced to
// its first component below state.treePath, and that component is joined back
// onto state.treePath to form the lookup key, which is exactly how
// initGetCommitInfoState builds the keys of state.entries.
//
// It returns an error only when the current commit cannot be loaded.
func (state *getCommitInfoState) update(path string) error {
	relPath, err := filepath.Rel(state.treePath, path)
	if err != nil {
		// path cannot be expressed relative to treePath, so it cannot belong
		// to any tracked entry; this is not a failure.
		return nil
	}

	// filepath.Rel yields OS-specific separators, so normalise before looking
	// for the first component; searching for a literal '/' would otherwise
	// never match where the separator is not '/'.
	relPath = filepath.ToSlash(relPath)
	if index := strings.IndexByte(relPath, '/'); index >= 0 {
		relPath = relPath[:index]
	}
	// Always rebuild the key with filepath.Join so it has the same form as the
	// keys in state.entries, instead of reusing git's raw path.
	entryPath := filepath.Join(state.treePath, relPath)

	if _, tracked := state.entries[entryPath]; !tracked {
		return nil
	}
	if _, resolved := state.commits[entryPath]; resolved {
		return nil
	}

	commit, err := state.commit()
	if err != nil {
		return err
	}
	state.commits[entryPath] = commit
	return nil
}

// Length of taskChan determines how many goroutines (subprocesses) can run at the same time.
// The length of revChan should be same as taskChan so goroutines whoever finished job can
// exit as early as possible, only store data inside channel.
taskChan := make(chan bool, maxConcurrency)
revChan := make(chan commitInfo, maxConcurrency)
doneChan := make(chan error)

// Receive loop will exit when it collects same number of data pieces as tree entries.
// It notifies doneChan before exits or notify early with possible error.
infoMap := make(map[string][]interface{}, len(tes))
go func() {
i := 0
for info := range revChan {
if info.err != nil {
doneChan <- info.err
return
}
// getCommitsInfo keeps fetching batches of commit information until every
// entry in state.entries has a commit in state.commits, or until a batch
// fails, in which case that error is returned.
func getCommitsInfo(state *getCommitInfoState) error {
	for {
		if len(state.commits) >= len(state.entries) {
			return nil
		}
		if err := getNextCommitInfos(state); err != nil {
			return err
		}
	}
}

infoMap[info.entryName] = info.infos
i++
if i == len(tes) {
func getNextCommitInfos(state *getCommitInfoState) error {
logOutput, err := logCommand(state.lastCommitHash, state).RunInDir(state.headCommit.repo.Path)
if err != nil {
return err
}
lines := strings.Split(logOutput, "\n")
i := 0
for i < len(lines) {
state.nextCommit(lines[i])
i++
for ; i < len(lines); i++ {
path := lines[i]
if path == "" {
break
}
state.update(path)
}
doneChan <- nil
}()

for i := range tes {
// When taskChan is idle (or has empty slots), put operation will not block.
// However when taskChan is full, code will block and wait any running goroutines to finish.
taskChan <- true

if tes[i].Type != ObjectCommit {
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], c}
}
revChan <- cinfo
<-taskChan // Clear one slot from taskChan to allow new goroutines to start.
}(i)
continue
i++ // skip blank line
if len(state.entries) == len(state.commits) {
break
}

// Handle submodule
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name()))
if err != nil && !IsErrNotExist(err) {
cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err)
revChan <- cinfo
return
}

smURL := ""
if sm != nil {
smURL = sm.URL
}

c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smURL, tes[i].ID.String())}
}
revChan <- cinfo
<-taskChan
}(i)
}
return nil
}

if err := <-doneChan; err != nil {
return nil, err
func logCommand(exclusiveStartHash string, state *getCommitInfoState) *Command {
var commitHash string
if len(exclusiveStartHash) == 0 {
commitHash = "HEAD"
} else {
commitHash = exclusiveStartHash + "^"
}

commitsInfo := make([][]interface{}, len(tes))
for i := 0; i < len(tes); i++ {
commitsInfo[i] = infoMap[tes[i].Name()]
var command *Command
numRemainingEntries := len(state.entries) - len(state.commits)
if numRemainingEntries < 32 {
searchSize := (numRemainingEntries + 1) / 2
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(searchSize), commitHash, "--")
for path, entry := range state.entries {
if _, ok := state.commits[entry.Name()]; !ok {
command.AddArguments(path)
}
}
} else {
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(state.nextSearchSize), commitHash, "--", state.treePath)
}
return commitsInfo, nil
state.nextSearchSize += state.nextSearchSize
return command
}
62 changes: 62 additions & 0 deletions tree_entry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package git

import (
"os"
"path/filepath"
"testing"
"time"
)

const benchmarkReposDir = "benchmark_repos/"

// setupGitRepo returns the local path of the benchmark repository called name,
// cloning it from url into benchmarkReposDir when no such path exists yet. The
// path is returned together with any error reported by the clone.
func setupGitRepo(url string, name string) (string, error) {
	target := filepath.Join(benchmarkReposDir, name)

	_, statErr := os.Stat(target)
	if statErr == nil {
		// Something is already there; reuse it instead of cloning again.
		return target, nil
	}

	opts := CloneRepoOptions{
		Mirror:  false,
		Bare:    false,
		Quiet:   true,
		Timeout: 5 * time.Minute,
	}
	cloneErr := Clone(url, target, opts)
	return target, cloneErr
}

// BenchmarkEntries_GetCommitsInfo measures Entries.GetCommitsInfo on the root
// tree of the "master" branch of several public repositories. Each repository
// is cloned into benchmarkReposDir on first use and gets its own
// sub-benchmark, so the setup cost is not part of the timed region.
//
// Failures are reported through b.Fatal/b.Fatalf rather than panic, so a
// broken setup is shown as a failed benchmark naming the repository involved
// instead of crashing the test binary.
func BenchmarkEntries_GetCommitsInfo(b *testing.B) {
	benchmarks := []struct {
		url  string
		name string
	}{
		{url: "https://github.com/go-gitea/gitea.git", name: "gitea"},
		{url: "https://github.com/ethantkoenig/manyfiles.git", name: "manyfiles"},
		{url: "https://github.com/moby/moby.git", name: "moby"},
		{url: "https://github.com/golang/go.git", name: "go"},
		{url: "https://github.com/torvalds/linux.git", name: "linux"},
	}
	for _, benchmark := range benchmarks {
		var (
			commit  *Commit
			entries Entries
		)

		repoPath, err := setupGitRepo(benchmark.url, benchmark.name)
		if err != nil {
			b.Fatalf("%s: set up repository: %v", benchmark.name, err)
		}
		repo, err := OpenRepository(repoPath)
		if err != nil {
			b.Fatalf("%s: open repository: %v", benchmark.name, err)
		}
		if commit, err = repo.GetBranchCommit("master"); err != nil {
			b.Fatalf("%s: get master commit: %v", benchmark.name, err)
		}
		if entries, err = commit.Tree.ListEntries(); err != nil {
			b.Fatalf("%s: list tree entries: %v", benchmark.name, err)
		}
		entries.Sort()

		b.Run(benchmark.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				if _, err := entries.GetCommitsInfo(commit, ""); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}

0 comments on commit ab9e103

Please sign in to comment.