Skip to content
This repository has been archived by the owner on Apr 12, 2019. It is now read-only.

Faster commit lookup #53

Merged
merged 8 commits into from
May 26, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,5 @@ _testmain.go
*.prof

coverage.out

benchmark/
8 changes: 6 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
IMPORT := code.gitea.io/git

PACKAGES ?= $(shell go list ./... | grep -v /vendor/)
PACKAGES ?= $(shell go list -e ./... | grep -v /vendor/ | grep -v /benchmark/)
GENERATE ?= code.gitea.io/git

.PHONY: all
Expand All @@ -18,7 +18,7 @@ generate:

.PHONY: fmt
fmt:
find . -name "*.go" -type f -not -path "./vendor/*" | xargs gofmt -s -w
find . -name "*.go" -type f -not -path "./vendor/*" -not -path "./benchmark/*" | xargs gofmt -s -w

.PHONY: vet
vet:
Expand All @@ -35,6 +35,10 @@ lint:
test:
for PKG in $(PACKAGES); do go test -cover -coverprofile $$GOPATH/src/$$PKG/coverage.out $$PKG || exit 1; done;

.PHONY: bench
bench:
go test -run=XXXXXX -benchtime=10s -bench=. || exit 1

.PHONY: build
build:
go build .
218 changes: 125 additions & 93 deletions tree_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
package git

import (
"fmt"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -147,112 +144,147 @@ func (tes Entries) Sort() {
sort.Sort(tes)
}

type commitInfo struct {
entryName string
infos []interface{}
err error
// getCommitInfoState is the transient state used while resolving, for every
// tree entry, the commit recorded for it from successive `git log` batches.
type getCommitInfoState struct {
entries map[string]*TreeEntry // entries keyed by filepath.Join(treePath, entry.Name())
commits map[string]*Commit // commits found so far, keyed by the same joined path as entries
lastCommitHash string // commit line most recently read from the log output (set by nextCommit)
lastCommit *Commit // cached commit for lastCommitHash; nil until commit() loads it
treePath string // path of the tree whose entries are being resolved
headCommit *Commit // commit passed to GetCommitsInfo; supplies the repository and submodule lookups
nextSearchSize int // next number of commits to search for
}

// GetCommitsInfo takes advantage of concurrency to speed up getting information
// about all commits that correspond to these entries. This method automatically
// chooses the number of goroutines (concurrency) to use based on the host CPU.
// initGetCommitInfoState creates the lookup state for resolving the commits of
// entries, which live under treePath as seen from headCommit. Entries are
// indexed by treePath joined with their name, no commits are resolved yet, and
// the first log search covers 16 commits.
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitInfoState {
	state := new(getCommitInfoState)
	state.treePath = treePath
	state.headCommit = headCommit
	state.nextSearchSize = 16

	state.entries = make(map[string]*TreeEntry, len(entries))
	for i := range entries {
		fullPath := filepath.Join(treePath, entries[i].Name())
		state.entries[fullPath] = entries[i]
	}
	// Sized after the de-duplicated entry map, not the raw entry list.
	state.commits = make(map[string]*Commit, len(state.entries))
	return state
}

// GetCommitsInfo gets information of all commits that are corresponding to these entries.
//
// It returns one element per entry, in the same order as tes. Each element is
// a two-item slice: the entry itself, followed by either the commit found for
// it or, for entries of type ObjectCommit, a sub-module file built from that
// commit, the sub-module URL and the entry ID.
//
// An error from the commit search or from the sub-module lookup aborts the
// call and is returned with a nil result.
//
// NOTE(review): this span is a diff rendering with both sides interleaved; the
// first statement below (the call to GetCommitsInfoWithCustomConcurrency)
// appears to be the removed side and is not part of the new implementation.
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
return tes.GetCommitsInfoWithCustomConcurrency(commit, treePath, 0)
state := initGetCommitInfoState(tes, commit, treePath)
if err := getCommitsInfo(state); err != nil {
return nil, err
}

commitsInfo := make([][]interface{}, len(tes))
for i, entry := range tes {
// The commit parameter is reused as a per-entry variable from here on;
// the head commit stays reachable through state.headCommit.
commit = state.commits[filepath.Join(treePath, entry.Name())]
switch entry.Type {
case ObjectCommit:
// The URL stays empty when GetSubModule returns a nil sub-module.
// NOTE(review): the lookup uses entry.Name() without treePath, and any
// error (including a not-exist error) aborts the call — confirm both are intended.
subModuleURL := ""
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
return nil, err
} else if subModule != nil {
subModuleURL = subModule.URL
}
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
commitsInfo[i] = []interface{}{entry, subModuleFile}
default:
commitsInfo[i] = []interface{}{entry, commit}
}
}
return commitsInfo, nil
}

// GetCommitsInfoWithCustomConcurrency takes advantage of concurrency to speed up getting information
// about all commits that correspond to these entries. If the given maxConcurrency is negative or
// equal to zero, the number of goroutines (concurrency) to use is chosen based on the
// host CPU.
func (tes Entries) GetCommitsInfoWithCustomConcurrency(commit *Commit, treePath string, maxConcurrency int) ([][]interface{}, error) {
if len(tes) == 0 {
return nil, nil
// nextCommit makes hash the commit currently being scanned and drops the
// cached commit object, so that commit() loads it again on demand.
func (state *getCommitInfoState) nextCommit(hash string) {
	state.lastCommit = nil
	state.lastCommitHash = hash
}

// commit returns the commit object for state.lastCommitHash. The object is
// fetched from the head commit's repository on first use and cached in
// state.lastCommit until nextCommit resets it.
func (state *getCommitInfoState) commit() (*Commit, error) {
	if state.lastCommit != nil {
		return state.lastCommit, nil
	}
	fetched, err := state.headCommit.repo.GetCommit(state.lastCommitHash)
	state.lastCommit = fetched
	return state.lastCommit, err
}

if maxConcurrency <= 0 {
maxConcurrency = runtime.NumCPU()
// update records the commit currently being scanned (see commit) as the
// commit of the tree entry that contains path, unless that entry is unknown
// or already has a commit.
//
// path is a changed-file path taken from the log output. Only its first
// component below state.treePath matters: a change to "dir/sub/file" counts
// for the entry "dir".
//
// It returns the error from loading the commit object, if any. Paths that
// cannot be made relative to state.treePath are ignored and yield nil.
func (state *getCommitInfoState) update(path string) error {
	relPath, err := filepath.Rel(state.treePath, path)
	if err != nil {
		// Deliberately best-effort: a path that cannot be expressed relative
		// to the tree cannot belong to any of its entries.
		return nil
	}

	// filepath.Rel returns an OS-native path, so the first component has to be
	// cut at filepath.Separator; a literal '/' never matches on platforms
	// whose separator differs.
	entryName := relPath
	if index := strings.IndexRune(relPath, filepath.Separator); index >= 0 {
		entryName = relPath[:index]
	}
	// Rebuild the key exactly as initGetCommitInfoState does, so the lookup
	// agrees with state.entries regardless of how git spelled the path.
	entryPath := filepath.Join(state.treePath, entryName)

	if _, known := state.entries[entryPath]; !known {
		return nil
	}
	if _, resolved := state.commits[entryPath]; resolved {
		return nil
	}
	state.commits[entryPath], err = state.commit()
	return err
}

// Length of taskChan determines how many goroutines (subprocesses) can run at the same time.
// The length of revChan should be same as taskChan so goroutines whoever finished job can
// exit as early as possible, only store data inside channel.
taskChan := make(chan bool, maxConcurrency)
revChan := make(chan commitInfo, maxConcurrency)
doneChan := make(chan error)

// Receive loop will exit when it collects same number of data pieces as tree entries.
// It notifies doneChan before exits or notify early with possible error.
infoMap := make(map[string][]interface{}, len(tes))
go func() {
i := 0
for info := range revChan {
if info.err != nil {
doneChan <- info.err
return
}
// getCommitsInfo keeps requesting further batches of log output until the
// number of recorded commits is no longer smaller than the number of entries,
// and stops early with the first error a batch reports.
func getCommitsInfo(state *getCommitInfoState) error {
	for {
		if len(state.commits) >= len(state.entries) {
			return nil
		}
		if err := getNextCommitInfos(state); err != nil {
			return err
		}
	}
}

infoMap[info.entryName] = info.infos
i++
if i == len(tes) {
// getNextCommitInfos runs one log batch (built by logCommand from
// state.lastCommitHash) in the repository directory and feeds every path in
// its output to state.update.
//
// The output is split on newlines and consumed in groups: the first line of a
// group is handed to nextCommit, the following non-empty lines are treated as
// paths, and an empty line ends the group. Scanning stops as soon as every
// entry has a commit.
// (Presumably prettyLogFormat prints exactly one line per commit — TODO confirm.)
//
// NOTE(review): this span is a diff rendering with both sides interleaved; the
// channel/goroutine statements below (doneChan, taskChan, revChan, cinfo, ...)
// belong to the removed GetCommitsInfoWithCustomConcurrency implementation.
func getNextCommitInfos(state *getCommitInfoState) error {
logOutput, err := logCommand(state.lastCommitHash, state).RunInDir(state.headCommit.repo.Path)
if err != nil {
return err
}
lines := strings.Split(logOutput, "\n")
i := 0
for i < len(lines) {
// First line of a group identifies the commit.
state.nextCommit(lines[i])
i++
// Remaining lines up to the next empty line are paths touched by it.
for ; i < len(lines); i++ {
path := lines[i]
if path == "" {
break
}
// NOTE(review): the error returned by update is discarded here.
state.update(path)
}
doneChan <- nil
}()

for i := range tes {
// When taskChan is idle (or has empty slots), put operation will not block.
// However when taskChan is full, code will block and wait any running goroutines to finish.
taskChan <- true

if tes[i].Type != ObjectCommit {
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], c}
}
revChan <- cinfo
<-taskChan // Clear one slot from taskChan to allow new goroutines to start.
}(i)
continue
i++ // skip blank line
// Every entry is resolved; the rest of the output is not needed.
if len(state.entries) == len(state.commits) {
break
}

// Handle submodule
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name()))
if err != nil && !IsErrNotExist(err) {
cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err)
revChan <- cinfo
return
}

smURL := ""
if sm != nil {
smURL = sm.URL
}

c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smURL, tes[i].ID.String())}
}
revChan <- cinfo
<-taskChan
}(i)
}
return nil
}

if err := <-doneChan; err != nil {
return nil, err
// logCommand builds the `log --name-only` command for the next search batch.
//
// The search starts at "HEAD" when exclusiveStartHash is empty, otherwise at
// exclusiveStartHash + "^". When fewer than 32 entries still lack a commit,
// the command is limited to half that count (rounded up) and the entry paths
// are passed as arguments; otherwise it is limited to state.nextSearchSize
// commits and restricted to state.treePath. state.nextSearchSize is doubled on
// every call, whichever branch was taken.
//
// NOTE(review): this span is a diff rendering with both sides interleaved; the
// statements that mention commitsInfo, tes and infoMap belong to the removed
// implementation.
// NOTE(review): the first batch starts at "HEAD" rather than at
// state.headCommit — confirm that callers only ever pass the HEAD commit.
func logCommand(exclusiveStartHash string, state *getCommitInfoState) *Command {
var commitHash string
if len(exclusiveStartHash) == 0 {
commitHash = "HEAD"
} else {
commitHash = exclusiveStartHash + "^"
}

commitsInfo := make([][]interface{}, len(tes))
for i := 0; i < len(tes); i++ {
commitsInfo[i] = infoMap[tes[i].Name()]
var command *Command
numRemainingEntries := len(state.entries) - len(state.commits)
if numRemainingEntries < 32 {
searchSize := (numRemainingEntries + 1) / 2
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(searchSize), commitHash, "--")
// NOTE(review): state.commits is written with the joined entry path as key
// (see update), but is read here with entry.Name(); with a non-empty
// treePath this lookup presumably never hits, so resolved paths are still
// passed to the command — verify.
for path, entry := range state.entries {
if _, ok := state.commits[entry.Name()]; !ok {
command.AddArguments(path)
}
}
} else {
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(state.nextSearchSize), commitHash, "--", state.treePath)
}
return commitsInfo, nil
state.nextSearchSize += state.nextSearchSize
return command
}
64 changes: 64 additions & 0 deletions tree_entry_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package git

import (
"os"
"path/filepath"
"testing"
"time"
)

const benchmarkReposDir = "benchmark/repos/"

// setupGitRepo returns the local directory of the benchmark repository called
// name, located under benchmarkReposDir. When os.Stat on that directory
// succeeds it is reused as is; otherwise the repository is cloned from url
// into it (quiet, non-bare, non-mirror, five-minute timeout) and the clone
// error, if any, is returned alongside the path.
func setupGitRepo(url string, name string) (string, error) {
	target := filepath.Join(benchmarkReposDir, name)
	_, statErr := os.Stat(target)
	if statErr != nil {
		opts := CloneRepoOptions{
			Mirror:  false,
			Bare:    false,
			Quiet:   true,
			Timeout: 5 * time.Minute,
		}
		return target, Clone(url, target, opts)
	}
	return target, nil
}

// BenchmarkEntries_GetCommitsInfo measures Entries.GetCommitsInfo on the root
// tree of the master branch of several public repositories of increasing size.
//
// Each repository is cloned on first use by setupGitRepo and reused afterwards.
// Setup runs outside the sub-benchmark, so only the GetCommitsInfo calls are
// timed. Failures are reported through the testing framework rather than by
// panicking, so one broken repository is reported as a benchmark failure with
// context instead of crashing the test binary.
func BenchmarkEntries_GetCommitsInfo(b *testing.B) {
	benchmarks := []struct {
		url  string
		name string
	}{
		{url: "https://github.com/go-gitea/gitea.git", name: "gitea"},
		{url: "https://github.com/ethantkoenig/manyfiles.git", name: "manyfiles"},
		{url: "https://github.com/moby/moby.git", name: "moby"},
		{url: "https://github.com/golang/go.git", name: "go"},
		{url: "https://github.com/torvalds/linux.git", name: "linux"},
	}
	for _, benchmark := range benchmarks {
		b.StopTimer()
		var (
			commit  *Commit
			entries Entries
		)
		repoPath, err := setupGitRepo(benchmark.url, benchmark.name)
		if err != nil {
			b.Fatalf("setting up %s: %v", benchmark.name, err)
		}
		repo, err := OpenRepository(repoPath)
		if err != nil {
			b.Fatalf("opening %s: %v", repoPath, err)
		}
		if commit, err = repo.GetBranchCommit("master"); err != nil {
			b.Fatalf("reading master of %s: %v", benchmark.name, err)
		}
		if entries, err = commit.Tree.ListEntries(); err != nil {
			b.Fatalf("listing entries of %s: %v", benchmark.name, err)
		}
		entries.Sort()
		b.StartTimer()

		// b.Run executes the closure synchronously, so capturing the loop
		// variables here is safe.
		b.Run(benchmark.name, func(b *testing.B) {
			for i := 0; i < b.N; i++ {
				if _, err := entries.GetCommitsInfo(commit, ""); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}