Skip to content

Commit

Permalink
Revert to old implementation of GetCommitsInfo (go-gitea#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
ethantkoenig authored and lunny committed Jun 28, 2017
1 parent 7546898 commit 7c4fc4e
Showing 1 changed file with 90 additions and 135 deletions.
225 changes: 90 additions & 135 deletions tree_entry.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ package git
import (
"fmt"
"path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -145,159 +147,112 @@ func (tes Entries) Sort() {
sort.Sort(tes)
}

// getCommitInfoState holds the transient state used while getting commit info for entries
type getCommitInfoState struct {
entries map[string]*TreeEntry // map from filepath to entry
commits map[string]*Commit // map from filepath to commit
lastCommitHash string
lastCommit *Commit
treePath string
headCommit *Commit
nextSearchSize int // next number of commits to search for
// commitInfo is the result that a worker goroutine reports for a single tree entry.
type commitInfo struct {
// entryName is the entry's Name(); the receiver uses it as the key of the result map.
entryName string
// infos is the result pair for the entry: the entry itself followed by either its
// commit or, for submodule entries, its submodule file.
infos []interface{}
// err is set when looking up the entry's commit or submodule failed.
err error
}

// initGetCommitInfoState builds the starting state for a commit-info search.
//
// Every entry is indexed by its path joined onto treePath. The stored tree path
// is the cleaned form of treePath, with "." normalised to the empty string.
// The commits map starts out empty and the initial search size is 16.
func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitInfoState {
	byPath := make(map[string]*TreeEntry, len(entries))
	for i := range entries {
		byPath[path.Join(treePath, entries[i].Name())] = entries[i]
	}

	cleaned := path.Clean(treePath)
	if cleaned == "." {
		cleaned = ""
	}

	state := new(getCommitInfoState)
	state.entries = byPath
	state.commits = make(map[string]*Commit, len(byPath))
	state.treePath = cleaned
	state.headCommit = headCommit
	state.nextSearchSize = 16
	return state
}

// GetCommitsInfo gets information of all commits that are corresponding to these entries
// GetCommitsInfo takes advantage of concurrency to speed up getting information
// about all commits that correspond to these entries. This method automatically
// chooses the number of goroutines (concurrency) to use based on the host's CPU count.
func (tes Entries) GetCommitsInfo(commit *Commit, treePath string) ([][]interface{}, error) {
state := initGetCommitInfoState(tes, commit, treePath)
if err := getCommitsInfo(state); err != nil {
return nil, err
}

commitsInfo := make([][]interface{}, len(tes))
for i, entry := range tes {
commit = state.commits[path.Join(treePath, entry.Name())]
switch entry.Type {
case ObjectCommit:
subModuleURL := ""
if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
return nil, err
} else if subModule != nil {
subModuleURL = subModule.URL
}
subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
commitsInfo[i] = []interface{}{entry, subModuleFile}
default:
commitsInfo[i] = []interface{}{entry, commit}
}
}
return commitsInfo, nil
}

func (state *getCommitInfoState) nextCommit(hash string) {
state.lastCommitHash = hash
state.lastCommit = nil
return tes.GetCommitsInfoWithCustomConcurrency(commit, treePath, 0)
}

func (state *getCommitInfoState) commit() (*Commit, error) {
var err error
if state.lastCommit == nil {
state.lastCommit, err = state.headCommit.repo.GetCommit(state.lastCommitHash)
// GetCommitsInfoWithCustomConcurrency takes advantage of concurrency to speed up getting information
// about all commits that correspond to these entries. If the given maxConcurrency is negative or
// equal to zero, the number of goroutines (concurrency) to use is chosen based on the
// host's CPU count.
func (tes Entries) GetCommitsInfoWithCustomConcurrency(commit *Commit, treePath string, maxConcurrency int) ([][]interface{}, error) {
if len(tes) == 0 {
return nil, nil
}
return state.lastCommit, err
}

func (state *getCommitInfoState) update(entryPath string) error {
var entryNameStartIndex int
if len(state.treePath) > 0 {
entryNameStartIndex = len(state.treePath) + 1
if maxConcurrency <= 0 {
maxConcurrency = runtime.NumCPU()
}

if index := strings.IndexByte(entryPath[entryNameStartIndex:], '/'); index >= 0 {
entryPath = entryPath[:entryNameStartIndex+index]
}

if _, ok := state.entries[entryPath]; !ok {
return nil
} else if _, ok := state.commits[entryPath]; ok {
return nil
}

var err error
state.commits[entryPath], err = state.commit()
return err
}

// getCommitsInfo keeps fetching batches of commit information until every
// entry in state has a commit recorded for it, stopping at the first error.
func getCommitsInfo(state *getCommitInfoState) error {
	for {
		// Done once there are no more entries than resolved commits.
		if len(state.entries) <= len(state.commits) {
			return nil
		}
		if err := getNextCommitInfos(state); err != nil {
			return err
		}
	}
}
// The capacity of taskChan determines how many goroutines (subprocesses) can run at the same time.
// revChan should have the same capacity as taskChan so that any goroutine that has finished its job
// can simply store its result in the channel and exit as early as possible.
taskChan := make(chan bool, maxConcurrency)
revChan := make(chan commitInfo, maxConcurrency)
doneChan := make(chan error)

// The receive loop exits once it has collected as many results as there are tree entries.
// It notifies doneChan before exiting, or notifies it early if an error is received.
infoMap := make(map[string][]interface{}, len(tes))
go func() {
i := 0
for info := range revChan {
if info.err != nil {
doneChan <- info.err
return
}

func getNextCommitInfos(state *getCommitInfoState) error {
logOutput, err := logCommand(state.lastCommitHash, state).RunInDir(state.headCommit.repo.Path)
if err != nil {
return err
}
lines := strings.Split(logOutput, "\n")
i := 0
for i < len(lines) {
state.nextCommit(lines[i])
i++
for ; i < len(lines); i++ {
entryPath := lines[i]
if entryPath == "" {
infoMap[info.entryName] = info.infos
i++
if i == len(tes) {
break
}
if entryPath[0] == '"' {
entryPath, err = strconv.Unquote(entryPath)
}
doneChan <- nil
}()

for i := range tes {
// When taskChan is idle (or has empty slots), the send operation does not block.
// However, when taskChan is full, the send blocks and waits for a running goroutine to finish.
taskChan <- true

if tes[i].Type != ObjectCommit {
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
return fmt.Errorf("Unquote: %v", err)
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], c}
}
revChan <- cinfo
<-taskChan // Clear one slot from taskChan to allow new goroutines to start.
}(i)
continue
}

// Handle submodule
go func(i int) {
cinfo := commitInfo{entryName: tes[i].Name()}
sm, err := commit.GetSubModule(path.Join(treePath, tes[i].Name()))
if err != nil && !IsErrNotExist(err) {
cinfo.err = fmt.Errorf("GetSubModule (%s/%s): %v", treePath, tes[i].Name(), err)
revChan <- cinfo
return
}
if err = state.update(entryPath); err != nil {
return err

smURL := ""
if sm != nil {
smURL = sm.URL
}
}
i++ // skip blank line
if len(state.entries) == len(state.commits) {
break
}

c, err := commit.GetCommitByPath(filepath.Join(treePath, tes[i].Name()))
if err != nil {
cinfo.err = fmt.Errorf("GetCommitByPath (%s/%s): %v", treePath, tes[i].Name(), err)
} else {
cinfo.infos = []interface{}{tes[i], NewSubModuleFile(c, smURL, tes[i].ID.String())}
}
revChan <- cinfo
<-taskChan
}(i)
}
return nil
}

func logCommand(exclusiveStartHash string, state *getCommitInfoState) *Command {
var commitHash string
if len(exclusiveStartHash) == 0 {
commitHash = state.headCommit.ID.String()
} else {
commitHash = exclusiveStartHash + "^"
if err := <-doneChan; err != nil {
return nil, err
}
var command *Command
numRemainingEntries := len(state.entries) - len(state.commits)
if numRemainingEntries < 32 {
searchSize := (numRemainingEntries + 1) / 2
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(searchSize), commitHash, "--")
for entryPath := range state.entries {
if _, ok := state.commits[entryPath]; !ok {
command.AddArguments(entryPath)
}
}
} else {
command = NewCommand("log", prettyLogFormat, "--name-only",
"-"+strconv.Itoa(state.nextSearchSize), commitHash, "--", state.treePath)

commitsInfo := make([][]interface{}, len(tes))
for i := 0; i < len(tes); i++ {
commitsInfo[i] = infoMap[tes[i].Name()]
}
state.nextSearchSize += state.nextSearchSize
return command
return commitsInfo, nil
}

0 comments on commit 7c4fc4e

Please sign in to comment.