Skip to content

Commit

Permalink
Fix language stat calculation (go-gitea#11692)
Browse files Browse the repository at this point in the history
* Fix language stat calculation

* Group languages and ignore 0 size files

* remove unneeded code
  • Loading branch information
CirnoT authored and Yohann Delafollye committed Jul 31, 2020
1 parent 6c2b002 commit 6e23696
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 41 deletions.
35 changes: 1 addition & 34 deletions models/repo_language_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,6 @@ type LanguageStat struct {
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
}

// specialLanguages is the set of languages that are excluded from the percentage
// calculation unless the repository contains no other language. Only languages
// that under normal circumstances are not considered to be code should be listed
// here; the "other" bucket is treated the same way.
// It is a map with empty-struct values so it can be used for membership checks.
var specialLanguages = map[string]struct{}{
"XML": {},
"JSON": {},
"TOML": {},
"YAML": {},
"INI": {},
"SQL": {},
"SVG": {},
"Text": {},
"Markdown": {},
"other": {},
}

// LanguageStatList defines a list of language statistics
type LanguageStatList []*LanguageStat

Expand All @@ -55,27 +39,12 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
langPerc := make(map[string]float32)
var otherPerc float32 = 100
var total int64
// Check that repository has at least one non-special language
var skipSpecial bool
for _, stat := range stats {
if _, ok := specialLanguages[stat.Language]; !ok {
skipSpecial = true
break
}
}

for _, stat := range stats {
// Exclude specific languages from percentage calculation
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
continue
}
total += stat.Size
}
if total > 0 {
for _, stat := range stats {
// Exclude specific languages from percentage calculation
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
continue
}
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
if perc <= 0.1 {
continue
Expand All @@ -84,8 +53,6 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
langPerc[stat.Language] = perc
}
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
} else {
otherPerc = 100
}
if otherPerc > 0 {
langPerc["other"] = otherPerc
Expand Down
31 changes: 27 additions & 4 deletions modules/git/repo_language_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,20 @@ import (

const fileSizeLimit int64 = 16 * 1024 * 1024

// specialLanguages lists the languages that GetLanguageStats removes from its
// result when more than one language was detected. Only languages that under
// normal circumstances are not considered to be code should be listed here.
// NOTE(review): the intent appears to be "keep these when they are the only
// languages present", but the filter only checks len(sizes) > 1, so a repository
// made up of two or more special languages seems to end up with no stats — confirm.
var specialLanguages = []string{
"XML",
"JSON",
"TOML",
"YAML",
"INI",
"SVG",
"Text",
"Markdown",
}

// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
r, err := git.PlainOpen(repo.Path)
Expand All @@ -43,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

sizes := make(map[string]int64)
err = tree.Files().ForEach(func(f *object.File) error {
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil
}
Expand All @@ -58,7 +72,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

language := analyze.GetCodeLanguage(f.Name, content)
if language == enry.OtherLanguage || language == "" {
language = "other"
return nil
}

// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if group != "" {
language = group
}

sizes[language] += f.Size
Expand All @@ -69,8 +89,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}

if len(sizes) == 0 {
sizes["other"] = 0
// drop special languages whenever more than one language was detected
// NOTE(review): if every detected language is special this leaves the result empty
if len(sizes) > 1 {
for _, language := range specialLanguages {
delete(sizes, language)
}
}

return sizes, nil
Expand Down
4 changes: 1 addition & 3 deletions modules/indexer/stats/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,5 @@ func TestRepoStatsIndex(t *testing.T) {
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
langs, err := repo.GetTopLanguageStats(5)
assert.NoError(t, err)
assert.Len(t, langs, 1)
assert.Equal(t, "other", langs[0].Language)
assert.Equal(t, float32(100), langs[0].Percentage)
assert.Empty(t, langs)
}

0 comments on commit 6e23696

Please sign in to comment.