Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change language statistics to save size instead of percentage (#11681) #11690

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions models/migrations/migrations.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ var migrations = []Migration{
NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn),
// v139 -> v140
NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs),
// v140 -> v141
NewMigration("Save detected language file size to database instead of percent", fixLanguageStatsToSaveSize),
}

// GetCurrentDBVersion returns the current db version
Expand Down
56 changes: 56 additions & 0 deletions models/migrations/v140.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package migrations

import (
"fmt"

"code.gitea.io/gitea/modules/setting"

"xorm.io/xorm"
)

func fixLanguageStatsToSaveSize(x *xorm.Engine) error {
// LanguageStat see models/repo_language_stats.go
type LanguageStat struct {
Size int64 `xorm:"NOT NULL DEFAULT 0"`
}

// RepoIndexerType specifies the repository indexer type
type RepoIndexerType int

const (
// RepoIndexerTypeCode code indexer
RepoIndexerTypeCode RepoIndexerType = iota // 0
// RepoIndexerTypeStats repository stats indexer
RepoIndexerTypeStats // 1
)

// RepoIndexerStatus see models/repo_indexer.go
type RepoIndexerStatus struct {
IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
}

if err := x.Sync2(new(LanguageStat)); err != nil {
return fmt.Errorf("Sync2: %v", err)
}

x.Delete(&RepoIndexerStatus{IndexerType: RepoIndexerTypeStats})

// Delete language stat statuses
truncExpr := "TRUNCATE TABLE"
if setting.Database.UseSQLite3 {
truncExpr = "DELETE FROM"
}

// Delete language stats
if _, err := x.Exec(fmt.Sprintf("%s language_stat", truncExpr)); err != nil {
return err
}

sess := x.NewSession()
defer sess.Close()
return dropTableColumns(sess, "language_stat", "percentage")
}
67 changes: 49 additions & 18 deletions models/repo_language_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ type LanguageStat struct {
CommitID string
IsPrimary bool
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
Percentage float32 `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
Percentage float32 `xorm:"-"`
Size int64 `xorm:"NOT NULL DEFAULT 0"`
lafriks marked this conversation as resolved.
Show resolved Hide resolved
Color string `xorm:"-"`
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
}
Expand All @@ -34,12 +35,36 @@ func (stats LanguageStatList) loadAttributes() {
}
}

func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
langPerc := make(map[string]float32)
var otherPerc float32 = 100
var total int64

for _, stat := range stats {
total += stat.Size
}
if total > 0 {
for _, stat := range stats {
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
if perc <= 0.1 {
continue
}
otherPerc -= perc
langPerc[stat.Language] = perc
}
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
}
if otherPerc > 0 {
langPerc["other"] = otherPerc
}
return langPerc
}

func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
stats := make(LanguageStatList, 0, 6)
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`").Find(&stats); err != nil {
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`size`").Find(&stats); err != nil {
return nil, err
}
stats.loadAttributes()
return stats, nil
}

Expand All @@ -54,13 +79,18 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
if err != nil {
return nil, err
}
perc := stats.getLanguagePercentages()
topstats := make(LanguageStatList, 0, limit)
var other float32
for i := range stats {
if _, ok := perc[stats[i].Language]; !ok {
continue
}
if stats[i].Language == "other" || len(topstats) >= limit {
other += stats[i].Percentage
other += perc[stats[i].Language]
continue
}
stats[i].Percentage = perc[stats[i].Language]
topstats = append(topstats, stats[i])
}
if other > 0 {
Expand All @@ -71,11 +101,12 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
Percentage: float32(math.Round(float64(other)*10) / 10),
})
}
topstats.loadAttributes()
return topstats, nil
}

// UpdateLanguageStats updates the language statistics for repository
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error {
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]int64) error {
sess := x.NewSession()
if err := sess.Begin(); err != nil {
return err
Expand All @@ -87,24 +118,24 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
return err
}
var topLang string
var p float32
for lang, perc := range stats {
if perc > p {
p = perc
var s int64
for lang, size := range stats {
if size > s {
s = size
topLang = strings.ToLower(lang)
}
}

for lang, perc := range stats {
for lang, size := range stats {
upd := false
llang := strings.ToLower(lang)
for _, s := range oldstats {
// Update already existing language
if strings.ToLower(s.Language) == llang {
s.CommitID = commitID
s.IsPrimary = llang == topLang
s.Percentage = perc
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil {
s.Size = size
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`size`", "`is_primary`").Update(s); err != nil {
return err
}
upd = true
Expand All @@ -114,11 +145,11 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
// Insert new language
if !upd {
if _, err := sess.Insert(&LanguageStat{
RepoID: repo.ID,
CommitID: commitID,
IsPrimary: llang == topLang,
Language: lang,
Percentage: perc,
RepoID: repo.ID,
CommitID: commitID,
IsPrimary: llang == topLang,
Language: lang,
Size: size,
}); err != nil {
return err
}
Expand Down Expand Up @@ -153,7 +184,7 @@ func CopyLanguageStat(originalRepo, destRepo *Repository) error {
return err
}
RepoLang := make(LanguageStatList, 0, 6)
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`percentage`").Find(&RepoLang); err != nil {
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`size`").Find(&RepoLang); err != nil {
return err
}
if len(RepoLang) > 0 {
Expand Down
46 changes: 28 additions & 18 deletions modules/git/repo_language_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"bytes"
"io"
"io/ioutil"
"math"

"code.gitea.io/gitea/modules/analyze"

Expand All @@ -20,8 +19,22 @@ import (

const fileSizeLimit int64 = 16 * 1024 * 1024

// specialLanguages defines list of languages that are excluded from the calculation
// unless they are the only language present in repository. Only languages which under
// normal circumstances are not considered to be code should be listed here.
var specialLanguages = []string{
"XML",
"JSON",
"TOML",
"YAML",
"INI",
"SVG",
"Text",
"Markdown",
}

// GetLanguageStats calculates language stats for git repository at specified commit
func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) {
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
r, err := git.PlainOpen(repo.Path)
if err != nil {
return nil, err
Expand All @@ -43,9 +56,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
}

sizes := make(map[string]int64)
var total int64
err = tree.Files().ForEach(func(f *object.File) error {
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
return nil
}
Expand All @@ -63,30 +75,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
return nil
}

// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if group != "" {
language = group
}

sizes[language] += f.Size
total += f.Size

return nil
})
if err != nil {
return nil, err
}

stats := make(map[string]float32)
var otherPerc float32 = 100
for language, size := range sizes {
perc := float32(math.Round(float64(size)/float64(total)*1000) / 10)
if perc <= 0.1 {
continue
// filter special languages unless they are the only language
if len(sizes) > 1 {
for _, language := range specialLanguages {
delete(sizes, language)
}
otherPerc -= perc
stats[language] = perc
}
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
if otherPerc > 0 {
stats["other"] = otherPerc
}
return stats, nil

return sizes, nil
}

func readFile(f *object.File, limit int64) ([]byte, error) {
Expand Down
7 changes: 4 additions & 3 deletions modules/indexer/stats/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ func TestRepoStatsIndex(t *testing.T) {

repo, err := models.GetRepositoryByID(1)
assert.NoError(t, err)
status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
assert.NoError(t, err)
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
langs, err := repo.GetTopLanguageStats(5)
assert.NoError(t, err)
assert.Len(t, langs, 1)
assert.Equal(t, "other", langs[0].Language)
assert.Equal(t, float32(100), langs[0].Percentage)
assert.Empty(t, langs)
}