Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Search for issues/pulls #530

Merged
merged 1 commit into from
Jan 25, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,6 @@ coverage.out
/dist
/custom
/data
/indexers
/log
/public/img/avatar
4 changes: 4 additions & 0 deletions conf/app.ini
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ SSL_MODE = disable
; For "sqlite3" and "tidb", use absolute path when you start as service
PATH = data/gitea.db

[indexer]
ISSUE_INDEXER_PATH = indexers/issues.bleve
UPDATE_BUFFER_LEN = 20

[admin]

[security]
Expand Down
74 changes: 60 additions & 14 deletions models/issue.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
)

var (
Expand Down Expand Up @@ -451,8 +452,11 @@ func (issue *Issue) ReadBy(userID int64) error {
}

// updateIssueCols writes only the named columns of issue to the database
// through e. When the update succeeds the issue is queued for re-indexing via
// UpdateIssueIndexer; when it fails the database error is returned and the
// indexer is left untouched.
func updateIssueCols(e Engine, issue *Issue, cols ...string) error {
	if _, err := e.Id(issue.ID).Cols(cols...).Update(issue); err != nil {
		return err
	}
	// Only reached on success, so the index never sees a write that failed.
	UpdateIssueIndexer(issue)
	return nil
}

// UpdateIssueCols only updates values of specific columns for given issue.
Expand Down Expand Up @@ -733,6 +737,8 @@ func newIssue(e *xorm.Session, opts NewIssueOptions) (err error) {
return err
}

UpdateIssueIndexer(opts.Issue)

if len(opts.Attachments) > 0 {
attachments, err := getAttachmentsByUUIDs(e, opts.Attachments)
if err != nil {
Expand Down Expand Up @@ -865,10 +871,11 @@ type IssuesOptions struct {
MilestoneID int64
RepoIDs []int64
Page int
IsClosed bool
IsPull bool
IsClosed util.OptionalBool
IsPull util.OptionalBool
Labels string
SortType string
IssueIDs []int64
}

// sortIssuesSession sort an issues-related session based on the provided
Expand All @@ -894,19 +901,37 @@ func sortIssuesSession(sess *xorm.Session, sortType string) {

// Issues returns a list of issues by given conditions.
func Issues(opts *IssuesOptions) ([]*Issue, error) {
if opts.Page <= 0 {
opts.Page = 1
var sess *xorm.Session
if opts.Page >= 0 {
var start int
if opts.Page == 0 {
start = 0
} else {
start = (opts.Page - 1) * setting.UI.IssuePagingNum
}
sess = x.Limit(setting.UI.IssuePagingNum, start)
} else {
sess = x.NewSession()
defer sess.Close()
}

sess := x.Limit(setting.UI.IssuePagingNum, (opts.Page-1)*setting.UI.IssuePagingNum)
if len(opts.IssueIDs) > 0 {
sess.In("issue.id", opts.IssueIDs)
}

if opts.RepoID > 0 {
sess.And("issue.repo_id=?", opts.RepoID)
} else if len(opts.RepoIDs) > 0 {
// In case repository IDs are provided but actually no repository has issue.
sess.In("issue.repo_id", opts.RepoIDs)
}
sess.And("issue.is_closed=?", opts.IsClosed)

switch opts.IsClosed {
case util.OptionalBoolTrue:
sess.And("issue.is_closed=true")
case util.OptionalBoolFalse:
sess.And("issue.is_closed=false")
}

if opts.AssigneeID > 0 {
sess.And("issue.assignee_id=?", opts.AssigneeID)
Expand All @@ -926,7 +951,12 @@ func Issues(opts *IssuesOptions) ([]*Issue, error) {
sess.And("issue.milestone_id=?", opts.MilestoneID)
}

sess.And("issue.is_pull=?", opts.IsPull)
switch opts.IsPull {
case util.OptionalBoolTrue:
sess.And("issue.is_pull=true")
case util.OptionalBoolFalse:
sess.And("issue.is_pull=false")
}

sortIssuesSession(sess, opts.SortType)

Expand Down Expand Up @@ -1168,17 +1198,22 @@ type IssueStatsOptions struct {
MentionedID int64
PosterID int64
IsPull bool
IssueIDs []int64
}

// GetIssueStats returns issue statistic information by given conditions.
func GetIssueStats(opts *IssueStatsOptions) *IssueStats {
func GetIssueStats(opts *IssueStatsOptions) (*IssueStats, error) {
stats := &IssueStats{}

countSession := func(opts *IssueStatsOptions) *xorm.Session {
sess := x.
Where("issue.repo_id = ?", opts.RepoID).
And("is_pull = ?", opts.IsPull)

if len(opts.IssueIDs) > 0 {
sess.In("issue.id", opts.IssueIDs)
}

if len(opts.Labels) > 0 && opts.Labels != "0" {
labelIDs, err := base.StringsToInt64s(strings.Split(opts.Labels, ","))
if err != nil {
Expand Down Expand Up @@ -1210,13 +1245,20 @@ func GetIssueStats(opts *IssueStatsOptions) *IssueStats {
return sess
}

stats.OpenCount, _ = countSession(opts).
var err error
stats.OpenCount, err = countSession(opts).
And("is_closed = ?", false).
Count(&Issue{})
stats.ClosedCount, _ = countSession(opts).
if err != nil {
return nil, err
}
stats.ClosedCount, err = countSession(opts).
And("is_closed = ?", true).
Count(&Issue{})
return stats
if err != nil {
return nil, err
}
return stats, nil
}

// GetUserIssueStats returns issue statistic information for dashboard by given conditions.
Expand Down Expand Up @@ -1294,7 +1336,11 @@ func GetRepoIssueStats(repoID, uid int64, filterMode int, isPull bool) (numOpen

// updateIssue writes every column of issue to the database through e. When the
// update succeeds the issue is queued for re-indexing via UpdateIssueIndexer;
// when it fails the database error is returned and the indexer is left
// untouched.
func updateIssue(e Engine, issue *Issue) error {
	if _, err := e.Id(issue.ID).AllCols().Update(issue); err != nil {
		return err
	}
	// Only reached on success, so the index never sees a write that failed.
	UpdateIssueIndexer(issue)
	return nil
}

// UpdateIssue updates all fields of given issue.
Expand Down
18 changes: 5 additions & 13 deletions models/issue_comment.go
Original file line number Diff line number Diff line change
Expand Up @@ -454,28 +454,20 @@ func UpdateComment(c *Comment) error {
return err
}

// DeleteCommentByID deletes the comment by given ID.
func DeleteCommentByID(id int64) error {
comment, err := GetCommentByID(id)
if err != nil {
if IsErrCommentNotExist(err) {
return nil
}
return err
}

// DeleteComment deletes the comment
func DeleteComment(comment *Comment) error {
sess := x.NewSession()
defer sessionRelease(sess)
if err = sess.Begin(); err != nil {
if err := sess.Begin(); err != nil {
return err
}

if _, err = sess.Id(comment.ID).Delete(new(Comment)); err != nil {
if _, err := sess.Id(comment.ID).Delete(new(Comment)); err != nil {
return err
}

if comment.Type == CommentTypeComment {
if _, err = sess.Exec("UPDATE `issue` SET num_comments = num_comments - 1 WHERE id = ?", comment.IssueID); err != nil {
if _, err := sess.Exec("UPDATE `issue` SET num_comments = num_comments - 1 WHERE id = ?", comment.IssueID); err != nil {
return err
}
}
Expand Down
183 changes: 183 additions & 0 deletions models/issue_indexer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
// Copyright 2017 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package models

import (
"fmt"
"os"
"strconv"
"strings"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/blevesearch/bleve"
"github.com/blevesearch/bleve/analysis/analyzer/simple"
"github.com/blevesearch/bleve/search/query"
)

// issueIndexerUpdateQueue is the queue of issues that need to be (re)indexed in
// the issue indexer. It is created by InitIssueIndexer, written to by
// UpdateIssueIndexer and drained by processIssueIndexerUpdateQueue.
var issueIndexerUpdateQueue chan *Issue

// issueIndexer is the (thread-safe) index used for searching issues. It is set
// by InitIssueIndexer, either by opening an existing index or by creating one.
var issueIndexer bleve.Index

// issueIndexerData is the document stored in the issue indexer for one issue.
// The field names match the index field names ("ID", "RepoID", "Title",
// "Content") that the mappings and queries in this file refer to by string, so
// a rename here must be mirrored there.
type issueIndexerData struct {
// ID is copied from Issue.ID and is the value search results hand back.
ID int64
// RepoID is copied from Issue.RepoID and restricts searches to one repository.
RepoID int64

// Title and Content are the text fields matched against the search keyword.
Title string
Content string
}

// numericQuery builds a numeric-equality query: a range query on field whose
// lower and upper bounds are both value, with both bounds inclusive.
func numericQuery(value int64, field string) *query.NumericRangeQuery {
	var (
		bound     = float64(value)
		inclusive = true
	)
	equality := bleve.NewNumericRangeInclusiveQuery(&bound, &bound, &inclusive, &inclusive)
	equality.SetField(field)
	return equality
}

// SearchIssuesByKeyword searches the issue indexer for issues of the repository
// repoID whose Title or Content matches keyword as a phrase query. The keyword
// is lower-cased and split on whitespace into the phrase terms first.
// It returns the IDs of the matching issues, or the error reported by the
// index search.
func SearchIssuesByKeyword(repoID int64, keyword string) ([]int64, error) {
// Lower-case the keyword and split it on whitespace; the pieces are the
// terms of the phrase queries below.
fields := strings.Fields(strings.ToLower(keyword))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"fields" should actually be "terms", API: NewPhraseQuery(terms []string, field string)

Copy link
Member Author

@ethantkoenig ethantkoenig Feb 24, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The variable name has since been updated to terms (#1031)

// A hit must belong to the requested repository AND match the phrase in
// either the Title or the Content field.
indexerQuery := bleve.NewConjunctionQuery(
numericQuery(repoID, "RepoID"),
bleve.NewDisjunctionQuery(
bleve.NewPhraseQuery(fields, "Title"),
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At least we should have a Match Phrase Query, if not a Match Query or best a Query String Query.

For both fields "Title" and "Content".

bleve.NewPhraseQuery(fields, "Content"),
))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comments should be indexed, too, and queried here.

// 2147483647 is the maximum int32, i.e. effectively "return every hit";
// the remaining arguments are 0 and false (presumably offset and
// explain — confirm against the bleve API). Only the "ID" field is
// requested for each hit.
search := bleve.NewSearchRequestOptions(indexerQuery, 2147483647, 0, false)
search.Fields = []string{"ID"}

result, err := issueIndexer.Search(search)
if err != nil {
return nil, err
}

issueIDs := make([]int64, len(result.Hits))
for i, hit := range result.Hits {
// NOTE(review): unchecked type assertion — this panics if a hit carries
// no "ID" field or the field is not a float64.
issueIDs[i] = int64(hit.Fields["ID"].(float64))
}
return issueIDs, nil
}

// InitIssueIndexer prepares the issue indexer for use. If an index already
// exists at setting.Indexer.IssuePath it is opened; if the path does not exist
// a new index is created and populated from the database. Every failure along
// the way is reported through log.Fatal. Afterwards the update queue is
// allocated with setting.Indexer.UpdateQueueLength slots and the goroutine
// that drains it is started.
func InitIssueIndexer() {
	_, err := os.Stat(setting.Indexer.IssuePath)
	switch {
	case err == nil:
		// The index is already on disk: just open it.
		issueIndexer, err = bleve.Open(setting.Indexer.IssuePath)
		if err != nil {
			log.Fatal(4, "InitIssuesIndexer, open index: %v", err)
		}
	case os.IsNotExist(err):
		// First start: build the index and fill it with the existing issues.
		if err = createIssueIndexer(); err != nil {
			log.Fatal(4, "CreateIssuesIndexer: %v", err)
		}
		if err = populateIssueIndexer(); err != nil {
			log.Fatal(4, "PopulateIssuesIndex: %v", err)
		}
	default:
		// The path could not be inspected for some other reason.
		log.Fatal(4, "InitIssuesIndexer: %v", err)
	}

	issueIndexerUpdateQueue = make(chan *Issue, setting.Indexer.UpdateQueueLength)
	go processIssueIndexerUpdateQueue()
	// TODO close issueIndexer when Gitea closes
}

// createIssueIndexer creates a new bleve index at setting.Indexer.IssuePath and
// stores it in issueIndexer. The document mapping registered under "issues"
// declares ID and RepoID as numeric fields and Title and Content as text
// fields using the simple analyzer. It returns the error from bleve.New.
// This function does not itself check whether an index already exists;
// InitIssueIndexer only calls it when the index path is missing.
func createIssueIndexer() error {
mapping := bleve.NewIndexMapping()
docMapping := bleve.NewDocumentMapping()

// ID and RepoID are numeric fields; RepoID is matched with the numeric
// range query built by numericQuery.
docMapping.AddFieldMappingsAt("ID", bleve.NewNumericFieldMapping())
docMapping.AddFieldMappingsAt("RepoID", bleve.NewNumericFieldMapping())

// Title and Content share a single text field mapping whose analyzer is
// the one named by simple.Name.
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Analyzer = simple.Name
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
docMapping.AddFieldMappingsAt("Content", textFieldMapping)

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Issue comments should also be indexed here.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More token filters should be added (http://www.blevesearch.com/docs/Token-Filters/), to allow partial matches. My suggestion:

  • Short n-grams to allow partial matches (an issue tracker won't work without partial matching)
  • Strong Unicode normalizing (because of diacritical letters)
  • Multi-lingual stemming

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sha-red exactly. Would you have time to send some PRs to help improve it?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lunny Basically I'd love to help with actual code, with pleasure, but because I'd have to set up a whole Go development environment and lot's of other work to do, this will take some time to get everything running.

Please also have a look at our minimalistic issue search path for gogs: gogs/gogs#4015

IMHO whole phrase search is better than nothing, but at least comment indexing should be added before releasing.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can improve the search in v1.2, comment index should be added in that release cycle and of course the UI should be changed for keyword highlight.

// Register the mapping under the document type name "issues".
// NOTE(review): no type field or Type method is visible on issueIndexerData
// here — confirm that indexed documents actually resolve to the "issues"
// type rather than the index's default mapping.
mapping.AddDocumentMapping("issues", docMapping)

// Assign to the package-level issueIndexer rather than a new local.
var err error
issueIndexer, err = bleve.New(setting.Indexer.IssuePath, mapping)
return err
}

// populateIssueIndexer fills the issue indexer from the database. It walks all
// repositories ten at a time; for every page it loads all issues and pull
// requests, open and closed, of each repository without paging, adds them to
// one index batch and executes that batch. It returns nil once a page comes
// back empty, or the first error encountered, prefixed with the failing step.
func populateIssueIndexer() error {
	const repoPageSize = 10

	page := 1
	for {
		repos, err := Repositories(&SearchRepoOptions{
			Page:     page,
			PageSize: repoPageSize,
		})
		switch {
		case err != nil:
			return fmt.Errorf("Repositories: %v", err)
		case len(repos) == 0:
			// Ran past the last page: everything has been indexed.
			return nil
		}

		batch := issueIndexer.NewBatch()
		for _, repo := range repos {
			repoIssues, err := Issues(&IssuesOptions{
				RepoID:   repo.ID,
				IsClosed: util.OptionalBoolNone,
				IsPull:   util.OptionalBoolNone,
				Page:     -1, // do not page
			})
			if err != nil {
				return fmt.Errorf("Issues: %v", err)
			}
			for _, issue := range repoIssues {
				if err := batch.Index(issue.indexUID(), issue.issueData()); err != nil {
					return fmt.Errorf("batch.Index: %v", err)
				}
			}
		}

		// One batch per page of repositories.
		if err := issueIndexer.Batch(batch); err != nil {
			return fmt.Errorf("index.Batch: %v", err)
		}
		page++
	}
}

// processIssueIndexerUpdateQueue loops forever, taking issues off
// issueIndexerUpdateQueue one at a time and writing each to the issue indexer.
// Indexing failures are logged and do not stop the loop.
func processIssueIndexerUpdateQueue() {
	for {
		issue := <-issueIndexerUpdateQueue
		err := issueIndexer.Index(issue.indexUID(), issue.issueData())
		if err != nil {
			log.Error(4, "issuesIndexer.Index: %v", err)
		}
	}
}

// indexUID returns the unique identifier under which the issue is stored in
// full-text indices: the issue ID rendered in base 36.
func (issue *Issue) indexUID() string {
	const uidBase = 36
	return strconv.FormatInt(issue.ID, uidBase)
}

// issueData returns a freshly allocated indexer document holding the issue's
// ID, repository ID, title and content.
func (issue *Issue) issueData() *issueIndexerData {
	doc := new(issueIndexerData)
	doc.ID = issue.ID
	doc.RepoID = issue.RepoID
	doc.Title = issue.Title
	doc.Content = issue.Content
	return doc
}

// UpdateIssueIndexer queues an issue to be added to, or updated in, the issue
// indexer. It never blocks the caller.
//
// The issue is first offered to issueIndexerUpdateQueue with a non-blocking
// send, so that while the buffered queue has room no goroutine is spawned and
// updates are enqueued in call order. Only when the send cannot proceed
// immediately (queue full, or not yet created by InitIssueIndexer) is the send
// handed to a goroutine that waits for space.
func UpdateIssueIndexer(issue *Issue) {
	select {
	case issueIndexerUpdateQueue <- issue:
		// Enqueued directly; nothing more to do.
	default:
		// Fall back to a background send so the caller is not held up.
		go func() {
			issueIndexerUpdateQueue <- issue
		}()
	}
}
Loading