Skip to content

Commit

Permalink
Better sorting of search results (zealdocs#613, zealdocs#100)
Browse files Browse the repository at this point in the history
Uses an O(m+n) algorithm based on https://github.com/bevacqua/fuzzysearch
- should be faster than the one initially proposed in PR zealdocs#281.
  • Loading branch information
jkozera committed Oct 9, 2016
1 parent 188a247 commit f001b4b
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 10 deletions.
174 changes: 165 additions & 9 deletions src/libs/registry/docset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@
#include <QRegularExpression>
#include <QVariant>

#include <algorithm>
#include <string>

#include <sqlite3.h>
#include <string.h>

// Forward declaration of the SQLite callback that is registered below under
// the SQL function name "zealScore"; the definition appears further down in
// this file. The signature is the one sqlite3_create_function() expects for
// a scalar function callback.
static void scoreFunc(
sqlite3_context *context,
int argc,
sqlite3_value **argv
);

using namespace Zeal::Registry;

namespace {
Expand Down Expand Up @@ -121,7 +132,9 @@ Docset::Docset(const QString &path) :
return;
}

m_type = m_db->tables().contains(QStringLiteral("searchIndex")) ? Type::Dash : Type::ZDash;
sqlite3 *handle = m_database->handle();
sqlite3_create_function(handle, "zealScore", 2, SQLITE_UTF8, 0, scoreFunc, 0, 0);
m_type = m_database->tables().contains(QStringLiteral("searchIndex")) ? Type::Dash : Type::ZDash;

createIndex();

Expand Down Expand Up @@ -252,21 +265,21 @@ QList<SearchResult> Docset::search(const QString &query, const CancellationToken

QString queryStr;
if (m_type == Docset::Type::Dash) {
queryStr = QStringLiteral("SELECT name, type, path "
queryStr = QStringLiteral("SELECT name, type, path, zealScore('%1', name) as score "
" FROM searchIndex "
"WHERE (name LIKE '%%1%' ESCAPE '\\') "
"ORDER BY name COLLATE NOCASE").arg(sanitizedQuery);
"WHERE score > 0 "
"ORDER BY score DESC").arg(sanitizedQuery);
} else {
queryStr = QStringLiteral("SELECT ztokenname, ztypename, zpath, zanchor "
queryStr = QStringLiteral("SELECT ztokenname, ztypename, zpath, zanchor, zealScore('%1', ztokenname) as score "
" FROM ztoken "
"LEFT JOIN ztokenmetainformation "
" ON ztoken.zmetainformation = ztokenmetainformation.z_pk "
"LEFT JOIN zfilepath "
" ON ztokenmetainformation.zfile = zfilepath.z_pk "
"LEFT JOIN ztokentype "
" ON ztoken.ztokentype = ztokentype.z_pk "
"WHERE (ztokenname LIKE '%%1%' ESCAPE '\\') "
"ORDER BY ztokenname COLLATE NOCASE").arg(sanitizedQuery);
"WHERE score > 0 "
"ORDER BY score DESC").arg(sanitizedQuery);
}

// Limit for very short queries.
Expand All @@ -281,7 +294,8 @@ QList<SearchResult> Docset::search(const QString &query, const CancellationToken
results.append({m_db->value(0).toString(),
parseSymbolType(m_db->value(1).toString()),
const_cast<Docset *>(this),
createPageUrl(m_db->value(2).toString(), m_db->value(3).toString())});
createPageUrl(m_db->value(2).toString(), m_db->value(3).toString()),
m_database->value(4).toInt()});
}

return results;
Expand Down Expand Up @@ -320,7 +334,8 @@ QList<SearchResult> Docset::relatedLinks(const QUrl &url) const
results.append({m_db->value(0).toString(),
parseSymbolType(m_db->value(1).toString()),
const_cast<Docset *>(this),
createPageUrl(m_db->value(2).toString(), m_db->value(3).toString())});
createPageUrl(m_db->value(2).toString(), m_db->value(3).toString()),
0});
}

if (results.size() == 1)
Expand Down Expand Up @@ -607,3 +622,144 @@ QString Docset::parseSymbolType(const QString &str)

return aliases.value(str, str);
}

// ported from DevDocs' searcher.coffee:
// (https://github.com/Thibaut/devdocs/blob/50f583246d5fbd92be7b71a50bfa56cf4e239c14/assets/javascripts/app/searcher.coffee#L91)
// Greedy subsequence matcher: checks whether every character of `needle`
// occurs in `haystack` in the same order (not necessarily adjacently),
// always taking the earliest possible position for each character.
//
// Outputs on success:
//   *start      - index in `haystack` of the first matched character
//   *len        - distance from *start to one past the last matched character
//   *needle_len - number of characters in `needle` (written only when the
//                 pointer is non-null)
// Outputs on failure (some needle character could not be found):
//   *start == -1, *len == 0, *needle_len left untouched.
// An empty needle is reported as "no match" (*start == -1), although
// *needle_len is still set to 0.
//
// Both *start and *len are initialised here, so the result no longer depends
// on the caller having preset *start to -1.
static void match_fuzzy(
        const char* needle, const char* haystack,
        int* start, int* len, int* needle_len
) {
    *start = -1;
    *len = 0;

    int i = 0, j = 0;
    for (; needle[i] != 0; ++i) {
        // Continue scanning right after the previously matched character.
        while (haystack[j] != 0 && haystack[j] != needle[i]) {
            ++j;
        }
        if (haystack[j] == 0) {
            // Ran off the end of the haystack: needle[i] is missing.
            *start = -1;
            *len = 0;
            return;
        }
        if (*start == -1) {
            *start = j; // first matched character
        }
        ++j;
        *len = j - *start;
    }
    if (needle_len) {
        *needle_len = i;
    }
}

// Scores a contiguous occurrence of the query inside `value`.
//
//   match_index - offset in `value` where the occurrence starts
//   match_len   - length of the occurrence
//   value       - NUL-terminated string that was searched; '.' is treated
//                 as the separator between name components
//
// Returns 0 when a one-character query sits in the middle of a component
// (neither at offset 0 nor directly after a '.'). Otherwise returns a score
// of at least 1 that starts at 100 and is reduced by the penalties below.
static int score_exact(int match_index, int match_len, const char* value) {
    const int total_len = strlen(value);
    const int match_end = match_index + match_len;

    // One point off for every character of `value` outside the occurrence.
    int result = 100 - (total_len - match_len);

    if (match_index > 0) {
        const bool follows_dot = value[match_index - 1] == '.';
        if (follows_dot) {
            // Directly after a dot: give back the penalty for all but one of
            // the characters in front of the occurrence, i.e. score it like
            // a match at the very beginning, minus one.
            result += match_index - 1;
        } else if (match_len == 1) {
            // A single character in the middle of a component is not
            // considered a match at all.
            return 0;
        } else {
            // Walk back to the nearest preceding dot (or to -1 when there is
            // none) and charge for the distance to it, plus once more for
            // every character that follows the occurrence.
            int boundary = match_index - 2;
            while (boundary >= 0 && value[boundary] != '.') {
                --boundary;
            }
            result -= match_index - boundary;
            result -= total_len - match_end;
        }

        // One point off per dot in front of the occurrence, not counting a
        // dot that immediately precedes it.
        int dots_before = 0;
        for (int k = match_index - 2; k >= 0; --k) {
            if (value[k] == '.') {
                ++dots_before;
            }
        }
        result -= dots_before;
    }

    // Five points off per dot after the occurrence.
    int dots_after = 0;
    for (int k = match_end; k < total_len; ++k) {
        if (value[k] == '.') {
            ++dots_after;
        }
    }
    result -= 5 * dots_after;

    return qMax(1, result);
}

// Scores a non-contiguous (fuzzy) match of the query inside `value`.
//
//   match_index - offset in `value` of the first matched character
//   match_len   - span from the first to one past the last matched character
//   value       - NUL-terminated string that was searched; '.' separates
//                 name components
//
// The result falls into one of three tiers, each reduced by the span length
// but never below the tier's floor:
//   66..100 - the match starts at the beginning of `value` or after a '.'
//   33..67  - the match ends exactly at the end of `value`
//    1..34  - anything else
//
// The end-of-string test looks at the character just past the match. The
// previous code inspected the last matched character instead, which can
// never be NUL, so the middle tier was unreachable.
static int score_fuzzy(int match_index, int match_len, const char* value) {
    if (match_index == 0 || value[match_index - 1] == '.') {
        return std::max(66, 100 - match_len);
    }
    if (value[match_index + match_len] == '\0') {
        return std::max(33, 67 - match_len);
    }
    return std::max(1, 34 - match_len);
}

// Replaces every occurrence of `from` in `str` with `to`, in place.
// `str` is taken by reference: the previous by-value parameter meant every
// replacement was applied to a temporary copy and then thrown away.
// An empty `from` is ignored (it would otherwise match at every position and
// never terminate).
static void replace_all(std::string& str, const std::string& from, const std::string& to) {
    if (from.empty()) {
        return;
    }
    std::string::size_type pos = 0;
    while ((pos = str.find(from, pos)) != std::string::npos) {
        str.replace(pos, from.length(), to);
        pos += to.length(); // skip the inserted text so it is not rescanned
    }
}

// Returns a copy of `text` in which the language-specific separators are
// rewritten to '.', the only separator the scoring helpers look for.
static std::string normalize_separators(const unsigned char* text) {
    std::string normalized(reinterpret_cast<const char*>(text));
    replace_all(normalized, "::", "."); // C++
    replace_all(normalized, "/", ".");  // Go
    replace_all(normalized, "_", ".");
    return normalized;
}

// SQLite scalar function callback: zealScore(needle, haystack).
//
// Sets the SQL result to an integer relevance score:
//   0      - `needle` is not a subsequence of `haystack`, or an argument is
//            missing / SQL NULL
//   > 0    - score from score_exact() when the needle characters were found
//            back to back, otherwise the better of two score_fuzzy() results
//            (whole string vs. the part after the last '.').
//
// Both arguments are normalised with normalize_separators() before matching.
// The needle has to be normalised too: otherwise a query containing '_',
// "::" or '/' could never match a haystack in which those were rewritten.
static void scoreFunc(
        sqlite3_context *context,
        int argc,
        sqlite3_value **argv
) {
    if (argc != 2) {
        sqlite3_result_int(context, 0);
        return;
    }

    // sqlite3_value_text() yields a null pointer for SQL NULL values;
    // constructing a std::string from it would be undefined behaviour.
    const unsigned char* needle_raw = sqlite3_value_text(argv[0]);
    const unsigned char* haystack_raw = sqlite3_value_text(argv[1]);
    if (needle_raw == nullptr || haystack_raw == nullptr) {
        sqlite3_result_int(context, 0);
        return;
    }

    const std::string needle_str = normalize_separators(needle_raw);
    const std::string haystack_str = normalize_separators(haystack_raw);
    const char* needle = needle_str.c_str();
    const char* haystack = haystack_str.c_str();

    int match1 = -1, match1_len = 0, needle_len = 0;
    match_fuzzy(needle, haystack, &match1, &match1_len, &needle_len);
    if (match1 == -1) {
        sqlite3_result_int(context, 0);
        return;
    }

    if (needle_len == match1_len) { // matched span has no gaps: exact match
        sqlite3_result_int(context, score_exact(match1, match1_len, haystack));
        return;
    }

    int best = score_fuzzy(match1, match1_len, haystack);

    // Try again on the last component only (text after the final '.'); a
    // match there may start at a component boundary and score higher.
    const std::string::size_type last_dot = haystack_str.rfind('.');
    if (last_dot != std::string::npos) {
        const char* tail = haystack + last_dot + 1;
        int match2 = -1, match2_len = 0;
        match_fuzzy(needle, tail, &match2, &match2_len, nullptr);
        if (match2 != -1) {
            best = std::max(best, score_fuzzy(match2, match2_len, tail));
        }
    }
    sqlite3_result_int(context, best);
}
31 changes: 31 additions & 0 deletions src/libs/registry/qsqlitelite.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#ifndef QSQLITELITE_H
#define QSQLITELITE_H

#include <QString>
#include <QStringList>

#include <sqlite3.h>

// Minimal wrapper around a raw SQLite connection (sqlite3*) and one prepared
// statement (sqlite3_stmt*).
//
// NOTE(review): only the declaration is visible here; the per-method notes
// below are inferred from names and signatures and should be confirmed
// against the implementation.
class QSQLiteLite
{
public:
    explicit QSQLiteLite(const QString &path);
    ~QSQLiteLite();

    // The class stores raw SQLite handles and declares its own destructor.
    // An implicit copy would leave two objects sharing the same handles
    // (and presumably both trying to release them), so copying is disabled.
    QSQLiteLite(const QSQLiteLite &) = delete;
    QSQLiteLite &operator=(const QSQLiteLite &) = delete;

    bool isOpen();
    QString lastError();
    QStringList tables();

    // Statement handling; presumably execute() prepares a query, next()
    // steps through its rows and finalize() releases it - TODO confirm.
    bool execute(const QString &queryStr);
    bool next();
    void finalize();

    // Column accessors; `index` presumably is the zero-based column of the
    // current row - TODO confirm.
    QString stringValue(int index);
    sqlite3_int64 intValue(int index);

    // Exposes the raw connection handle to callers.
    sqlite3* handle();

private:
    void updateLastError();

    QString m_lastError;
    sqlite3* m_db = nullptr;
    sqlite3_stmt *m_stmt = nullptr;
};

#endif // QSQLITELITE_H
5 changes: 4 additions & 1 deletion src/libs/registry/searchresult.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,12 @@ struct SearchResult

QUrl url;

int score;

// Orders results best-first: a higher score sorts earlier, and results with
// equal scores fall back to case-insensitive name order.
// The earlier unconditional name-only return has been removed; it made the
// score comparison unreachable.
inline bool operator<(const SearchResult &other) const
{
    if (score != other.score)
        return score > other.score;
    return QString::compare(name, other.name, Qt::CaseInsensitive) < 0;
}
};

Expand Down
5 changes: 5 additions & 0 deletions src/libs/util/sqlitedatabase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,8 @@ void SQLiteDatabase::updateLastError()
return;
m_lastError = QString(reinterpret_cast<const QChar *>(sqlite3_errmsg16(m_db)));
}

// Returns the raw SQLite connection handle held in m_db.
// Defined on SQLiteDatabase: that is the class used by the other member
// definitions in this file and the one whose header declares handle().
// The previous qualifier, SQLiteDriver, does not match that declaration.
sqlite3 *SQLiteDatabase::handle()
{
    return m_db;
}
1 change: 1 addition & 0 deletions src/libs/util/sqlitedatabase.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class SQLiteDatabase
bool execute(const QString &queryStr);
bool next();
QVariant value(int index) const;
sqlite3* handle();

private:
void close();
Expand Down

0 comments on commit f001b4b

Please sign in to comment.