diff --git a/src/libs/registry/docset.cpp b/src/libs/registry/docset.cpp
index 281e4ee25..7def1cca0 100644
--- a/src/libs/registry/docset.cpp
+++ b/src/libs/registry/docset.cpp
@@ -36,6 +36,17 @@
 #include
 #include
 
+#include
+
+#include
+#include
+
+static void scoreFunc(
+    sqlite3_context *context,
+    int argc,
+    sqlite3_value **argv
+);
+
 using namespace Zeal::Registry;
 
 namespace {
@@ -121,7 +132,9 @@ Docset::Docset(const QString &path) :
         return;
     }
 
-    m_type = m_db->tables().contains(QStringLiteral("searchIndex")) ? Type::Dash : Type::ZDash;
+    // search() ranks rows with zealScore(query, name); register it on this connection before any query runs.
+    sqlite3_create_function(m_db->handle(), "zealScore", 2, SQLITE_UTF8, nullptr, scoreFunc, nullptr, nullptr);
+    m_type = m_db->tables().contains(QStringLiteral("searchIndex")) ? Type::Dash : Type::ZDash;
 
     createIndex();
 
@@ -252,12 +265,12 @@ QList Docset::search(const QString &query, const CancellationToken
 
     QString queryStr;
     if (m_type == Docset::Type::Dash) {
-        queryStr = QStringLiteral("SELECT name, type, path "
+        queryStr = QStringLiteral("SELECT name, type, path, NULL AS anchor, zealScore('%1', name) as score " // NULL pads the missing anchor so score is column 4, as in the ZDash query
                                   " FROM searchIndex "
-                                  "WHERE (name LIKE '%%1%' ESCAPE '\\') "
-                                  "ORDER BY name COLLATE NOCASE").arg(sanitizedQuery);
+                                  "WHERE score > 0 "
+                                  "ORDER BY score DESC").arg(sanitizedQuery);
     } else {
-        queryStr = QStringLiteral("SELECT ztokenname, ztypename, zpath, zanchor "
+        queryStr = QStringLiteral("SELECT ztokenname, ztypename, zpath, zanchor, zealScore('%1', ztokenname) as score "
                                   " FROM ztoken "
                                   "LEFT JOIN ztokenmetainformation "
                                   " ON ztoken.zmetainformation = ztokenmetainformation.z_pk "
@@ -265,8 +278,8 @@ QList Docset::search(const QString &query, const CancellationToken
                                   " ON ztokenmetainformation.zfile = zfilepath.z_pk "
                                   "LEFT JOIN ztokentype "
                                   " ON ztoken.ztokentype = ztokentype.z_pk "
-                                  "WHERE (ztokenname LIKE '%%1%' ESCAPE '\\') "
-                                  "ORDER BY ztokenname COLLATE NOCASE").arg(sanitizedQuery);
+                                  "WHERE score > 0 "
+                                  "ORDER BY score DESC").arg(sanitizedQuery);
     }
 
     // Limit for very short queries.
@@ -281,7 +294,8 @@ QList Docset::search(const QString &query, const CancellationToken
         results.append({m_db->value(0).toString(),
                         parseSymbolType(m_db->value(1).toString()),
                         const_cast(this),
-                        createPageUrl(m_db->value(2).toString(), m_db->value(3).toString())});
+                        createPageUrl(m_db->value(2).toString(), m_db->value(3).toString()),
+                        m_db->value(4).toInt()});
     }
 
     return results;
@@ -320,7 +334,8 @@ QList Docset::relatedLinks(const QUrl &url) const
         results.append({m_db->value(0).toString(),
                         parseSymbolType(m_db->value(1).toString()),
                         const_cast(this),
-                        createPageUrl(m_db->value(2).toString(), m_db->value(3).toString())});
+                        createPageUrl(m_db->value(2).toString(), m_db->value(3).toString()),
+                        0});
     }
 
     if (results.size() == 1)
@@ -607,3 +622,186 @@ QString Docset::parseSymbolType(const QString &str)
 
     return aliases.value(str, str);
 }
+
+// Fuzzy ranking ported from DevDocs' searcher.coffee:
+// (https://github.com/Thibaut/devdocs/blob/50f583246d5fbd92be7b71a50bfa56cf4e239c14/assets/javascripts/app/searcher.coffee#L91)
+
+// Locates needle inside haystack as an in-order subsequence (gaps allowed).
+// *start must be -1 on entry. On success *start is the offset of the first
+// matched character, *len spans from there through the last matched one and,
+// when needle_len is non-null, *needle_len receives the needle's length.
+// On failure *start is reset to -1 and *len is left unspecified.
+static void match_fuzzy(
+    const char* needle, const char* haystack,
+    int* start, int* len, int* needle_len
+) {
+    int i = 0, j = 0;
+    for (; needle[i] != 0; ++i) {
+        bool found = false;
+        while (haystack[j] != 0) {
+            if (needle[i] == haystack[j++]) {
+                if (*start == -1) *start = j - 1; // first matched char
+                *len = j - *start;
+                found = true;
+                break;
+            }
+        }
+        if (!found) {
+            *start = -1; // end of haystack, char not found
+            return;
+        }
+    }
+    if (needle_len)
+        *needle_len = i;
+}
+
+// Scores a contiguous occurrence of the query (match_len chars at match_index)
+// inside the '.'-separated value. Returns 0 to reject the match, otherwise a
+// score of at least 1.
+static int score_exact(int match_index, int match_len, const char* value) {
+    int score = 100, value_len = static_cast<int>(strlen(value));
+    // Remove one point for each unmatched character.
+    score -= (value_len - match_len);
+    if (match_index > 0) {
+        if (value[match_index - 1] == '.') {
+            // If the character preceding the query is a dot, assign the same
+            // score as if the query was found at the beginning of the string,
+            // minus one.
+            score += (match_index - 1);
+        } else if (match_len == 1) {
+            // Don't match a single-character query unless it's found at the
+            // beginning of the string or is preceded by a dot.
+            return 0;
+        } else {
+            // (1) Remove one point for each unmatched character up to
+            //     the nearest preceding dot or the beginning of the
+            //     string.
+            // (2) Remove one point for each unmatched character
+            //     following the query.
+            int i = match_index - 2;
+            while (i >= 0 && value[i] != '.') --i;
+            score -= (match_index - i) +                     // (1)
+                     (value_len - match_len - match_index);  // (2)
+        }
+        // Remove one point for each dot preceding the query, except for the
+        // one immediately before the query.
+        int separators = 0,
+            i = match_index - 2;
+        while (i >= 0) {
+            if (value[i] == '.') {
+                separators += 1;
+            }
+            i--;
+        }
+        score -= separators;
+    }
+
+    // Remove five points for each dot following the query.
+    int separators = 0;
+    int i = value_len - match_len - match_index - 1;
+    while (i >= 0){
+        if (value[match_index + match_len + i] == '.') {
+            separators += 1;
+        }
+        i--;
+    }
+    score -= separators * 5;
+
+    return qMax(1, score);
+}
+
+// Scores a non-contiguous match spanning match_len chars from match_index:
+// 66..99 when it starts at the beginning or right after a '.', 33..66 when
+// it runs to the end of value, 1..33 otherwise; tighter spans score higher.
+static int score_fuzzy(int match_index, int match_len, const char* value) {
+    if (match_index == 0 || value[match_index - 1] == '.') {
+        return qMax(66, 100 - match_len);
+    }
+    // The span reaches the end of value when the character one past it is the
+    // terminator; the last matched character itself can never be NUL.
+    if (value[match_index + match_len] == 0) {
+        return qMax(33, 67 - match_len);
+    }
+    return qMax(1, 34 - match_len);
+}
+
+// Replaces every occurrence of `from` in `str` with `to`, in place.
+static void replace_all(std::string& str, const std::string& from, const std::string& to) {
+    if (from.empty()) return; // find("") always succeeds and would never terminate
+    size_t start_pos = 0;
+    while ((start_pos = str.find(from, start_pos)) != std::string::npos) {
+        str.replace(start_pos, from.length(), to);
+        start_pos += to.length();
+    }
+}
+
+// Canonical form shared by the query and the symbol name: the separators
+// "::", "/" and "_" become '.' and ASCII letters are lowercased, which keeps
+// the case-insensitivity of the LIKE ... COLLATE NOCASE query this replaces.
+static std::string normalize_for_score(const char* text) {
+    std::string normalized(text);
+    replace_all(normalized, "::", "."); // C++
+    replace_all(normalized, "/", ".");  // Go
+    replace_all(normalized, "_", ".");
+    for (char& c : normalized) {
+        if (c >= 'A' && c <= 'Z') c = static_cast<char>(c - 'A' + 'a');
+    }
+    return normalized;
+}
+
+// SQLite scalar function zealScore(query, name): yields 0 when name does not
+// match query, otherwise a positive rank (higher is better).
+static void scoreFunc(
+    sqlite3_context *context,
+    int argc,
+    sqlite3_value **argv
+) {
+    (void)argc; // both arguments are read unconditionally below
+    const char* raw_needle = reinterpret_cast<const char*>(sqlite3_value_text(argv[0]));
+    const char* raw_haystack = reinterpret_cast<const char*>(sqlite3_value_text(argv[1]));
+    // sqlite3_value_text() returns NULL for SQL NULL; report "no match"
+    // instead of constructing a std::string from a null pointer.
+    if (!raw_needle || !raw_haystack) {
+        sqlite3_result_int(context, 0);
+        return;
+    }
+
+    const std::string needle_str = normalize_for_score(raw_needle);
+    const std::string haystack_str = normalize_for_score(raw_haystack);
+    const char* needle = needle_str.c_str();
+    const char* haystack = haystack_str.c_str();
+
+    // A contiguous occurrence anywhere in the name is an exact match. Search
+    // for it directly: the greedy scan in match_fuzzy() anchors on the first
+    // occurrence of needle[0] and would miss e.g. "ab" inside "aab".
+    if (!needle_str.empty()) {
+        const std::string::size_type exact_at = haystack_str.find(needle_str);
+        if (exact_at != std::string::npos) {
+            sqlite3_result_int(context, score_exact(
+                static_cast<int>(exact_at), static_cast<int>(needle_str.size()), haystack
+            ));
+            return;
+        }
+    }
+
+    int match1 = -1, match1_len = 0;
+    match_fuzzy(needle, haystack, &match1, &match1_len, nullptr);
+    if (match1 == -1) {
+        sqlite3_result_int(context, 0);
+        return;
+    }
+
+    int best = score_fuzzy(match1, match1_len, haystack);
+    // Also score the query against the last '.'-separated component on its
+    // own and keep the better of the two results.
+    const std::string::size_type last_dot = haystack_str.rfind('.');
+    if (last_dot != std::string::npos) {
+        const char* tail = haystack + last_dot + 1;
+        int match2 = -1, match2_len = 0;
+        match_fuzzy(needle, tail, &match2, &match2_len, nullptr);
+        if (match2 != -1) {
+            best = qMax(best, score_fuzzy(match2, match2_len, tail));
+        }
+    }
+    sqlite3_result_int(context, best);
+}
diff --git a/src/libs/registry/qsqlitelite.h b/src/libs/registry/qsqlitelite.h
new file mode 100644
index 000000000..a17e22df4
--- /dev/null
+++ b/src/libs/registry/qsqlitelite.h
@@ -0,0 +1,31 @@
+#ifndef QSQLITELITE_H
+#define QSQLITELITE_H
+
+#include
+#include
+
+#include
+
+class QSQLiteLite
+{
+public:
+    explicit QSQLiteLite(const QString &path);
+    ~QSQLiteLite();
+    bool isOpen();
+    QString lastError();
+    QStringList tables();
+    bool execute(const QString &queryStr);
+    bool next();
+    void finalize();
+    QString stringValue(int index);
+    sqlite3_int64 intValue(int index);
+    sqlite3* handle();
+
+private:
+    void updateLastError();
+    QString m_lastError;
+    sqlite3* m_db = nullptr;
+    sqlite3_stmt *m_stmt = nullptr;
+};
+
+#endif // QSQLITELITE_H
diff --git a/src/libs/registry/searchresult.h b/src/libs/registry/searchresult.h
index 5ece8073b..899ac2cb6 100644
--- a/src/libs/registry/searchresult.h
+++ b/src/libs/registry/searchresult.h
@@ -41,9 +41,13 @@ struct SearchResult
 
     QUrl url;
 
+    // Match rank; operator< orders higher scores first and breaks ties by name.
+    int score;
+
     inline bool operator<(const SearchResult &other) const
     {
-        return QString::compare(name, other.name, Qt::CaseInsensitive) < 0;
+        if (score == other.score) return QString::compare(name, other.name, Qt::CaseInsensitive) < 0;
+        return score > other.score;
     }
 };
 
diff --git a/src/libs/util/sqlitedatabase.cpp b/src/libs/util/sqlitedatabase.cpp
index 895a8ff10..70d633357 100644
--- a/src/libs/util/sqlitedatabase.cpp
+++ b/src/libs/util/sqlitedatabase.cpp
@@ -158,3 +158,8 @@ void SQLiteDatabase::updateLastError()
         return;
     m_lastError = QString(reinterpret_cast(sqlite3_errmsg16(m_db)));
 }
+
+sqlite3 *SQLiteDatabase::handle()
+{
+    return m_db;
+}
diff --git a/src/libs/util/sqlitedatabase.h b/src/libs/util/sqlitedatabase.h
index 87fb05492..3cbd8d0da 100644
--- a/src/libs/util/sqlitedatabase.h
+++ b/src/libs/util/sqlitedatabase.h
@@ -43,6 +43,7 @@ class SQLiteDatabase
     bool execute(const QString &queryStr);
     bool next();
     QVariant value(int index) const;
+    sqlite3* handle();
 
 private:
     void close();