Skip to content

Commit

Permalink
Merge pull request #843 from kiwix/backslash_handling_in_suggestions
Browse files Browse the repository at this point in the history
Backslash handling in suggestions
  • Loading branch information
kelson42 authored Nov 17, 2022
2 parents 6285599 + d66cc62 commit 3568ccd
Show file tree
Hide file tree
Showing 6 changed files with 275 additions and 41 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v2
- name: Setup python 3.10
uses: actions/setup-python@v2
- name: Setup python 3.9
uses: actions/setup-python@v1
with:
python-version: '3.10'
python-version: '3.9'
- name: Install packages
run: |
brew update
brew install gcovr pkg-config ninja
brew install gcovr pkg-config ninja || brew link --overwrite python
- name: Install python modules
run: pip3 install meson==0.49.2 pytest
- name: Install deps
Expand Down
41 changes: 4 additions & 37 deletions src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,6 @@ std::string renderUrl(const std::string& root, const std::string& urlTemplate)
return url;
}

std::string makeFulltextSearchSuggestion(const std::string& lang, const std::string& queryString)
{
return i18n::expandParameterizedString(lang, "suggest-full-text-search",
{
{"SEARCH_TERMS", queryString}
}
);
}

ParameterizedMessage noSuchBookErrorMsg(const std::string& bookName)
{
return ParameterizedMessage("no-such-book", { {"BOOK_NAME", bookName} });
Expand Down Expand Up @@ -700,9 +691,7 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
printf("Searching suggestions for: \"%s\"\n", queryString.c_str());
}

MustacheData results{MustacheData::type::list};

bool first = true;
Suggestions results;

/* Get the suggestions */
auto searcher = suggestionSearcherCache.getOrPut(bookId,
Expand All @@ -713,38 +702,16 @@ std::unique_ptr<Response> InternalServer::handle_suggest(const RequestContext& r
auto srs = search.getResults(start, count);

for(auto& suggestion: srs) {
MustacheData result;
result.set("label", suggestion.getTitle());

if (suggestion.hasSnippet()) {
result.set("label", suggestion.getSnippet());
}

result.set("value", suggestion.getTitle());
result.set("kind", "path");
result.set("path", suggestion.getPath());
result.set("first", first);
first = false;
results.push_back(result);
results.add(suggestion);
}


/* Propose the fulltext search if possible */
if (archive->hasFulltextIndex()) {
MustacheData result;
const auto lang = request.get_user_language();
result.set("label", makeFulltextSearchSuggestion(lang, queryString));
result.set("value", queryString + " ");
result.set("kind", "pattern");
result.set("first", first);
results.push_back(result);
results.addFTSearchSuggestion(request.get_user_language(), queryString);
}

auto data = get_default_data();
data.set("suggestions", results);

auto response = ContentResponse::build(*this, RESOURCE::templates::suggestion_json, data, "application/json; charset=utf-8");
return std::move(response);
return ContentResponse::build(*this, results.getJSON(), "application/json; charset=utf-8");
}

std::unique_ptr<Response> InternalServer::handle_viewer_settings(const RequestContext& request)
Expand Down
72 changes: 72 additions & 0 deletions src/tools/otherTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,15 @@
#endif

#include "tools/stringTools.h"
#include "server/i18n.h"
#include "libkiwix-resources.h"

#include <map>
#include <sstream>
#include <pugixml.hpp>

#include <zim/uuid.h>
#include <zim/suggestion_iterator.h>


static std::map<std::string, std::string> codeisomapping {
Expand Down Expand Up @@ -326,3 +329,72 @@ std::string kiwix::render_template(const std::string& template_str, kainjow::mus
tmpl.render(data, [&ss](const std::string& str) { ss << str; });
return ss.str();
}

namespace
{

std::string escapeBackslashes(const std::string& s)
{
std::string es;
es.reserve(s.size());
for (char c : s) {
if ( c == '\\' ) {
es.push_back('\\');
}
es.push_back(c);
}
return es;
}

std::string makeFulltextSearchSuggestion(const std::string& lang,
const std::string& queryString)
{
return kiwix::i18n::expandParameterizedString(lang, "suggest-full-text-search",
{
{"SEARCH_TERMS", queryString}
}
);
}

} // unnamed namespace

kiwix::Suggestions::Suggestions()
: m_data(kainjow::mustache::data::type::list)
{
}

void kiwix::Suggestions::add(const zim::SuggestionItem& suggestion)
{
kainjow::mustache::data result;

const std::string label = suggestion.hasSnippet()
? suggestion.getSnippet()
: suggestion.getTitle();

result.set("label", escapeBackslashes(label));
result.set("value", escapeBackslashes(suggestion.getTitle()));
result.set("kind", "path");
result.set("path", escapeBackslashes(suggestion.getPath()));
result.set("first", m_data.is_empty_list());
m_data.push_back(result);
}

void kiwix::Suggestions::addFTSearchSuggestion(const std::string& uiLang,
const std::string& queryString)
{
kainjow::mustache::data result;
const std::string label = makeFulltextSearchSuggestion(uiLang, queryString);
result.set("label", escapeBackslashes(label));
result.set("value", escapeBackslashes(queryString + " "));
result.set("kind", "pattern");
result.set("first", m_data.is_empty_list());
m_data.push_back(result);
}

std::string kiwix::Suggestions::getJSON() const
{
kainjow::mustache::data data;
data.set("suggestions", m_data);

return render_template(RESOURCE::templates::suggestion_json, data);
}
20 changes: 20 additions & 0 deletions src/tools/otherTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ namespace pugi {
class xml_node;
}

namespace zim {
class SuggestionItem;
}

namespace kiwix
{
std::string nodeToString(const pugi::xml_node& node);
Expand Down Expand Up @@ -67,6 +71,22 @@ namespace kiwix

return defaultValue;
}

class Suggestions
{
public:
Suggestions();

void add(const zim::SuggestionItem& suggestion);

void addFTSearchSuggestion(const std::string& uiLang,
const std::string& query);

std::string getJSON() const;

private:
kainjow::mustache::data m_data;
};
}

#endif
1 change: 1 addition & 0 deletions test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ tests = [
'tagParsing',
'stringTools',
'pathTools',
'otherTools',
'kiwixserve',
'book',
'manager',
Expand Down
174 changes: 174 additions & 0 deletions test/otherTools.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
* Copyright (C) 2022 Veloman Yunkan
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#include "gtest/gtest.h"
#include "../src/tools/otherTools.h"
#include "zim/suggestion_iterator.h"

#include <regex>

namespace
{

// Output generated via mustache templates sometimes contains end-of-line
// whitespace. This complicates representing the expected output of a unit-test
// as C++ raw strings in editors that are configured to delete EOL whitespace.
// A workaround is to put special markers (//EOLWHITESPACEMARKER) at the end
// of such lines in the expected output string and remove them at runtime.
// This is exactly what this function is for.
std::string removeEOLWhitespaceMarkers(const std::string& s)
{
const std::regex pattern("//EOLWHITESPACEMARKER");
return std::regex_replace(s, pattern, "");
}

} // unnamed namespace

#define CHECK_SUGGESTIONS(actual, expected) \
EXPECT_EQ(actual, removeEOLWhitespaceMarkers(expected))

TEST(Suggestions, basicTest)
{
kiwix::Suggestions s;
CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
//EOLWHITESPACEMARKER
]
)EXPECTEDJSON"
);

s.add(zim::SuggestionItem("Title", "/PATH", "Snippet"));

CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Title",
"label" : "Snippet",
"kind" : "path"
, "path" : "/PATH"
}
]
)EXPECTEDJSON"
);

s.add(zim::SuggestionItem("Title Without Snippet", "/P/a/t/h"));
s.addFTSearchSuggestion("en", "kiwi");

CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Title",
"label" : "Snippet",
"kind" : "path"
, "path" : "/PATH"
},
{
"value" : "Title Without Snippet",
"label" : "Title Without Snippet",
"kind" : "path"
, "path" : "/P/a/t/h"
},
{
"value" : "kiwi ",
"label" : "containing &apos;kiwi&apos;...",
"kind" : "pattern"
//EOLWHITESPACEMARKER
}
]
)EXPECTEDJSON"
);
}

TEST(Suggestions, specialCharHandling)
{
// HTML special symbols (<, >, &, ", and ') must be HTML-escaped
// Backslash symbols (\) must be duplicated.
const std::string SYMBOLS(R"(\<>&'"~!@#$%^*()_+`-=[]{}|:;,.?)");
{
kiwix::Suggestions s;
s.add(zim::SuggestionItem("Title with " + SYMBOLS,
"Path with " + SYMBOLS,
"Snippet with " + SYMBOLS));

CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Title with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"label" : "Snippet with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"kind" : "path"
, "path" : "Path with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?"
}
]
)EXPECTEDJSON"
);
}

{
kiwix::Suggestions s;
s.add(zim::SuggestionItem("Snippetless title with " + SYMBOLS,
"Path with " + SYMBOLS));

CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "Snippetless title with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"label" : "Snippetless title with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?",
"kind" : "path"
, "path" : "Path with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?"
}
]
)EXPECTEDJSON"
);
}

{
kiwix::Suggestions s;
s.addFTSearchSuggestion("eng", "text with " + SYMBOLS);

CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "text with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.? ",
"label" : "containing &apos;text with \\&lt;&gt;&amp;&apos;&quot;~!@#$%^*()_+`-=[]{}|:;,.?&apos;...",
"kind" : "pattern"
//EOLWHITESPACEMARKER
}
]
)EXPECTEDJSON"
);
}
}

TEST(Suggestions, fulltextSearchSuggestionIsTranslated)
{
kiwix::Suggestions s;
s.addFTSearchSuggestion("it", "kiwi");

CHECK_SUGGESTIONS(s.getJSON(),
R"EXPECTEDJSON([
{
"value" : "kiwi ",
"label" : "contenente &apos;kiwi&apos;...",
"kind" : "pattern"
//EOLWHITESPACEMARKER
}
]
)EXPECTEDJSON"
);
}

0 comments on commit 3568ccd

Please sign in to comment.