From 7167ca1e6a190fcacde5aef3effc6b580ad72192 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 21 Oct 2022 15:43:20 +0400 Subject: [PATCH 1/7] Adios kiwix::getArchiveId() --- src/book.cpp | 2 +- src/tools/archiveTools.cpp | 4 ---- src/tools/archiveTools.h | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/src/book.cpp b/src/book.cpp index 22b9e6931..cf340c04a 100644 --- a/src/book.cpp +++ b/src/book.cpp @@ -66,7 +66,7 @@ bool Book::update(const kiwix::Book& other) void Book::update(const zim::Archive& archive) { m_path = archive.getFilename(); m_pathValid = true; - m_id = getArchiveId(archive); + m_id = std::string(archive.getUuid()); m_title = getArchiveTitle(archive); m_description = getMetaDescription(archive); m_language = getMetaLanguage(archive); diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 5971ec495..6fceeb97a 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -93,10 +93,6 @@ std::string getMetaFlavour(const zim::Archive& archive) { return getMetadata(archive, "Flavour"); } -std::string getArchiveId(const zim::Archive& archive) { - return (std::string) archive.getUuid(); -} - bool getArchiveFavicon(const zim::Archive& archive, unsigned size, std::string& content, std::string& mimeType){ try { diff --git a/src/tools/archiveTools.h b/src/tools/archiveTools.h index 0e2108045..5b159df34 100644 --- a/src/tools/archiveTools.h +++ b/src/tools/archiveTools.h @@ -40,7 +40,6 @@ namespace kiwix std::string getMetaCreator(const zim::Archive& archive); std::string getMetaPublisher(const zim::Archive& archive); std::string getMetaFlavour(const zim::Archive& archive); - std::string getArchiveId(const zim::Archive& archive); bool getArchiveFavicon(const zim::Archive& archive, unsigned size, std::string& content, std::string& mimeType); From 9d2cc354474cb09de923cee929f11b8de87904a7 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 21 Oct 2022 16:36:26 +0400 Subject: [PATCH 2/7] Extracted InternalServer::handle_search_request() --- src/server/internalServer.cpp | 15 ++++++++++----- src/server/internalServer.h | 1 + 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index dd65d7e17..8d253fac0 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -813,6 +813,16 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } try { + return handle_search_request(request); + } catch (const Error& e) { + return HTTP400Response(*this, request) + + invalidUrlMsg + + e.message(); + } +} + +std::unique_ptr InternalServer::handle_search_request(const RequestContext& request) +{ auto searchInfo = getSearchInfo(request); auto bookIds = searchInfo.getBookIds(); @@ -888,11 +898,6 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } */ return std::move(response); - } catch (const Error& e) { - return HTTP400Response(*this, request) - + invalidUrlMsg - + e.message(); - } } std::unique_ptr InternalServer::handle_random(const RequestContext& request) diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 02360443a..c3990c44d 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -134,6 +134,7 @@ class InternalServer { std::unique_ptr handle_catalog_v2_languages(const RequestContext& request); std::unique_ptr handle_catalog_v2_illustration(const RequestContext& request); std::unique_ptr handle_search(const RequestContext& request); + std::unique_ptr handle_search_request(const RequestContext& request); std::unique_ptr handle_suggest(const RequestContext& request); std::unique_ptr handle_random(const RequestContext& request); std::unique_ptr handle_catch(const RequestContext& request); From 414d7ae4fe58652d96fef503d69baef740c45d26 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 21 Oct 2022 16:37:06 +0400 Subject: [PATCH 3/7] Fixed indentation --- src/server/internalServer.cpp | 128 +++++++++++++++++----------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 8d253fac0..0e3137a8d 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -823,81 +823,81 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re std::unique_ptr InternalServer::handle_search_request(const RequestContext& request) { - auto searchInfo = getSearchInfo(request); - auto bookIds = searchInfo.getBookIds(); + auto searchInfo = getSearchInfo(request); + auto bookIds = searchInfo.getBookIds(); - /* Make the search */ - // Try to get a search from the searchInfo, else build it - auto searcher = mp_library->getSearcherByIds(bookIds); - auto lock(searcher->getLock()); + /* Make the search */ + // Try to get a search from the searchInfo, else build it + auto searcher = mp_library->getSearcherByIds(bookIds); + auto lock(searcher->getLock()); - std::shared_ptr search; - try { - search = searchCache.getOrPut(searchInfo, - [=](){ - return make_shared(searcher->search(searchInfo.getZimQuery(m_verbose.load()))); - } - ); - } catch(std::runtime_error& e) { - // Searcher->search will throw a runtime error if there is no valid xapian database to do the search. - // (in case of zim file not containing a index) - const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); - HTTPErrorResponse response(*this, request, MHD_HTTP_NOT_FOUND, - "fulltext-search-unavailable", - "404-page-heading", - cssUrl); - response += nonParameterizedMessage("no-search-results"); - // XXX: Now this has to be handled by the iframe-based viewer which - // XXX: has to resolve if the book selection resulted in a single book. - /* - if(bookIds.size() == 1) { - auto bookId = *bookIds.begin(); - auto bookName = mp_nameMapper->getNameForId(bookId); - response += TaskbarInfo(bookName, mp_library->getArchiveById(bookId).get()); + std::shared_ptr search; + try { + search = searchCache.getOrPut(searchInfo, + [=](){ + return make_shared(searcher->search(searchInfo.getZimQuery(m_verbose.load()))); } - */ - return response; - } - - auto start = 1; - try { - start = request.get_argument("start"); - } catch (const std::exception&) {} - start = max(1, start); - - auto pageLength = 25; - try { - pageLength = request.get_argument("pageLength"); - } catch (const std::exception&) {} - if (pageLength > MAX_SEARCH_LEN) { - pageLength = MAX_SEARCH_LEN; - } - if (pageLength == 0) { - pageLength = 25; - } - - /* Get the results */ - SearchRenderer renderer(search->getResults(start-1, pageLength), mp_nameMapper, mp_library, start, - search->getEstimatedMatches()); - renderer.setSearchPattern(searchInfo.pattern); - renderer.setSearchBookQuery(searchInfo.bookFilterQuery); - renderer.setProtocolPrefix(m_root + "/content/"); - renderer.setSearchProtocolPrefix(m_root + "/search"); - renderer.setPageLength(pageLength); - if (request.get_requested_format() == "xml") { - return ContentResponse::build(*this, renderer.getXml(), "application/rss+xml; charset=utf-8"); - } - auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); + ); + } catch(std::runtime_error& e) { + // Searcher->search will throw a runtime error if there is no valid xapian database to do the search. + // (in case of zim file not containing a index) + const auto cssUrl = renderUrl(m_root, RESOURCE::templates::url_of_search_results_css); + HTTPErrorResponse response(*this, request, MHD_HTTP_NOT_FOUND, + "fulltext-search-unavailable", + "404-page-heading", + cssUrl); + response += nonParameterizedMessage("no-search-results"); // XXX: Now this has to be handled by the iframe-based viewer which // XXX: has to resolve if the book selection resulted in a single book. /* if(bookIds.size() == 1) { auto bookId = *bookIds.begin(); auto bookName = mp_nameMapper->getNameForId(bookId); - response->set_taskbar(bookName, mp_library->getArchiveById(bookId).get()); + response += TaskbarInfo(bookName, mp_library->getArchiveById(bookId).get()); } */ - return std::move(response); + return response; + } + + auto start = 1; + try { + start = request.get_argument("start"); + } catch (const std::exception&) {} + start = max(1, start); + + auto pageLength = 25; + try { + pageLength = request.get_argument("pageLength"); + } catch (const std::exception&) {} + if (pageLength > MAX_SEARCH_LEN) { + pageLength = MAX_SEARCH_LEN; + } + if (pageLength == 0) { + pageLength = 25; + } + + /* Get the results */ + SearchRenderer renderer(search->getResults(start-1, pageLength), mp_nameMapper, mp_library, start, + search->getEstimatedMatches()); + renderer.setSearchPattern(searchInfo.pattern); + renderer.setSearchBookQuery(searchInfo.bookFilterQuery); + renderer.setProtocolPrefix(m_root + "/content/"); + renderer.setSearchProtocolPrefix(m_root + "/search"); + renderer.setPageLength(pageLength); + if (request.get_requested_format() == "xml") { + return ContentResponse::build(*this, renderer.getXml(), "application/rss+xml; charset=utf-8"); + } + auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); + // XXX: Now this has to be handled by the iframe-based viewer which + // XXX: has to resolve if the book selection resulted in a single book. + /* + if(bookIds.size() == 1) { + auto bookId = *bookIds.begin(); + auto bookName = mp_nameMapper->getNameForId(bookId); + response->set_taskbar(bookName, mp_library->getArchiveById(bookId).get()); + } + */ + return std::move(response); } std::unique_ptr InternalServer::handle_random(const RequestContext& request) From cd62b5dd915c5f481784d47ad80e0b2cff25e7b9 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 21 Oct 2022 16:54:27 +0400 Subject: [PATCH 4/7] Some clean-up --- src/server/internalServer.cpp | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 0e3137a8d..f593b6b20 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -77,7 +77,6 @@ extern "C" { #include "request_context.h" #include "response.h" -#define MAX_SEARCH_LEN 140 #define DEFAULT_CACHE_SIZE 2 namespace kiwix { @@ -821,6 +820,22 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } } +namespace +{ + +unsigned getSearchPageSize(const RequestContext& r) +{ + const auto DEFAULT_PAGE_LEN = 25u; + const auto MAX_PAGE_LEN = 140u; + + const auto pageLength = r.get_optional_param("pageLength", DEFAULT_PAGE_LEN); + return pageLength == 0 + ? DEFAULT_PAGE_LEN + : min(MAX_PAGE_LEN, pageLength); +} + +} // unnamed namespace + std::unique_ptr InternalServer::handle_search_request(const RequestContext& request) { auto searchInfo = getSearchInfo(request); @@ -859,22 +874,8 @@ std::unique_ptr InternalServer::handle_search_request(const RequestCon return response; } - auto start = 1; - try { - start = request.get_argument("start"); - } catch (const std::exception&) {} - start = max(1, start); - - auto pageLength = 25; - try { - pageLength = request.get_argument("pageLength"); - } catch (const std::exception&) {} - if (pageLength > MAX_SEARCH_LEN) { - pageLength = MAX_SEARCH_LEN; - } - if (pageLength == 0) { - pageLength = 25; - } + const auto start = max(1u, request.get_optional_param("start", 1u)); + const auto pageLength = getSearchPageSize(request); /* Get the results */ SearchRenderer renderer(search->getResults(start-1, pageLength), mp_nameMapper, mp_library, start, From 9409e8bd9126d8fcbf9a9e55251438f8f5ef1251 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 21 Oct 2022 18:24:27 +0400 Subject: [PATCH 5/7] Preventing confusion of tongues in multizim search Multizim search requires that all selected books be in the same language. No new URL query parameter was introduced for specifying the intended search language - `books.filter.lang` can be used for that purpose. The server_search unit-test was updated to use a slightly cheating library xml file where the language of example.zim was tweaked from "en" to "eng" in order to match that of zimfile.zim. Note that this change drops from the tested server two other goofy ZIM files corner_cases.zim and poor.zim that have been/are included in ServerTest. --- src/server/internalServer.cpp | 14 ++++++++++++ static/i18n/en.json | 1 + test/data/lib_for_server_search_test.xml | 4 ++++ test/meson.build | 1 + test/server_search.cpp | 27 +++++++++++++++++++----- 5 files changed, 42 insertions(+), 5 deletions(-) create mode 100644 test/data/lib_for_server_search_test.xml diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index f593b6b20..378f516fd 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -211,6 +211,16 @@ void checkBookNumber(const Library::BookIdSet& bookIds, size_t limit) { } } +typedef std::set Languages; + +Languages getLanguages(const Library& lib, const Library::BookIdSet& bookIds) { + Languages langs; + for ( const auto& b : bookIds ) { + langs.insert(lib.getBookById(b).getLanguage()); + } + return langs; +} + struct CustomizedResourceData { std::string mimeType; @@ -306,6 +316,10 @@ SearchInfo InternalServer::getSearchInfo(const RequestContext& request) const { auto bookIds = selectBooks(request); checkBookNumber(bookIds.second, m_multizimSearchLimit); + if ( getLanguages(*mp_library, bookIds.second).size() != 1 ) { + throw Error(nonParameterizedMessage("confusion-of-tongues")); + } + auto pattern = request.get_optional_param("pattern", ""); GeoQuery geoQuery; diff --git a/static/i18n/en.json b/static/i18n/en.json index 89c83c915..bf8441fc1 100644 --- a/static/i18n/en.json +++ b/static/i18n/en.json @@ -27,4 +27,5 @@ , "home-button-text": "Go to the main page of '{{BOOK_TITLE}}'" , "random-page-button-text": "Go to a randomly selected page" , "searchbox-tooltip": "Search '{{BOOK_TITLE}}'" + , "confusion-of-tongues": "Two or more books in different languages would participate in search, which may lead to confusing results." } diff --git a/test/data/lib_for_server_search_test.xml b/test/data/lib_for_server_search_test.xml new file mode 100644 index 000000000..994c6a6fa --- /dev/null +++ b/test/data/lib_for_server_search_test.xml @@ -0,0 +1,4 @@ + + + + diff --git a/test/meson.build b/test/meson.build index b91f8cd0f..90b7ce892 100644 --- a/test/meson.build +++ b/test/meson.build @@ -37,6 +37,7 @@ if gtest_dep.found() and not meson.is_cross_build() 'corner_cases.zim', 'poor.zim', 'library.xml', + 'lib_for_server_search_test.xml', 'customized_resources.txt', 'helloworld.txt', 'welcome.html', diff --git a/test/server_search.cpp b/test/server_search.cpp index 4f301f3e3..ca6bc1651 100644 --- a/test/server_search.cpp +++ b/test/server_search.cpp @@ -6,6 +6,17 @@ #define SERVER_PORT 8101 #include "server_testing_tools.h" + +class ServerSearchTest : public ServerTest +{ + void SetUp() override { + zfs1_.reset(new ZimFileServer(SERVER_PORT, + ZimFileServer::DEFAULT_OPTIONS, + "./test/lib_for_server_search_test.xml") + ); + } +}; + std::string makeSearchResultsHtml(const std::string& pattern, const std::string& header, const std::string& results, @@ -555,7 +566,7 @@ const std::vector LARGE_SEARCH_RESULTS = { // // In order to be able to share the same expected output data // LARGE_SEARCH_RESULTS between multiple build platforms and test-points -// of the ServerTest.searchResults test-case +// of the ServerSearchTest.searchResults test-case // // 1. Snippets are excluded from the plain-text comparison of actual and // expected HTML strings. This is done with the help of the @@ -916,7 +927,7 @@ struct TestData } }; -TEST_F(ServerTest, searchResults) +TEST_F(ServerSearchTest, searchResults) { const TestData testData[] = { { @@ -1340,14 +1351,12 @@ TEST_F(ServerTest, searchResults) /* pagination */ {} }, - // Only RayCharles is in English. - // [TODO] We should extend our test data to have another zim file in english returning results. { /* query */ "pattern=travel" "&books.filter.lang=eng", /* start */ 0, /* resultsPerPage */ 10, - /* totalResultCount */ 1, + /* totalResultCount */ 2, /* firstResultIndex */ 1, /* results */ { SEARCH_RESULT( @@ -1357,6 +1366,14 @@ TEST_F(ServerTest, searchResults) /*bookTitle*/ "Ray Charles", /*wordCount*/ "204" ), + + SEARCH_RESULT( + /*link*/ "/ROOT/content/example/Wikibooks.html", + /*title*/ "Wikibooks", + /*snippet*/ R"SNIPPET(...Travel guide Wikidata Knowledge database Commons Media repository Meta Coordination MediaWiki MediaWiki software Phabricator MediaWiki bug tracker Wikimedia Labs MediaWiki development The Wikimedia Foundation is a non-profit organization that depends on your voluntarism and donations to operate. If you find Wikibooks or other projects hosted by the Wikimedia Foundation useful, please volunteer or make a donation. Your donations primarily helps to purchase server equipment, launch new projects......)SNIPPET", + /*bookTitle*/ "Wikibooks", + /*wordCount*/ "538" + ) }, /* pagination */ {} }, From cb02dbd92a00df0a3a73394f04ee3d490a62ad33 Mon Sep 17 00:00:00 2001 From: Veloman Yunkan Date: Fri, 21 Oct 2022 19:49:43 +0400 Subject: [PATCH 6/7] RequestContext preserves the exact query string Before this change RequestContext::get_query() returned a reordered query string (alphabetically sorted by the parameter names). This fix facilitiates testing of responses where the request URL appears in the response. --- src/server/request_context.cpp | 8 ++++++++ src/server/request_context.h | 5 ++--- test/library_server.cpp | 8 ++++---- test/server.cpp | 10 +++++----- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/server/request_context.cpp b/src/server/request_context.cpp index 0b9a1a539..5e191afa7 100644 --- a/src/server/request_context.cpp +++ b/src/server/request_context.cpp @@ -107,6 +107,14 @@ MHD_Result RequestContext::fill_argument(void *__this, enum MHD_ValueKind kind, { RequestContext *_this = static_cast(__this); _this->arguments[key].push_back(value == nullptr ? "" : value); + if ( ! _this->queryString.empty() ) { + _this->queryString += "&"; + } + _this->queryString += key; + if ( value ) { + _this->queryString += "="; + _this->queryString += value; + } return MHD_YES; } diff --git a/src/server/request_context.h b/src/server/request_context.h index f63f89810..de02d465f 100644 --- a/src/server/request_context.h +++ b/src/server/request_context.h @@ -92,9 +92,7 @@ class RequestContext { std::string get_url_part(int part) const; std::string get_full_url() const; - std::string get_query(bool mustEncode = false) const { - return get_query([](const std::string& key) {return true;}, mustEncode); - } + std::string get_query() const { return queryString; } template std::string get_query(F filter, bool mustEncode) const { @@ -132,6 +130,7 @@ class RequestContext { ByteRange byteRange_; std::map headers; std::map> arguments; + std::string queryString; private: // functions static MHD_Result fill_header(void *, enum MHD_ValueKind, const char*, const char*); diff --git a/test/library_server.cpp b/test/library_server.cpp index fda5916b4..4afcbc10b 100644 --- a/test/library_server.cpp +++ b/test/library_server.cpp @@ -359,7 +359,7 @@ TEST_F(LibraryServerTest, catalog_search_results_pagination) EXPECT_EQ(maskVariableOPDSFeedData(r->body), OPDS_FEED_TAG " 12345678-90ab-cdef-1234-567890abcdef\n" - " Filtered zims (count=1&start=1)\n" + " Filtered zims (start=1&count=1)\n" " YYYY-MM-DDThh:mm:ssZ\n" " 3\n" " 1\n" @@ -375,7 +375,7 @@ TEST_F(LibraryServerTest, catalog_search_results_pagination) EXPECT_EQ(maskVariableOPDSFeedData(r->body), OPDS_FEED_TAG " 12345678-90ab-cdef-1234-567890abcdef\n" - " Filtered zims (count=10&start=100)\n" + " Filtered zims (start=100&count=10)\n" " YYYY-MM-DDThh:mm:ssZ\n" " 3\n" " 100\n" @@ -638,8 +638,8 @@ TEST_F(LibraryServerTest, catalog_v2_entries_filtered_by_range) const auto r = zfs1_->GET("/ROOT/catalog/v2/entries?start=1&count=1"); EXPECT_EQ(r->status, 200); EXPECT_EQ(maskVariableOPDSFeedData(r->body), - CATALOG_V2_ENTRIES_PREAMBLE("?count=1&start=1") - " Filtered Entries (count=1&start=1)\n" + CATALOG_V2_ENTRIES_PREAMBLE("?start=1&count=1") + " Filtered Entries (start=1&count=1)\n" " YYYY-MM-DDThh:mm:ssZ\n" " 3\n" " 1\n" diff --git a/test/server.cpp b/test/server.cpp index c774d0f1a..76541d0b3 100644 --- a/test/server.cpp +++ b/test/server.cpp @@ -839,7 +839,7 @@ TEST_F(ServerTest, Http400HtmlError) expected_body==R"(

Invalid request

- The requested URL "/ROOT/search?books.filter.lang=eng&pattern=" is not a valid request. + The requested URL "/ROOT/search?books.filter.lang=eng&pattern" is not a valid request.

No query provided. @@ -896,21 +896,21 @@ TEST_F(ServerTest, HttpXmlError) /* HTTP status code */ 400, /* expected response XML */ R"( Invalid request -The requested URL "/ROOT/search?content=zimfile&format=xml" is not a valid request. +The requested URL "/ROOT/search?format=xml&content=zimfile" is not a valid request. No query provided. )" }, { /* url */ "/ROOT/search?format=xml&content=non-existing-book&pattern=asdfqwerty", /* HTTP status code */ 400, /* expected response XML */ R"( Invalid request -The requested URL "/ROOT/search?content=non-existing-book&format=xml&pattern=asdfqwerty" is not a valid request. +The requested URL "/ROOT/search?format=xml&content=non-existing-book&pattern=asdfqwerty" is not a valid request. No such book: non-existing-book )" }, { /* url */ "/ROOT/search?format=xml&content=non-existing-book&pattern=a\"