From 940368b8ace64258cc2942999749a0b184a29a71 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Sat, 22 May 2021 21:17:45 +0530 Subject: [PATCH 1/9] Add m_archives and getArchiveById to Library These members will mirror the functionality offered by equivalent usage of Reader class. --- include/library.h | 3 +++ include/reader.h | 9 ++++++++- src/library.cpp | 29 +++++++++++++++++++++++++++-- src/reader.cpp | 5 +++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/include/library.h b/include/library.h index 95d22e4bd..c5e026e2f 100644 --- a/include/library.h +++ b/include/library.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "book.h" #include "bookmark.h" @@ -146,6 +147,7 @@ class Library { std::map m_books; std::map> m_readers; + std::map> m_archives; std::vector m_bookmarks; class BookDB; std::unique_ptr m_bookDB; @@ -198,6 +200,7 @@ class Library const Book& getBookByPath(const std::string& path) const; Book& getBookByPath(const std::string& path); std::shared_ptr getReaderById(const std::string& id); + std::shared_ptr getArchiveById(const std::string& id); /** * Remove a book from the library. diff --git a/include/reader.h b/include/reader.h index 4001a5ff7..d969ea284 100644 --- a/include/reader.h +++ b/include/reader.h @@ -91,6 +91,13 @@ class Reader * (.zim extesion). */ explicit Reader(const string zimFilePath); + + /** + * Create a Reader to read a zim file given by the Archive. + * + * @param archive The shared pointer to the Archive object. + */ + explicit Reader(const std::shared_ptr archive); #ifndef _WIN32 explicit Reader(int fd); Reader(int fd, zim::offset_type offset, zim::size_type size); @@ -488,7 +495,7 @@ class Reader zim::Archive* getZimArchive() const; protected: - std::unique_ptr zimArchive; + std::shared_ptr zimArchive; std::string zimFilePath; SuggestionsList_t suggestions; diff --git a/src/library.cpp b/src/library.cpp index 1147282df..c2e324ebc 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -108,6 +108,7 @@ bool Library::removeBookById(const std::string& id) { m_bookDB->delete_document("Q" + id); m_readers.erase(id); + m_archives.erase(id); return m_books.erase(id) == 1; } @@ -146,11 +147,35 @@ std::shared_ptr Library::getReaderById(const std::string& id) return m_readers.at(id); } catch (std::out_of_range& e) {} + try { + auto reader = make_shared(m_archives.at(id)); + m_readers[id] = reader; + return reader; + } catch (std::out_of_range& e) {} + + auto book = getBookById(id); + if (!book.isPathValid()) + return nullptr; + + auto archive = make_shared(book.getPath()); + m_archives[id] = archive; + auto reader = make_shared(archive); + m_readers[id] = reader; + return reader; +} + +std::shared_ptr Library::getArchiveById(const std::string& id) +{ + try { + return m_archives.at(id); + } catch (std::out_of_range& e) {} + auto book = getBookById(id); if (!book.isPathValid()) return nullptr; - auto sptr = make_shared(book.getPath()); - m_readers[id] = sptr; + + auto sptr = make_shared(book.getPath()); + m_archives[id] = sptr; return sptr; } diff --git a/src/reader.cpp b/src/reader.cpp index c493ca72d..4a2d84234 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -86,6 +86,11 @@ Reader::Reader(const string zimFilePath) srand(time(nullptr)); } +Reader::Reader(const std::shared_ptr archive) + : zimArchive(archive), + zimFilePath(archive->getFilename()) + {} + #ifndef _WIN32 Reader::Reader(int fd) : zimArchive(new zim::Archive(fd)), From 7d68926539b73325d363fb78936e00b1ebb4cf85 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Sat, 22 May 2021 22:40:20 +0530 Subject: [PATCH 2/9] Drop usage of Reader from InternalServer::handle_meta This is essentially a code move of meta handlers from using Reader functions to directly using Archive. --- format_code.sh | 2 + include/meson.build | 1 + include/tools/archiveTools.h | 45 +++++++++++++++ src/meson.build | 1 + src/reader.cpp | 47 ++++------------ src/server/internalServer.cpp | 99 ++++++++++++++++++++++++++++---- src/tools/archiveTools.cpp | 103 ++++++++++++++++++++++++++++++++++ 7 files changed, 249 insertions(+), 49 deletions(-) create mode 100644 include/tools/archiveTools.h create mode 100644 src/tools/archiveTools.cpp diff --git a/format_code.sh b/format_code.sh index b844eec8c..9e61dacb0 100755 --- a/format_code.sh +++ b/format_code.sh @@ -7,6 +7,7 @@ files=( "include/common/otherTools.h" "include/common/regexTools.h" "include/common/networkTools.h" +"include/common/archiveTools.h" "include/manager.h" "include/reader.h" "include/kiwix.h" @@ -22,6 +23,7 @@ files=( "src/common/pathTools.cpp" "src/common/regexTools.cpp" "src/common/otherTools.cpp" +"src/common/archiveTools.cpp" "src/common/networkTools.cpp" "src/common/stringTools.cpp" "src/xapianSearcher.cpp" diff --git a/include/meson.build b/include/meson.build index 4157970e4..6c0e46cc6 100644 --- a/include/meson.build +++ b/include/meson.build @@ -25,6 +25,7 @@ install_headers( 'tools/pathTools.h', 'tools/regexTools.h', 'tools/stringTools.h', + 'tools/archiveTools.h', subdir:'kiwix/tools' ) diff --git a/include/tools/archiveTools.h b/include/tools/archiveTools.h new file mode 100644 index 000000000..08c36de16 --- /dev/null +++ b/include/tools/archiveTools.h @@ -0,0 +1,45 @@ +/* + * Copyright 2021 Maneesh P M + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#ifndef KIWIX_ARCHIVETOOLS_H +#define KIWIX_ARCHIVETOOLS_H + +#include + +/** + * This file contains all the functions that would make handling data related to + * an archive easier. + **/ + +namespace kiwix +{ + std::string getMetadata(const zim::Archive* const archive, const std::string& name); + std::string getArchiveTitle(const zim::Archive* const archive); + std::string getMetaDescription(const zim::Archive* const archive); + std::string getMetaTags(const zim::Archive* const archive, bool original = false); + bool getArchiveFavicon(const zim::Archive* const archive, + std::string& content, std::string& mimeType); + std::string getMetaLanguage(const zim::Archive* const archive); + std::string getMetaName(const zim::Archive* const archive); + std::string getMetaDate(const zim::Archive* const archive); + std::string getMetaCreator(const zim::Archive* const archive); + std::string getMetaPublisher(const zim::Archive* const archive); +} + +#endif diff --git a/src/meson.build b/src/meson.build index 7d6dab9c1..43f863e07 100644 --- a/src/meson.build +++ b/src/meson.build @@ -19,6 +19,7 @@ kiwix_sources = [ 'tools/stringTools.cpp', 'tools/networkTools.cpp', 'tools/otherTools.cpp', + 'tools/archiveTools.cpp', 'kiwixserve.cpp', 'name_mapper.cpp', 'server/byte_range.cpp', diff --git a/src/reader.cpp b/src/reader.cpp index 4a2d84234..972d416ad 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -25,6 +25,7 @@ #include #include "tools/otherTools.h" +#include "tools/archiveTools.h" inline char hi(char v) { @@ -188,14 +189,7 @@ Entry Reader::getMainPage() const bool Reader::getFavicon(string& content, string& mimeType) const { - try { - auto item = zimArchive->getIllustrationItem(); - content = item.getData(); - mimeType = item.getMimetype(); - return true; - } catch(zim::EntryNotFound& e) {}; - - return false; + return kiwix::getArchiveFavicon(zimArchive.get(), content, mimeType); } string Reader::getZimFilePath() const @@ -217,47 +211,32 @@ bool Reader::getMetadata(const string& name, string& value) const string Reader::getName() const { - METADATA("Name") + return kiwix::getMetaName(zimArchive.get()); } string Reader::getTitle() const { - string value = zimArchive->getMetadata("Title"); - if (value.empty()) { - value = getLastPathElement(zimFilePath); - std::replace(value.begin(), value.end(), '_', ' '); - size_t pos = value.find(".zim"); - value = value.substr(0, pos); - } - return value; + return kiwix::getArchiveTitle(zimArchive.get()); } string Reader::getCreator() const { - METADATA("Creator") + return kiwix::getMetaCreator(zimArchive.get()); } string Reader::getPublisher() const { - METADATA("Publisher") + return kiwix::getMetaPublisher(zimArchive.get()); } string Reader::getDate() const { - METADATA("Date") + return kiwix::getMetaDate(zimArchive.get()); } string Reader::getDescription() const { - string value; - this->getMetadata("Description", value); - - /* Mediawiki Collection tends to use the "Subtitle" name */ - if (value.empty()) { - this->getMetadata("Subtitle", value); - } - - return value; + return kiwix::getMetaDescription(zimArchive.get()); } string Reader::getLongDescription() const @@ -267,7 +246,7 @@ string Reader::getLongDescription() const string Reader::getLanguage() const { - METADATA("Language") + return kiwix::getMetaLanguage(zimArchive.get()); } string Reader::getLicense() const @@ -277,13 +256,7 @@ string Reader::getLicense() const string Reader::getTags(bool original) const { - string tags_str; - getMetadata("Tags", tags_str); - if (original) { - return tags_str; - } - auto tags = convertTags(tags_str); - return join(tags, ";"); + return kiwix::getMetaTags(zimArchive.get(), original); } diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 78d538f6e..61a9bad17 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -47,6 +47,7 @@ extern "C" { #include "tools/pathTools.h" #include "tools/regexTools.h" #include "tools/stringTools.h" +#include "tools/archiveTools.h" #include "library.h" #include "name_mapper.h" #include "entry.h" @@ -55,6 +56,7 @@ extern "C" { #include "opds_dumper.h" #include +#include #include @@ -323,22 +325,95 @@ std::unique_ptr InternalServer::build_homepage(const RequestContext& r return ContentResponse::build(*this, RESOURCE::templates::index_html, get_default_data(), "text/html; charset=utf-8", true); } +/** + * Archive and Zim handlers begin + **/ + +std::vector getTitleVariants(const std::string& title) +{ + std::vector variants; + variants.push_back(title); + variants.push_back(kiwix::ucFirst(title)); + variants.push_back(kiwix::lcFirst(title)); + variants.push_back(kiwix::toTitle(title)); + return variants; +} + +// TODO: retrieve searcher from caching mechanism +SuggestionsList_t getSuggestions(const zim::Archive* const archive, + const std::string& queryString, int suggestionCount) +{ + SuggestionsList_t suggestions; + if (archive->hasTitleIndex()) { + auto searcher = zim::Searcher(*archive); + zim::Query suggestionQuery; + suggestionQuery.setQuery(queryString, true); + auto suggestionSearch = searcher.search(suggestionQuery); + auto suggestionResult = suggestionSearch.getResults(0, suggestionCount); + + for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { + SuggestionItem suggestion(it.getTitle(), it.getPath(), + kiwix::normalize(it.getTitle()), it.getSnippet()); + suggestions.push_back(suggestion); + } + } else { + // TODO: This case should be handled by libzim + std::vector variants = getTitleVariants(queryString); + int currCount = 0; + for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { + for (auto& entry: archive->findByTitle(*it)) { + SuggestionItem suggestion(entry.getTitle(), entry.getPath(), + kiwix::normalize(entry.getTitle())); + suggestions.push_back(suggestion); + currCount++; + } + } + } + return suggestions; +} + +zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry) +{ + int loopCounter = 42; + auto final_entry = entry; + while (final_entry.isRedirect() && loopCounter--) { + final_entry = final_entry.getRedirectEntry(); + } + // Prevent infinite loops. + if (final_entry.isRedirect()) { + throw zim::EntryNotFound("Unable to resolve entry redirects."); + } + return final_entry; +} + +zim::Entry getEntryFromPath(const zim::Archive* const archive, const std::string& path) +{ + if (path.empty() || path == "/") { + return archive->getMainEntry(); + } + return archive->getEntryByPath(path); +} + +/** + * Archive and Zim handlers end + **/ + std::unique_ptr InternalServer::handle_meta(const RequestContext& request) { std::string bookName; std::string bookId; std::string meta_name; - std::shared_ptr reader; + std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); meta_name = request.get_argument("name"); - reader = mp_library->getReaderById(bookId); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range& e) { return Response::build_404(*this, request, bookName, ""); } - if (reader == nullptr) { + if (archive == nullptr) { return Response::build_404(*this, request, bookName, ""); } @@ -346,23 +421,23 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ std::string mimeType = "text"; if (meta_name == "title") { - content = reader->getTitle(); + content = getArchiveTitle(archive.get()); } else if (meta_name == "description") { - content = reader->getDescription(); + content = getMetaDescription(archive.get()); } else if (meta_name == "language") { - content = reader->getLanguage(); + content = getMetaLanguage(archive.get()); } else if (meta_name == "name") { - content = reader->getName(); + content = getMetaName(archive.get()); } else if (meta_name == "tags") { - content = reader->getTags(); + content = getMetaTags(archive.get()); } else if (meta_name == "date") { - content = reader->getDate(); + content = getMetaDate(archive.get()); } else if (meta_name == "creator") { - content = reader->getCreator(); + content = getMetaCreator(archive.get()); } else if (meta_name == "publisher") { - content = reader->getPublisher(); + content = getMetaPublisher(archive.get()); } else if (meta_name == "favicon") { - reader->getFavicon(content, mimeType); + getArchiveFavicon(archive.get(), content, mimeType); } else { return Response::build_404(*this, request, bookName, ""); } diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp new file mode 100644 index 000000000..930bc8916 --- /dev/null +++ b/src/tools/archiveTools.cpp @@ -0,0 +1,103 @@ +/* + * Copyright 2021 Maneesh P M + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +#include +#include + +namespace kiwix +{ +std::string getMetadata(const zim::Archive* const archive, const std::string& name) { + try { + return archive->getMetadata(name); + } catch (zim::EntryNotFound& e) { + return ""; + } +} + +std::string getArchiveTitle(const zim::Archive* const archive) { + std::string value = getMetadata(archive, "Title"); + if (value.empty()) { + value = getLastPathElement(archive->getFilename()); + std::replace(value.begin(), value.end(), '_', ' '); + size_t pos = value.find(".zim"); + value = value.substr(0, pos); + } + return value; +} + +std::string getMetaDescription(const zim::Archive* const archive) { + std::string value; + value = getMetadata(archive, "Description"); + + /* Mediawiki Collection tends to use the "Subtitle" name */ + if (value.empty()) { + value = getMetadata(archive, "Subtitle"); + } + + return value; +} + +std::string getMetaTags(const zim::Archive* const archive, bool original) { + std::string tags_str = getMetadata(archive, "Tags"); + if (original) { + return tags_str; + } + auto tags = convertTags(tags_str); + return join(tags, ";"); +} + +bool getArchiveFavicon(const zim::Archive* const archive, + std::string& content, std::string& mimeType){ + try { + auto entry = archive->getFaviconEntry(); + auto item = entry.getItem(true); + content = item.getData(); + mimeType = item.getMimetype(); + return true; + } catch(zim::EntryNotFound& e) {}; + + return false; +} + +std::string getMetaLanguage(const zim::Archive* const archive) { + return getMetadata(archive, "Language"); +} + +std::string getMetaName(const zim::Archive* const archive) { + return getMetadata(archive, "Name"); +} + +std::string getMetaDate(const zim::Archive* const archive) { + return getMetadata(archive, "Date"); +} + +std::string getMetaCreator(const zim::Archive* const archive) { + return getMetadata(archive, "Creator"); +} + +std::string getMetaPublisher(const zim::Archive* const archive) { + return getMetadata(archive, "Publisher"); +} + +} // kiwix From a236751c74d04f0024932209a1070ee922785246 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Sun, 23 May 2021 11:44:39 +0530 Subject: [PATCH 3/9] Drop usage of Reader from InternalServer::handle_suggest --- include/reader.h | 4 +- include/tools/stringTools.h | 2 + src/reader.cpp | 7 +- src/server/internalServer.cpp | 117 ++++++++++++++++++++++------------ src/tools/stringTools.cpp | 8 +++ 5 files changed, 90 insertions(+), 48 deletions(-) diff --git a/include/reader.h b/include/reader.h index d969ea284..d288338b2 100644 --- a/include/reader.h +++ b/include/reader.h @@ -41,11 +41,11 @@ namespace kiwix * The SuggestionItem is a helper class that contains the info about a single * suggestion item. */ - class SuggestionItem { // Functions - private: + // Temporarily making the constructor public until the code move is complete + public: // Create a sugggestion item. explicit SuggestionItem(std::string title, std::string normalizedTitle, std::string path, std::string snippet = "") : diff --git a/include/tools/stringTools.h b/include/tools/stringTools.h index 8c9a8bc9d..6793c7f20 100644 --- a/include/tools/stringTools.h +++ b/include/tools/stringTools.h @@ -70,5 +70,7 @@ T extractFromString(const std::string& str) { } bool startsWith(const std::string& base, const std::string& start); + +std::vector getTitleVariants(const std::string& title); } //namespace kiwix #endif diff --git a/src/reader.cpp b/src/reader.cpp index 972d416ad..c2eaa2e06 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -438,12 +438,7 @@ bool Reader::searchSuggestions(const string& prefix, std::vector Reader::getTitleVariants( const std::string& title) const { - std::vector variants; - variants.push_back(title); - variants.push_back(kiwix::ucFirst(title)); - variants.push_back(kiwix::lcFirst(title)); - variants.push_back(kiwix::toTitle(title)); - return variants; + return kiwix::getTitleVariants(title); } diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 61a9bad17..c41433a30 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -57,6 +57,7 @@ extern "C" { #include #include +#include #include @@ -329,16 +330,6 @@ std::unique_ptr InternalServer::build_homepage(const RequestContext& r * Archive and Zim handlers begin **/ -std::vector getTitleVariants(const std::string& title) -{ - std::vector variants; - variants.push_back(title); - variants.push_back(kiwix::ucFirst(title)); - variants.push_back(kiwix::lcFirst(title)); - variants.push_back(kiwix::toTitle(title)); - return variants; -} - // TODO: retrieve searcher from caching mechanism SuggestionsList_t getSuggestions(const zim::Archive* const archive, const std::string& queryString, int suggestionCount) @@ -352,8 +343,8 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, auto suggestionResult = suggestionSearch.getResults(0, suggestionCount); for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { - SuggestionItem suggestion(it.getTitle(), it.getPath(), - kiwix::normalize(it.getTitle()), it.getSnippet()); + SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()), + it.getPath(), it.getSnippet()); suggestions.push_back(suggestion); } } else { @@ -362,8 +353,8 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, int currCount = 0; for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { for (auto& entry: archive->findByTitle(*it)) { - SuggestionItem suggestion(entry.getTitle(), entry.getPath(), - kiwix::normalize(entry.getTitle())); + SuggestionItem suggestion(entry.getTitle(), kiwix::normalize(entry.getTitle()), + entry.getPath()); suggestions.push_back(suggestion); currCount++; } @@ -394,6 +385,49 @@ zim::Entry getEntryFromPath(const zim::Archive* const archive, const std::string return archive->getEntryByPath(path); } +std::vector getTitleVariants(const std::string& title) +{ + std::vector variants; + variants.push_back(title); + variants.push_back(kiwix::ucFirst(title)); + variants.push_back(kiwix::lcFirst(title)); + variants.push_back(kiwix::toTitle(title)); + return variants; +} + +// TODO: retrieve searcher from caching mechanism +SuggestionsList_t getSuggestions(const zim::Archive* const archive, + const std::string& queryString, int suggestionCount) +{ + SuggestionsList_t suggestions; + if (archive->hasTitleIndex()) { + auto searcher = zim::Searcher(*archive); + zim::Query suggestionQuery; + suggestionQuery.setQuery(queryString, true); + auto suggestionSearch = searcher.search(suggestionQuery); + auto suggestionResult = suggestionSearch.getResults(0, suggestionCount); + + for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { + SuggestionItem suggestion(it.getTitle(), it.getPath(), + kiwix::normalize(it.getTitle()), it.getSnippet()); + suggestions.push_back(suggestion); + } + } else { + // TODO: This case should be handled by libzim + std::vector variants = getTitleVariants(queryString); + int currCount = 0; + for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { + for (auto& entry: archive->findByTitle(*it)) { + SuggestionItem suggestion(entry.getTitle(), entry.getPath(), + kiwix::normalize(entry.getTitle())); + suggestions.push_back(suggestion); + currCount++; + } + } + } + return suggestions; +} + /** * Archive and Zim handlers end **/ @@ -460,51 +494,54 @@ std::unique_ptr InternalServer::handle_suggest(const RequestContext& r std::string bookName; std::string bookId; - std::string term; - std::shared_ptr reader; + std::string queryString; + std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); - term = request.get_argument("term"); - reader = mp_library->getReaderById(bookId); + queryString = request.get_argument("term"); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { return Response::build_404(*this, request, bookName, ""); } + if (archive == nullptr) { + return Response::build_404(*this, request, bookName, ""); + } + if (m_verbose.load()) { - printf("Searching suggestions for: \"%s\"\n", term.c_str()); + printf("Searching suggestions for: \"%s\"\n", queryString.c_str()); } MustacheData results{MustacheData::type::list}; bool first = true; - if (reader != nullptr) { - /* Get the suggestions */ - SuggestionsList_t suggestions; - reader->searchSuggestionsSmart(term, maxSuggestionCount, suggestions); - for(auto& suggestion:suggestions) { - MustacheData result; - result.set("label", suggestion.getTitle()); - - if (suggestion.hasSnippet()) { - result.set("label", suggestion.getSnippet()); - } - result.set("value", suggestion.getTitle()); - result.set("kind", "path"); - result.set("path", suggestion.getPath()); - result.set("first", first); - first = false; - results.push_back(result); - suggestionCount++; + /* Get the suggestions */ + SuggestionsList_t suggestions = getSuggestions(archive.get(), queryString, maxSuggestionCount); + for(auto& suggestion:suggestions) { + MustacheData result; + result.set("label", suggestion.getTitle()); + + if (suggestion.hasSnippet()) { + result.set("label", suggestion.getSnippet()); } + + result.set("value", suggestion.getTitle()); + result.set("kind", "path"); + result.set("path", suggestion.getPath()); + result.set("first", first); + first = false; + results.push_back(result); + suggestionCount++; } + /* Propose the fulltext search if possible */ - if (reader->hasFulltextIndex()) { + if (archive->hasFulltextIndex()) { MustacheData result; - result.set("label", "containing '" + term + "'..."); - result.set("value", term + " "); + result.set("label", "containing '" + queryString + "'..."); + result.set("value", queryString + " "); result.set("kind", "pattern"); result.set("first", first); results.push_back(result); diff --git a/src/tools/stringTools.cpp b/src/tools/stringTools.cpp index a0eac7ba2..55efc07f0 100644 --- a/src/tools/stringTools.cpp +++ b/src/tools/stringTools.cpp @@ -395,3 +395,11 @@ bool kiwix::startsWith(const std::string& base, const std::string& start) && std::equal(start.begin(), start.end(), base.begin()); } +std::vector kiwix::getTitleVariants(const std::string& title) { + std::vector variants; + variants.push_back(title); + variants.push_back(kiwix::ucFirst(title)); + variants.push_back(kiwix::lcFirst(title)); + variants.push_back(kiwix::toTitle(title)); + return variants; +} From 75b4d311d7526508c6d50924c6cac947700107d7 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Sun, 23 May 2021 11:54:58 +0530 Subject: [PATCH 4/9] Drop Reader from InternalServer::handle_random --- include/tools/archiveTools.h | 1 + src/server/internalServer.cpp | 42 ++++++++++++++++++----------------- src/tools/archiveTools.cpp | 5 +++++ 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/include/tools/archiveTools.h b/include/tools/archiveTools.h index 08c36de16..55b105f7b 100644 --- a/include/tools/archiveTools.h +++ b/include/tools/archiveTools.h @@ -40,6 +40,7 @@ namespace kiwix std::string getMetaDate(const zim::Archive* const archive); std::string getMetaCreator(const zim::Archive* const archive); std::string getMetaPublisher(const zim::Archive* const archive); + zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry); } #endif diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index c41433a30..98ac18785 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -58,6 +58,8 @@ extern "C" { #include #include #include +#include +#include #include @@ -363,20 +365,6 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, return suggestions; } -zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry) -{ - int loopCounter = 42; - auto final_entry = entry; - while (final_entry.isRedirect() && loopCounter--) { - final_entry = final_entry.getRedirectEntry(); - } - // Prevent infinite loops. - if (final_entry.isRedirect()) { - throw zim::EntryNotFound("Unable to resolve entry redirects."); - } - return final_entry; -} - zim::Entry getEntryFromPath(const zim::Archive* const archive, const std::string& path) { if (path.empty() || path == "/") { @@ -428,6 +416,20 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, return suggestions; } +zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry) +{ + int loopCounter = 42; + auto final_entry = entry; + while (final_entry.isRedirect() && loopCounter--) { + final_entry = final_entry.getRedirectEntry(); + } + // Prevent infinite loops. + if (final_entry.isRedirect()) { + throw zim::EntryNotFound("Unable to resolve entry redirects."); + } + return final_entry; +} + /** * Archive and Zim handlers end **/ @@ -683,23 +685,23 @@ std::unique_ptr InternalServer::handle_random(const RequestContext& re std::string bookName; std::string bookId; - std::shared_ptr reader; + std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); - reader = mp_library->getReaderById(bookId); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { return Response::build_404(*this, request, bookName, ""); } - if (reader == nullptr) { + if (archive == nullptr) { return Response::build_404(*this, request, bookName, ""); } try { - auto entry = reader->getRandomPage(); - return build_redirect(bookName, entry.getFinalEntry()); - } catch(kiwix::NoEntry& e) { + auto entry = archive->getRandomEntry(); + return build_redirect(bookName, getFinalEntry(archive.get(), entry)); + } catch(zim::EntryNotFound& e) { return Response::build_404(*this, request, bookName, ""); } } diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 930bc8916..1287dd1bc 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -100,4 +100,9 @@ std::string getMetaPublisher(const zim::Archive* const archive) { return getMetadata(archive, "Publisher"); } +zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry) +{ + return archive->getEntryByPath(entry.getItem(true).getPath()); +} + } // kiwix From c046f64d83742f085d68c5003196c01f7679fb46 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Mon, 24 May 2021 12:08:56 +0530 Subject: [PATCH 5/9] Drop Reader and Entry wrappers from handle_content --- include/tools/archiveTools.h | 23 +++---- src/reader.cpp | 24 +++---- src/server/internalServer.cpp | 116 ++++++++-------------------------- src/server/internalServer.h | 2 +- src/tools/archiveTools.cpp | 43 ++++++++----- 5 files changed, 78 insertions(+), 130 deletions(-) diff --git a/include/tools/archiveTools.h b/include/tools/archiveTools.h index 55b105f7b..1429457ba 100644 --- a/include/tools/archiveTools.h +++ b/include/tools/archiveTools.h @@ -29,18 +29,19 @@ namespace kiwix { - std::string getMetadata(const zim::Archive* const archive, const std::string& name); - std::string getArchiveTitle(const zim::Archive* const archive); - std::string getMetaDescription(const zim::Archive* const archive); - std::string getMetaTags(const zim::Archive* const archive, bool original = false); - bool getArchiveFavicon(const zim::Archive* const archive, + std::string getMetadata(const zim::Archive& archive, const std::string& name); + std::string getArchiveTitle(const zim::Archive& archive); + std::string getMetaDescription(const zim::Archive& archive); + std::string getMetaTags(const zim::Archive& archive, bool original = false); + bool getArchiveFavicon(const zim::Archive& archive, std::string& content, std::string& mimeType); - std::string getMetaLanguage(const zim::Archive* const archive); - std::string getMetaName(const zim::Archive* const archive); - std::string getMetaDate(const zim::Archive* const archive); - std::string getMetaCreator(const zim::Archive* const archive); - std::string getMetaPublisher(const zim::Archive* const archive); - zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry); + std::string getMetaLanguage(const zim::Archive& archive); + std::string getMetaName(const zim::Archive& archive); + std::string getMetaDate(const zim::Archive& archive); + std::string getMetaCreator(const zim::Archive& archive); + std::string getMetaPublisher(const zim::Archive& archive); + zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry); + zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path); } #endif diff --git a/src/reader.cpp b/src/reader.cpp index c2eaa2e06..20a341731 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -189,7 +189,7 @@ Entry Reader::getMainPage() const bool Reader::getFavicon(string& content, string& mimeType) const { - return kiwix::getArchiveFavicon(zimArchive.get(), content, mimeType); + return kiwix::getArchiveFavicon(*zimArchive, content, mimeType); } string Reader::getZimFilePath() const @@ -211,32 +211,32 @@ bool Reader::getMetadata(const string& name, string& value) const string Reader::getName() const { - return kiwix::getMetaName(zimArchive.get()); + return kiwix::getMetaName(*zimArchive); } string Reader::getTitle() const { - return kiwix::getArchiveTitle(zimArchive.get()); + return kiwix::getArchiveTitle(*zimArchive); } string Reader::getCreator() const { - return kiwix::getMetaCreator(zimArchive.get()); + return kiwix::getMetaCreator(*zimArchive); } string Reader::getPublisher() const { - return kiwix::getMetaPublisher(zimArchive.get()); + return kiwix::getMetaPublisher(*zimArchive); } string Reader::getDate() const { - return kiwix::getMetaDate(zimArchive.get()); + return kiwix::getMetaDate(*zimArchive); } string Reader::getDescription() const { - return kiwix::getMetaDescription(zimArchive.get()); + return kiwix::getMetaDescription(*zimArchive); } string Reader::getLongDescription() const @@ -246,7 +246,7 @@ string Reader::getLongDescription() const string Reader::getLanguage() const { - return kiwix::getMetaLanguage(zimArchive.get()); + return kiwix::getMetaLanguage(*zimArchive); } string Reader::getLicense() const @@ -256,7 +256,7 @@ string Reader::getLicense() const string Reader::getTags(bool original) const { - return kiwix::getMetaTags(zimArchive.get(), original); + return kiwix::getMetaTags(*zimArchive, original); } @@ -320,12 +320,8 @@ string Reader::getOrigId() const Entry Reader::getEntryFromPath(const std::string& path) const { - if (path.empty() || path == "/") { - return getMainPage(); - } - try { - return zimArchive->getEntryByPath(path); + return kiwix::getEntryFromPath(*zimArchive, path); } catch (zim::EntryNotFound& e) { throw NoEntry(); } diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 98ac18785..8d99b3f8a 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -365,71 +365,6 @@ SuggestionsList_t getSuggestions(const zim::Archive* const archive, return suggestions; } -zim::Entry getEntryFromPath(const zim::Archive* const archive, const std::string& path) -{ - if (path.empty() || path == "/") { - return archive->getMainEntry(); - } - return archive->getEntryByPath(path); -} - -std::vector getTitleVariants(const std::string& title) -{ - std::vector variants; - variants.push_back(title); - variants.push_back(kiwix::ucFirst(title)); - variants.push_back(kiwix::lcFirst(title)); - variants.push_back(kiwix::toTitle(title)); - return variants; -} - -// TODO: retrieve searcher from caching mechanism -SuggestionsList_t getSuggestions(const zim::Archive* const archive, - const std::string& queryString, int suggestionCount) -{ - SuggestionsList_t suggestions; - if (archive->hasTitleIndex()) { - auto searcher = zim::Searcher(*archive); - zim::Query suggestionQuery; - suggestionQuery.setQuery(queryString, true); - auto suggestionSearch = searcher.search(suggestionQuery); - auto suggestionResult = suggestionSearch.getResults(0, suggestionCount); - - for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { - SuggestionItem suggestion(it.getTitle(), it.getPath(), - kiwix::normalize(it.getTitle()), it.getSnippet()); - suggestions.push_back(suggestion); - } - } else { - // TODO: This case should be handled by libzim - std::vector variants = getTitleVariants(queryString); - int currCount = 0; - for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { - for (auto& entry: archive->findByTitle(*it)) { - SuggestionItem suggestion(entry.getTitle(), entry.getPath(), - kiwix::normalize(entry.getTitle())); - suggestions.push_back(suggestion); - currCount++; - } - } - } - return suggestions; -} - -zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry) -{ - int loopCounter = 42; - auto final_entry = entry; - while (final_entry.isRedirect() && loopCounter--) { - final_entry = final_entry.getRedirectEntry(); - } - // Prevent infinite loops. - if (final_entry.isRedirect()) { - throw zim::EntryNotFound("Unable to resolve entry redirects."); - } - return final_entry; -} - /** * Archive and Zim handlers end **/ @@ -457,23 +392,23 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ std::string mimeType = "text"; if (meta_name == "title") { - content = getArchiveTitle(archive.get()); + content = getArchiveTitle(*archive); } else if (meta_name == "description") { - content = getMetaDescription(archive.get()); + content = getMetaDescription(*archive); } else if (meta_name == "language") { - content = getMetaLanguage(archive.get()); + content = getMetaLanguage(*archive); } else if (meta_name == "name") { - content = getMetaName(archive.get()); + content = getMetaName(*archive); } else if (meta_name == "tags") { - content = getMetaTags(archive.get()); + content = getMetaTags(*archive); } else if (meta_name == "date") { - content = getMetaDate(archive.get()); + content = getMetaDate(*archive); } else if (meta_name == "creator") { - content = getMetaCreator(archive.get()); + content = getMetaCreator(*archive); } else if (meta_name == "publisher") { - content = getMetaPublisher(archive.get()); + content = getMetaPublisher(*archive); } else if (meta_name == "favicon") { - getArchiveFavicon(archive.get(), content, mimeType); + getArchiveFavicon(*archive, content, mimeType); } else { return Response::build_404(*this, request, bookName, ""); } @@ -617,7 +552,7 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re auto data = get_default_data(); data.set("pattern", encodeDiples(patternString)); auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8"); - response->set_taskbar(bookName, reader ? reader->getTitle() : ""); + response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : ""); response->set_code(MHD_HTTP_NOT_FOUND); return std::move(response); } @@ -668,7 +603,7 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re renderer.setSearchProtocolPrefix(m_root + "/search?"); renderer.setPageLength(pageLength); auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); - response->set_taskbar(bookName, reader ? reader->getTitle() : ""); + response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : ""); return std::move(response); } catch (const std::exception& e) { @@ -700,7 +635,7 @@ std::unique_ptr InternalServer::handle_random(const RequestContext& re try { auto entry = archive->getRandomEntry(); - return build_redirect(bookName, getFinalEntry(archive.get(), entry)); + return build_redirect(bookName, getFinalItem(*archive, entry)); } catch(zim::EntryNotFound& e) { return Response::build_404(*this, request, bookName, ""); } @@ -861,9 +796,9 @@ InternalServer::get_reader(const std::string& bookName) const } std::unique_ptr -InternalServer::build_redirect(const std::string& bookName, const kiwix::Entry& entry) const +InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const { - auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(entry.getPath()); + auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(item.getPath()); return Response::build_redirect(*this, redirectUrl); } @@ -879,8 +814,13 @@ std::unique_ptr InternalServer::handle_content(const RequestContext& r if (bookName.empty()) return build_homepage(request); - const std::shared_ptr reader = get_reader(bookName); - if (reader == nullptr) { + std::shared_ptr archive; + try { + const std::string bookId = mp_nameMapper->getIdForName(bookName); + archive = mp_library->getArchiveById(bookId); + } catch (const std::out_of_range& e) {} + + if (archive == nullptr) { std::string searchURL = m_root+"/search?pattern="+pattern; // Make a full search on the entire library. const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); @@ -893,31 +833,31 @@ std::unique_ptr InternalServer::handle_content(const RequestContext& r } try { - auto entry = reader->getEntryFromPath(urlStr); + auto entry = getEntryFromPath(*archive, urlStr); if (entry.isRedirect() || urlStr.empty()) { // If urlStr is empty, we want to mainPage. // We must do a redirection to the real page. - return build_redirect(bookName, entry.getFinalEntry()); + return build_redirect(bookName, getFinalItem(*archive, entry)); } - auto response = ItemResponse::build(*this, request, entry.getZimEntry().getItem()); + auto response = ItemResponse::build(*this, request, entry.getItem()); try { - dynamic_cast(*response).set_taskbar(bookName, reader->getTitle()); + dynamic_cast(*response).set_taskbar(bookName, getArchiveTitle(*archive)); } catch (std::bad_cast& e) {} if (m_verbose.load()) { printf("Found %s\n", entry.getPath().c_str()); - printf("mimeType: %s\n", entry.getMimetype().c_str()); + printf("mimeType: %s\n", entry.getItem(true).getMimetype().c_str()); } return response; - } catch(kiwix::NoEntry& e) { + } catch(zim::EntryNotFound& e) { if (m_verbose.load()) printf("Failed to find %s\n", urlStr.c_str()); std::string searchURL = m_root+"/search?content="+bookName+"&pattern="+pattern; // Make a search on this specific book only. const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); - return Response::build_404(*this, request, bookName, reader->getTitle(), details); + return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), details); } } diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 142d3fc37..c5a2cc159 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -69,7 +69,7 @@ class InternalServer { private: // functions std::unique_ptr handle_request(const RequestContext& request); - std::unique_ptr build_redirect(const std::string& bookName, const kiwix::Entry& entry) const; + std::unique_ptr build_redirect(const std::string& bookName, const zim::Item& item) const; std::unique_ptr build_homepage(const RequestContext& request); std::unique_ptr handle_skin(const RequestContext& request); std::unique_ptr handle_catalog(const RequestContext& request); diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp index 1287dd1bc..3608357ab 100644 --- a/src/tools/archiveTools.cpp +++ b/src/tools/archiveTools.cpp @@ -27,18 +27,18 @@ namespace kiwix { -std::string getMetadata(const zim::Archive* const archive, const std::string& name) { +std::string getMetadata(const zim::Archive& archive, const std::string& name) { try { - return archive->getMetadata(name); + return archive.getMetadata(name); } catch (zim::EntryNotFound& e) { return ""; } } -std::string getArchiveTitle(const zim::Archive* const archive) { +std::string getArchiveTitle(const zim::Archive& archive) { std::string value = getMetadata(archive, "Title"); if (value.empty()) { - value = getLastPathElement(archive->getFilename()); + value = getLastPathElement(archive.getFilename()); std::replace(value.begin(), value.end(), '_', ' '); size_t pos = value.find(".zim"); value = value.substr(0, pos); @@ -46,7 +46,7 @@ std::string getArchiveTitle(const zim::Archive* const archive) { return value; } -std::string getMetaDescription(const zim::Archive* const archive) { +std::string getMetaDescription(const zim::Archive& archive) { std::string value; value = getMetadata(archive, "Description"); @@ -58,7 +58,7 @@ std::string getMetaDescription(const zim::Archive* const archive) { return value; } -std::string getMetaTags(const zim::Archive* const archive, bool original) { +std::string getMetaTags(const zim::Archive& archive, bool original) { std::string tags_str = getMetadata(archive, "Tags"); if (original) { return tags_str; @@ -67,11 +67,10 @@ std::string getMetaTags(const zim::Archive* const archive, bool original) { return join(tags, ";"); } -bool getArchiveFavicon(const zim::Archive* const archive, +bool getArchiveFavicon(const zim::Archive& archive, std::string& content, std::string& mimeType){ try { - auto entry = archive->getFaviconEntry(); - auto item = entry.getItem(true); + auto item = archive.getIllustrationItem(); content = item.getData(); mimeType = item.getMimetype(); return true; @@ -80,29 +79,41 @@ bool getArchiveFavicon(const zim::Archive* const archive, return false; } -std::string getMetaLanguage(const zim::Archive* const archive) { +std::string getMetaLanguage(const zim::Archive& archive) { return getMetadata(archive, "Language"); } -std::string getMetaName(const zim::Archive* const archive) { +std::string getMetaName(const zim::Archive& archive) { return getMetadata(archive, "Name"); } -std::string getMetaDate(const zim::Archive* const archive) { +std::string getMetaDate(const zim::Archive& archive) { return getMetadata(archive, "Date"); } -std::string getMetaCreator(const zim::Archive* const archive) { +std::string getMetaCreator(const zim::Archive& archive) { return getMetadata(archive, "Creator"); } -std::string getMetaPublisher(const zim::Archive* const archive) { +std::string getMetaPublisher(const zim::Archive& archive) { return getMetadata(archive, "Publisher"); } -zim::Entry getFinalEntry(const zim::Archive* const archive, const zim::Entry& entry) +zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry) { - return archive->getEntryByPath(entry.getItem(true).getPath()); + return entry.getItem(true); +} + +zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path) +{ + try { + return archive.getEntryByPath(path); + } catch (zim::EntryNotFound& e) { + if (path.empty() || path == "/") { + return archive.getMainEntry(); + } + } + throw zim::EntryNotFound("Cannot find entry for non empty path"); } } // kiwix From bcece66960146a1fb477ed318986b7b1ef6704ed Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Fri, 28 May 2021 12:37:43 +0530 Subject: [PATCH 6/9] Add SearchRenderer handles for libzim structures Introduces a new member mp_search that houses the zim::Search object, adds a new constructor for this purpose. This commit also add an overload for getHtml that takes start and end integers as arguments since they are not part of the search object we include. --- include/search_renderer.h | 5 ++++- include/searcher.h | 7 +++++++ src/search_renderer.cpp | 39 ++++++++++++++++++++++++--------------- src/searcher.cpp | 5 +++++ 4 files changed, 40 insertions(+), 16 deletions(-) diff --git a/include/search_renderer.h b/include/search_renderer.h index 4f491961f..5951ff8a7 100644 --- a/include/search_renderer.h +++ b/include/search_renderer.h @@ -21,6 +21,7 @@ #define KIWIX_SEARCH_RENDERER_H #include +#include namespace kiwix { @@ -40,6 +41,8 @@ class SearchRenderer * Used to generate pagination links. */ SearchRenderer(Searcher* searcher, NameMapper* mapper); + SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper, + unsigned int start, unsigned int estimatedResultCount); ~SearchRenderer(); @@ -74,7 +77,7 @@ class SearchRenderer protected: std::string beautifyInteger(const unsigned int number); - Searcher* mp_searcher; + zim::SearchResultSet m_srs; NameMapper* mp_nameMapper; std::string searchContent; std::string searchPattern; diff --git a/include/searcher.h b/include/searcher.h index 8acf929ce..8c72953b4 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -32,6 +32,8 @@ #include "tools/pathTools.h" #include "tools/stringTools.h" +#include + using namespace std; namespace kiwix @@ -142,6 +144,11 @@ class Searcher */ unsigned int getEstimatedResultCount(); + /** + * Get a SearchResultSet object for current search + */ + zim::SearchResultSet getSearchResultSet(); + unsigned int getResultStart() { return resultStart; } unsigned int getResultEnd() { return resultEnd; } diff --git a/src/search_renderer.cpp b/src/search_renderer.cpp index 0e503e330..66bb3b429 100644 --- a/src/search_renderer.cpp +++ b/src/search_renderer.cpp @@ -37,10 +37,22 @@ namespace kiwix /* Constructor */ SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper) - : mp_searcher(searcher), + : m_srs(searcher->getSearchResultSet()), mp_nameMapper(mapper), protocolPrefix("zim://"), - searchProtocolPrefix("search://?") + searchProtocolPrefix("search://?"), + estimatedResultCount(searcher->getEstimatedResultCount()), + resultStart(searcher->getResultStart()) +{} + +SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper, + unsigned int start, unsigned int estimatedResultCount) + : m_srs(srs), + mp_nameMapper(mapper), + protocolPrefix("zim://"), + searchProtocolPrefix("search://?"), + estimatedResultCount(estimatedResultCount), + resultStart(start) {} /* Destructor */ @@ -70,29 +82,26 @@ std::string SearchRenderer::getHtml() { kainjow::mustache::data results{kainjow::mustache::data::type::list}; - mp_searcher->restart_search(); - Result* p_result = NULL; - while ((p_result = mp_searcher->getNextResult())) { + for (auto it = m_srs.begin(); it != m_srs.end(); it++) { kainjow::mustache::data result; - result.set("title", p_result->get_title()); - result.set("url", p_result->get_url()); - result.set("snippet", p_result->get_snippet()); - result.set("resultContentId", mp_nameMapper->getNameForId(p_result->get_zimId())); - - if (p_result->get_wordCount() >= 0) { - result.set("wordCount", kiwix::beautifyInteger(p_result->get_wordCount())); + result.set("title", it.getTitle()); + result.set("url", it.getPath()); + result.set("snippet", it.getSnippet()); + std::ostringstream s; + s << it.getZimId(); + result.set("resultContentId", mp_nameMapper->getNameForId(s.str())); + + if (it.getWordCount() >= 0) { + result.set("wordCount", kiwix::beautifyInteger(it.getWordCount())); } results.push_back(result); - delete p_result; } // pages kainjow::mustache::data pages{kainjow::mustache::data::type::list}; - auto resultStart = mp_searcher->getResultStart(); auto resultEnd = 0U; - auto estimatedResultCount = mp_searcher->getEstimatedResultCount(); auto currentPage = 0U; auto pageStart = 0U; auto pageEnd = 0U; diff --git a/src/searcher.cpp b/src/searcher.cpp index 6f3b54257..e8b41caa1 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -228,6 +228,11 @@ unsigned int Searcher::getEstimatedResultCount() return this->estimatedResultCount; } +zim::SearchResultSet Searcher::getSearchResultSet() +{ + return *(this->internal); +} + _Result::_Result(zim::SearchResultSet::iterator iterator) : iterator(iterator) { From bc821638da0daa279b2f9acc8030195d2884b720 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Fri, 28 May 2021 12:40:22 +0530 Subject: [PATCH 7/9] Drop wrapper structures from handle_search Since we now have SearcherRenderer that can work with native libzim structure, we will drop the wrapper and use them instead. --- src/server/internalServer.cpp | 48 +++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 8d99b3f8a..9607dd4fd 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -541,13 +541,13 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } catch(const std::out_of_range&) {} catch(const std::invalid_argument&) {} - std::shared_ptr reader(nullptr); + std::shared_ptr archive; try { - reader = mp_library->getReaderById(bookId); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) {} /* Make the search */ - if ( (!reader && !bookName.empty()) + if ( (!archive && !bookName.empty()) || (patternString.empty() && ! has_geo_query) ) { auto data = get_default_data(); data.set("pattern", encodeDiples(patternString)); @@ -557,14 +557,18 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re return std::move(response); } - Searcher searcher; - if (reader) { - searcher.add_reader(reader.get()); + std::shared_ptr searcher; + if (archive) { + searcher = std::make_shared(*archive); } else { for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) { - auto currentReader = mp_library->getReaderById(bookId); - if (currentReader) { - searcher.add_reader(currentReader.get()); + auto currentArchive = mp_library->getArchiveById(bookId); + if (currentArchive) { + if (! searcher) { + searcher = std::make_shared(*currentArchive); + } else { + searcher->add_archive(*currentArchive); + } } } } @@ -589,14 +593,30 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re /* Get the results */ try { + zim::Query query; if (patternString.empty()) { - searcher.geo_search(latitude, longitude, distance, - start, end, m_verbose.load()); + // Execute geo-search + if (m_verbose.load()) { + cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl; + } + + query.setVerbose(m_verbose.load()); + query.setQuery("", false); + query.setGeorange(latitude, longitude, distance); } else { - searcher.search(patternString, - start, end, m_verbose.load()); + // Execute Ft search + if (m_verbose.load()) { + cout << "Performing query `" << patternString << "'" << endl; + } + + std::string queryString = removeAccents(patternString); + query.setQuery(queryString, false); + query.setVerbose(m_verbose.load()); } - SearchRenderer renderer(&searcher, mp_nameMapper); + + zim::Search search = searcher->search(query); + SearchRenderer renderer(search.getResults(start, end), mp_nameMapper, start, + search.getEstimatedMatches()); renderer.setSearchPattern(patternString); renderer.setSearchContent(bookName); renderer.setProtocolPrefix(m_root + "/"); From a94a03cd225602d72b87d97309d9add5485a1662 Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Fri, 28 May 2021 13:16:33 +0530 Subject: [PATCH 8/9] Remove unwanted reader functions Removing the functions in InternalServer that are no longer needed. --- src/server/internalServer.cpp | 12 ------------ src/server/internalServer.h | 1 - 2 files changed, 13 deletions(-) diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 9607dd4fd..0e3545028 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -803,18 +803,6 @@ std::string searchSuggestionHTML(const std::string& searchURL, const std::string } // unnamed namespace -std::shared_ptr -InternalServer::get_reader(const std::string& bookName) const -{ - std::shared_ptr reader; - try { - const std::string bookId = mp_nameMapper->getIdForName(bookName); - reader = mp_library->getReaderById(bookId); - } catch (const std::out_of_range& e) { - } - return reader; -} - std::unique_ptr InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const { diff --git a/src/server/internalServer.h b/src/server/internalServer.h index c5a2cc159..ed40fea9c 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -89,7 +89,6 @@ class InternalServer { MustacheData get_default_data() const; - std::shared_ptr get_reader(const std::string& bookName) const; bool etag_not_needed(const RequestContext& r) const; ETag get_matching_if_none_match_etag(const RequestContext& request) const; From 6f639144ab030e4082919e7f4f73a42641182dda Mon Sep 17 00:00:00 2001 From: Maneesh P M Date: Fri, 25 Jun 2021 15:04:49 +0530 Subject: [PATCH 9/9] Add unit tests for Searcher and Reader Even though we will be removing the wrappers soon, the test coverage should be complete and we could simply remove these files later. --- test/meson.build | 2 ++ test/reader.cpp | 62 +++++++++++++++++++++++++++++++++++++++++++++++ test/searcher.cpp | 25 +++++++++++++++++++ 3 files changed, 89 insertions(+) create mode 100644 test/reader.cpp create mode 100644 test/searcher.cpp diff --git a/test/meson.build b/test/meson.build index 924202088..1502c12fb 100644 --- a/test/meson.build +++ b/test/meson.build @@ -9,6 +9,8 @@ tests = [ 'book', 'manager', 'opds_catalog', + 'reader', + 'searcher' ] if build_machine.system() != 'windows' diff --git a/test/reader.cpp b/test/reader.cpp new file mode 100644 index 000000000..2c25207dc --- /dev/null +++ b/test/reader.cpp @@ -0,0 +1,62 @@ + +#include "gtest/gtest.h" +#include "../include/reader.h" +#include "zim/archive.h" + +namespace kiwix +{ + /** + * This test file is written primarily to demonstrate how Reader is simply a + * wrapper over an archive. We will be dropping this wrapper soon. + **/ + TEST (Reader, archiveWrapper) { + Reader reader("./test/zimfile.zim"); + zim::Archive archive = *reader.getZimArchive(); + + std::ostringstream s; + s << archive.getUuid(); + + ASSERT_EQ(reader.getId(), s.str()); + ASSERT_EQ(reader.getGlobalCount(), archive.getEntryCount()); + ASSERT_EQ(reader.getMainPage().getTitle(), archive.getMainEntry().getTitle()); + ASSERT_EQ(reader.hasFulltextIndex(), archive.hasFulltextIndex()); + ASSERT_NO_THROW(reader.getRandomPage()); + } + + TEST (Reader, getFunctions) { + zim::Archive archive("./test/zimfile.zim"); + Reader reader("./test/zimfile.zim"); + + auto archiveEntry = archive.getRandomEntry(); + ASSERT_TRUE(reader.pathExists(archiveEntry.getPath())); + auto readerEntry = reader.getEntryFromPath(archiveEntry.getPath()); + ASSERT_EQ(readerEntry.getTitle(), archiveEntry.getTitle()); + + ASSERT_FALSE(reader.pathExists("invalidEntryPath")); + ASSERT_THROW(reader.getEntryFromPath("invalidEntryPath"), NoEntry); + + readerEntry = reader.getEntryFromTitle(archiveEntry.getTitle()); + ASSERT_EQ(readerEntry.getTitle(), archiveEntry.getTitle()); + } + + TEST (Reader, suggestions) { + Reader reader("./test/zimfile.zim"); + SuggestionsList_t suggestions; + reader.searchSuggestionsSmart("The Genius", 4, suggestions); + + std::vector suggestionResult, expectedResult; + std::string suggestionTitle; + for (auto it = suggestions.begin(); it != suggestions.end(); it++) { + suggestionResult.push_back(it->getTitle()); + } + + expectedResult = { + "The Genius After Hours", + "The Genius Hits the Road", + "The Genius Sings the Blues", + "The Genius of Ray Charles" + }; + + ASSERT_EQ(suggestionResult, expectedResult); + } +} \ No newline at end of file diff --git a/test/searcher.cpp b/test/searcher.cpp new file mode 100644 index 000000000..24beefc5d --- /dev/null +++ b/test/searcher.cpp @@ -0,0 +1,25 @@ +#include "gtest/gtest.h" +#include "../include/searcher.h" +#include "../include/reader.h" + +namespace kiwix +{ + +TEST(Searcher, search) { + Reader reader("./test/example.zim"); + + Searcher searcher; + searcher.add_reader(&reader); + ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle()); + + searcher.search("wiki", 0, 2); + searcher.restart_search(); + ASSERT_EQ(searcher.getEstimatedResultCount(), (unsigned int)2); + + auto result = searcher.getNextResult(); + ASSERT_EQ(result->get_title(), "FreedomBox for Communities/Offline Wikipedia - Wikibooks, open books for an open world"); + result = searcher.getNextResult(); + ASSERT_EQ(result->get_title(), "Wikibooks"); +} + +} \ No newline at end of file