diff --git a/format_code.sh b/format_code.sh index b844eec8c..9e61dacb0 100755 --- a/format_code.sh +++ b/format_code.sh @@ -7,6 +7,7 @@ files=( "include/common/otherTools.h" "include/common/regexTools.h" "include/common/networkTools.h" +"include/common/archiveTools.h" "include/manager.h" "include/reader.h" "include/kiwix.h" @@ -22,6 +23,7 @@ files=( "src/common/pathTools.cpp" "src/common/regexTools.cpp" "src/common/otherTools.cpp" +"src/common/archiveTools.cpp" "src/common/networkTools.cpp" "src/common/stringTools.cpp" "src/xapianSearcher.cpp" diff --git a/include/library.h b/include/library.h index 95d22e4bd..c5e026e2f 100644 --- a/include/library.h +++ b/include/library.h @@ -24,6 +24,7 @@ #include #include #include +#include #include "book.h" #include "bookmark.h" @@ -146,6 +147,7 @@ class Library { std::map m_books; std::map> m_readers; + std::map> m_archives; std::vector m_bookmarks; class BookDB; std::unique_ptr m_bookDB; @@ -198,6 +200,7 @@ class Library const Book& getBookByPath(const std::string& path) const; Book& getBookByPath(const std::string& path); std::shared_ptr getReaderById(const std::string& id); + std::shared_ptr getArchiveById(const std::string& id); /** * Remove a book from the library. diff --git a/include/meson.build b/include/meson.build index 4157970e4..6c0e46cc6 100644 --- a/include/meson.build +++ b/include/meson.build @@ -25,6 +25,7 @@ install_headers( 'tools/pathTools.h', 'tools/regexTools.h', 'tools/stringTools.h', + 'tools/archiveTools.h', subdir:'kiwix/tools' ) diff --git a/include/reader.h b/include/reader.h index 4001a5ff7..d288338b2 100644 --- a/include/reader.h +++ b/include/reader.h @@ -41,11 +41,11 @@ namespace kiwix * The SuggestionItem is a helper class that contains the info about a single * suggestion item. */ - class SuggestionItem { // Functions - private: + // Temporarily making the constructor public until the code move is complete + public: // Create a sugggestion item. explicit SuggestionItem(std::string title, std::string normalizedTitle, std::string path, std::string snippet = "") : @@ -91,6 +91,13 @@ class Reader * (.zim extesion). */ explicit Reader(const string zimFilePath); + + /** + * Create a Reader to read a zim file given by the Archive. + * + * @param archive The shared pointer to the Archive object. + */ + explicit Reader(const std::shared_ptr archive); #ifndef _WIN32 explicit Reader(int fd); Reader(int fd, zim::offset_type offset, zim::size_type size); @@ -488,7 +495,7 @@ class Reader zim::Archive* getZimArchive() const; protected: - std::unique_ptr zimArchive; + std::shared_ptr zimArchive; std::string zimFilePath; SuggestionsList_t suggestions; diff --git a/include/search_renderer.h b/include/search_renderer.h index 4f491961f..5951ff8a7 100644 --- a/include/search_renderer.h +++ b/include/search_renderer.h @@ -21,6 +21,7 @@ #define KIWIX_SEARCH_RENDERER_H #include +#include namespace kiwix { @@ -40,6 +41,8 @@ class SearchRenderer * Used to generate pagination links. */ SearchRenderer(Searcher* searcher, NameMapper* mapper); + SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper, + unsigned int start, unsigned int estimatedResultCount); ~SearchRenderer(); @@ -74,7 +77,7 @@ class SearchRenderer protected: std::string beautifyInteger(const unsigned int number); - Searcher* mp_searcher; + zim::SearchResultSet m_srs; NameMapper* mp_nameMapper; std::string searchContent; std::string searchPattern; diff --git a/include/searcher.h b/include/searcher.h index 8acf929ce..8c72953b4 100644 --- a/include/searcher.h +++ b/include/searcher.h @@ -32,6 +32,8 @@ #include "tools/pathTools.h" #include "tools/stringTools.h" +#include + using namespace std; namespace kiwix @@ -142,6 +144,11 @@ class Searcher */ unsigned int getEstimatedResultCount(); + /** + * Get a SearchResultSet object for current search + */ + zim::SearchResultSet getSearchResultSet(); + unsigned int getResultStart() { return resultStart; } unsigned int getResultEnd() { return resultEnd; } diff --git a/include/tools/archiveTools.h b/include/tools/archiveTools.h new file mode 100644 index 000000000..1429457ba --- /dev/null +++ b/include/tools/archiveTools.h @@ -0,0 +1,47 @@ +/* + * Copyright 2021 Maneesh P M + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#ifndef KIWIX_ARCHIVETOOLS_H +#define KIWIX_ARCHIVETOOLS_H + +#include + +/** + * This file contains all the functions that would make handling data related to + * an archive easier. + **/ + +namespace kiwix +{ + std::string getMetadata(const zim::Archive& archive, const std::string& name); + std::string getArchiveTitle(const zim::Archive& archive); + std::string getMetaDescription(const zim::Archive& archive); + std::string getMetaTags(const zim::Archive& archive, bool original = false); + bool getArchiveFavicon(const zim::Archive& archive, + std::string& content, std::string& mimeType); + std::string getMetaLanguage(const zim::Archive& archive); + std::string getMetaName(const zim::Archive& archive); + std::string getMetaDate(const zim::Archive& archive); + std::string getMetaCreator(const zim::Archive& archive); + std::string getMetaPublisher(const zim::Archive& archive); + zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry); + zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path); +} + +#endif diff --git a/include/tools/stringTools.h b/include/tools/stringTools.h index 8c9a8bc9d..6793c7f20 100644 --- a/include/tools/stringTools.h +++ b/include/tools/stringTools.h @@ -70,5 +70,7 @@ T extractFromString(const std::string& str) { } bool startsWith(const std::string& base, const std::string& start); + +std::vector getTitleVariants(const std::string& title); } //namespace kiwix #endif diff --git a/src/library.cpp b/src/library.cpp index 1147282df..c2e324ebc 100644 --- a/src/library.cpp +++ b/src/library.cpp @@ -108,6 +108,7 @@ bool Library::removeBookById(const std::string& id) { m_bookDB->delete_document("Q" + id); m_readers.erase(id); + m_archives.erase(id); return m_books.erase(id) == 1; } @@ -146,11 +147,35 @@ std::shared_ptr Library::getReaderById(const std::string& id) return m_readers.at(id); } catch (std::out_of_range& e) {} + try { + auto reader = make_shared(m_archives.at(id)); + m_readers[id] = reader; + return reader; + } catch (std::out_of_range& e) {} + + auto book = getBookById(id); + if (!book.isPathValid()) + return nullptr; + + auto archive = make_shared(book.getPath()); + m_archives[id] = archive; + auto reader = make_shared(archive); + m_readers[id] = reader; + return reader; +} + +std::shared_ptr Library::getArchiveById(const std::string& id) +{ + try { + return m_archives.at(id); + } catch (std::out_of_range& e) {} + auto book = getBookById(id); if (!book.isPathValid()) return nullptr; - auto sptr = make_shared(book.getPath()); - m_readers[id] = sptr; + + auto sptr = make_shared(book.getPath()); + m_archives[id] = sptr; return sptr; } diff --git a/src/meson.build b/src/meson.build index 7d6dab9c1..43f863e07 100644 --- a/src/meson.build +++ b/src/meson.build @@ -19,6 +19,7 @@ kiwix_sources = [ 'tools/stringTools.cpp', 'tools/networkTools.cpp', 'tools/otherTools.cpp', + 'tools/archiveTools.cpp', 'kiwixserve.cpp', 'name_mapper.cpp', 'server/byte_range.cpp', diff --git a/src/reader.cpp b/src/reader.cpp index c493ca72d..20a341731 100644 --- a/src/reader.cpp +++ b/src/reader.cpp @@ -25,6 +25,7 @@ #include #include "tools/otherTools.h" +#include "tools/archiveTools.h" inline char hi(char v) { @@ -86,6 +87,11 @@ Reader::Reader(const string zimFilePath) srand(time(nullptr)); } +Reader::Reader(const std::shared_ptr archive) + : zimArchive(archive), + zimFilePath(archive->getFilename()) + {} + #ifndef _WIN32 Reader::Reader(int fd) : zimArchive(new zim::Archive(fd)), @@ -183,14 +189,7 @@ Entry Reader::getMainPage() const bool Reader::getFavicon(string& content, string& mimeType) const { - try { - auto item = zimArchive->getIllustrationItem(); - content = item.getData(); - mimeType = item.getMimetype(); - return true; - } catch(zim::EntryNotFound& e) {}; - - return false; + return kiwix::getArchiveFavicon(*zimArchive, content, mimeType); } string Reader::getZimFilePath() const @@ -212,47 +211,32 @@ bool Reader::getMetadata(const string& name, string& value) const string Reader::getName() const { - METADATA("Name") + return kiwix::getMetaName(*zimArchive); } string Reader::getTitle() const { - string value = zimArchive->getMetadata("Title"); - if (value.empty()) { - value = getLastPathElement(zimFilePath); - std::replace(value.begin(), value.end(), '_', ' '); - size_t pos = value.find(".zim"); - value = value.substr(0, pos); - } - return value; + return kiwix::getArchiveTitle(*zimArchive); } string Reader::getCreator() const { - METADATA("Creator") + return kiwix::getMetaCreator(*zimArchive); } string Reader::getPublisher() const { - METADATA("Publisher") + return kiwix::getMetaPublisher(*zimArchive); } string Reader::getDate() const { - METADATA("Date") + return kiwix::getMetaDate(*zimArchive); } string Reader::getDescription() const { - string value; - this->getMetadata("Description", value); - - /* Mediawiki Collection tends to use the "Subtitle" name */ - if (value.empty()) { - this->getMetadata("Subtitle", value); - } - - return value; + return kiwix::getMetaDescription(*zimArchive); } string Reader::getLongDescription() const @@ -262,7 +246,7 @@ string Reader::getLongDescription() const string Reader::getLanguage() const { - METADATA("Language") + return kiwix::getMetaLanguage(*zimArchive); } string Reader::getLicense() const @@ -272,13 +256,7 @@ string Reader::getLicense() const string Reader::getTags(bool original) const { - string tags_str; - getMetadata("Tags", tags_str); - if (original) { - return tags_str; - } - auto tags = convertTags(tags_str); - return join(tags, ";"); + return kiwix::getMetaTags(*zimArchive, original); } @@ -342,12 +320,8 @@ string Reader::getOrigId() const Entry Reader::getEntryFromPath(const std::string& path) const { - if (path.empty() || path == "/") { - return getMainPage(); - } - try { - return zimArchive->getEntryByPath(path); + return kiwix::getEntryFromPath(*zimArchive, path); } catch (zim::EntryNotFound& e) { throw NoEntry(); } @@ -460,12 +434,7 @@ bool Reader::searchSuggestions(const string& prefix, std::vector Reader::getTitleVariants( const std::string& title) const { - std::vector variants; - variants.push_back(title); - variants.push_back(kiwix::ucFirst(title)); - variants.push_back(kiwix::lcFirst(title)); - variants.push_back(kiwix::toTitle(title)); - return variants; + return kiwix::getTitleVariants(title); } diff --git a/src/search_renderer.cpp b/src/search_renderer.cpp index 0e503e330..66bb3b429 100644 --- a/src/search_renderer.cpp +++ b/src/search_renderer.cpp @@ -37,10 +37,22 @@ namespace kiwix /* Constructor */ SearchRenderer::SearchRenderer(Searcher* searcher, NameMapper* mapper) - : mp_searcher(searcher), + : m_srs(searcher->getSearchResultSet()), mp_nameMapper(mapper), protocolPrefix("zim://"), - searchProtocolPrefix("search://?") + searchProtocolPrefix("search://?"), + estimatedResultCount(searcher->getEstimatedResultCount()), + resultStart(searcher->getResultStart()) +{} + +SearchRenderer::SearchRenderer(zim::SearchResultSet srs, NameMapper* mapper, + unsigned int start, unsigned int estimatedResultCount) + : m_srs(srs), + mp_nameMapper(mapper), + protocolPrefix("zim://"), + searchProtocolPrefix("search://?"), + estimatedResultCount(estimatedResultCount), + resultStart(start) {} /* Destructor */ @@ -70,29 +82,26 @@ std::string SearchRenderer::getHtml() { kainjow::mustache::data results{kainjow::mustache::data::type::list}; - mp_searcher->restart_search(); - Result* p_result = NULL; - while ((p_result = mp_searcher->getNextResult())) { + for (auto it = m_srs.begin(); it != m_srs.end(); it++) { kainjow::mustache::data result; - result.set("title", p_result->get_title()); - result.set("url", p_result->get_url()); - result.set("snippet", p_result->get_snippet()); - result.set("resultContentId", mp_nameMapper->getNameForId(p_result->get_zimId())); - - if (p_result->get_wordCount() >= 0) { - result.set("wordCount", kiwix::beautifyInteger(p_result->get_wordCount())); + result.set("title", it.getTitle()); + result.set("url", it.getPath()); + result.set("snippet", it.getSnippet()); + std::ostringstream s; + s << it.getZimId(); + result.set("resultContentId", mp_nameMapper->getNameForId(s.str())); + + if (it.getWordCount() >= 0) { + result.set("wordCount", kiwix::beautifyInteger(it.getWordCount())); } results.push_back(result); - delete p_result; } // pages kainjow::mustache::data pages{kainjow::mustache::data::type::list}; - auto resultStart = mp_searcher->getResultStart(); auto resultEnd = 0U; - auto estimatedResultCount = mp_searcher->getEstimatedResultCount(); auto currentPage = 0U; auto pageStart = 0U; auto pageEnd = 0U; diff --git a/src/searcher.cpp b/src/searcher.cpp index 6f3b54257..e8b41caa1 100644 --- a/src/searcher.cpp +++ b/src/searcher.cpp @@ -228,6 +228,11 @@ unsigned int Searcher::getEstimatedResultCount() return this->estimatedResultCount; } +zim::SearchResultSet Searcher::getSearchResultSet() +{ + return *(this->internal); +} + _Result::_Result(zim::SearchResultSet::iterator iterator) : iterator(iterator) { diff --git a/src/server/internalServer.cpp b/src/server/internalServer.cpp index 78d538f6e..0e3545028 100644 --- a/src/server/internalServer.cpp +++ b/src/server/internalServer.cpp @@ -47,6 +47,7 @@ extern "C" { #include "tools/pathTools.h" #include "tools/regexTools.h" #include "tools/stringTools.h" +#include "tools/archiveTools.h" #include "library.h" #include "name_mapper.h" #include "entry.h" @@ -55,6 +56,10 @@ extern "C" { #include "opds_dumper.h" #include +#include +#include +#include +#include #include @@ -323,22 +328,63 @@ std::unique_ptr InternalServer::build_homepage(const RequestContext& r return ContentResponse::build(*this, RESOURCE::templates::index_html, get_default_data(), "text/html; charset=utf-8", true); } +/** + * Archive and Zim handlers begin + **/ + +// TODO: retrieve searcher from caching mechanism +SuggestionsList_t getSuggestions(const zim::Archive* const archive, + const std::string& queryString, int suggestionCount) +{ + SuggestionsList_t suggestions; + if (archive->hasTitleIndex()) { + auto searcher = zim::Searcher(*archive); + zim::Query suggestionQuery; + suggestionQuery.setQuery(queryString, true); + auto suggestionSearch = searcher.search(suggestionQuery); + auto suggestionResult = suggestionSearch.getResults(0, suggestionCount); + + for (auto it = suggestionResult.begin(); it != suggestionResult.end(); it++) { + SuggestionItem suggestion(it.getTitle(), kiwix::normalize(it.getTitle()), + it.getPath(), it.getSnippet()); + suggestions.push_back(suggestion); + } + } else { + // TODO: This case should be handled by libzim + std::vector variants = getTitleVariants(queryString); + int currCount = 0; + for (auto it = variants.begin(); it != variants.end() && currCount < suggestionCount; it++) { + for (auto& entry: archive->findByTitle(*it)) { + SuggestionItem suggestion(entry.getTitle(), kiwix::normalize(entry.getTitle()), + entry.getPath()); + suggestions.push_back(suggestion); + currCount++; + } + } + } + return suggestions; +} + +/** + * Archive and Zim handlers end + **/ + std::unique_ptr InternalServer::handle_meta(const RequestContext& request) { std::string bookName; std::string bookId; std::string meta_name; - std::shared_ptr reader; + std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); meta_name = request.get_argument("name"); - reader = mp_library->getReaderById(bookId); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range& e) { return Response::build_404(*this, request, bookName, ""); } - if (reader == nullptr) { + if (archive == nullptr) { return Response::build_404(*this, request, bookName, ""); } @@ -346,23 +392,23 @@ std::unique_ptr InternalServer::handle_meta(const RequestContext& requ std::string mimeType = "text"; if (meta_name == "title") { - content = reader->getTitle(); + content = getArchiveTitle(*archive); } else if (meta_name == "description") { - content = reader->getDescription(); + content = getMetaDescription(*archive); } else if (meta_name == "language") { - content = reader->getLanguage(); + content = getMetaLanguage(*archive); } else if (meta_name == "name") { - content = reader->getName(); + content = getMetaName(*archive); } else if (meta_name == "tags") { - content = reader->getTags(); + content = getMetaTags(*archive); } else if (meta_name == "date") { - content = reader->getDate(); + content = getMetaDate(*archive); } else if (meta_name == "creator") { - content = reader->getCreator(); + content = getMetaCreator(*archive); } else if (meta_name == "publisher") { - content = reader->getPublisher(); + content = getMetaPublisher(*archive); } else if (meta_name == "favicon") { - reader->getFavicon(content, mimeType); + getArchiveFavicon(*archive, content, mimeType); } else { return Response::build_404(*this, request, bookName, ""); } @@ -385,51 +431,54 @@ std::unique_ptr InternalServer::handle_suggest(const RequestContext& r std::string bookName; std::string bookId; - std::string term; - std::shared_ptr reader; + std::string queryString; + std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); - term = request.get_argument("term"); - reader = mp_library->getReaderById(bookId); + queryString = request.get_argument("term"); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { return Response::build_404(*this, request, bookName, ""); } + if (archive == nullptr) { + return Response::build_404(*this, request, bookName, ""); + } + if (m_verbose.load()) { - printf("Searching suggestions for: \"%s\"\n", term.c_str()); + printf("Searching suggestions for: \"%s\"\n", queryString.c_str()); } MustacheData results{MustacheData::type::list}; bool first = true; - if (reader != nullptr) { - /* Get the suggestions */ - SuggestionsList_t suggestions; - reader->searchSuggestionsSmart(term, maxSuggestionCount, suggestions); - for(auto& suggestion:suggestions) { - MustacheData result; - result.set("label", suggestion.getTitle()); - - if (suggestion.hasSnippet()) { - result.set("label", suggestion.getSnippet()); - } - result.set("value", suggestion.getTitle()); - result.set("kind", "path"); - result.set("path", suggestion.getPath()); - result.set("first", first); - first = false; - results.push_back(result); - suggestionCount++; + /* Get the suggestions */ + SuggestionsList_t suggestions = getSuggestions(archive.get(), queryString, maxSuggestionCount); + for(auto& suggestion:suggestions) { + MustacheData result; + result.set("label", suggestion.getTitle()); + + if (suggestion.hasSnippet()) { + result.set("label", suggestion.getSnippet()); } + + result.set("value", suggestion.getTitle()); + result.set("kind", "path"); + result.set("path", suggestion.getPath()); + result.set("first", first); + first = false; + results.push_back(result); + suggestionCount++; } + /* Propose the fulltext search if possible */ - if (reader->hasFulltextIndex()) { + if (archive->hasFulltextIndex()) { MustacheData result; - result.set("label", "containing '" + term + "'..."); - result.set("value", term + " "); + result.set("label", "containing '" + queryString + "'..."); + result.set("value", queryString + " "); result.set("kind", "pattern"); result.set("first", first); results.push_back(result); @@ -492,30 +541,34 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re } catch(const std::out_of_range&) {} catch(const std::invalid_argument&) {} - std::shared_ptr reader(nullptr); + std::shared_ptr archive; try { - reader = mp_library->getReaderById(bookId); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) {} /* Make the search */ - if ( (!reader && !bookName.empty()) + if ( (!archive && !bookName.empty()) || (patternString.empty() && ! has_geo_query) ) { auto data = get_default_data(); data.set("pattern", encodeDiples(patternString)); auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8"); - response->set_taskbar(bookName, reader ? reader->getTitle() : ""); + response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : ""); response->set_code(MHD_HTTP_NOT_FOUND); return std::move(response); } - Searcher searcher; - if (reader) { - searcher.add_reader(reader.get()); + std::shared_ptr searcher; + if (archive) { + searcher = std::make_shared(*archive); } else { for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) { - auto currentReader = mp_library->getReaderById(bookId); - if (currentReader) { - searcher.add_reader(currentReader.get()); + auto currentArchive = mp_library->getArchiveById(bookId); + if (currentArchive) { + if (! searcher) { + searcher = std::make_shared(*currentArchive); + } else { + searcher->add_archive(*currentArchive); + } } } } @@ -540,21 +593,37 @@ std::unique_ptr InternalServer::handle_search(const RequestContext& re /* Get the results */ try { + zim::Query query; if (patternString.empty()) { - searcher.geo_search(latitude, longitude, distance, - start, end, m_verbose.load()); + // Execute geo-search + if (m_verbose.load()) { + cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl; + } + + query.setVerbose(m_verbose.load()); + query.setQuery("", false); + query.setGeorange(latitude, longitude, distance); } else { - searcher.search(patternString, - start, end, m_verbose.load()); + // Execute Ft search + if (m_verbose.load()) { + cout << "Performing query `" << patternString << "'" << endl; + } + + std::string queryString = removeAccents(patternString); + query.setQuery(queryString, false); + query.setVerbose(m_verbose.load()); } - SearchRenderer renderer(&searcher, mp_nameMapper); + + zim::Search search = searcher->search(query); + SearchRenderer renderer(search.getResults(start, end), mp_nameMapper, start, + search.getEstimatedMatches()); renderer.setSearchPattern(patternString); renderer.setSearchContent(bookName); renderer.setProtocolPrefix(m_root + "/"); renderer.setSearchProtocolPrefix(m_root + "/search?"); renderer.setPageLength(pageLength); auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8"); - response->set_taskbar(bookName, reader ? reader->getTitle() : ""); + response->set_taskbar(bookName, archive ? getArchiveTitle(*archive) : ""); return std::move(response); } catch (const std::exception& e) { @@ -571,23 +640,23 @@ std::unique_ptr InternalServer::handle_random(const RequestContext& re std::string bookName; std::string bookId; - std::shared_ptr reader; + std::shared_ptr archive; try { bookName = request.get_argument("content"); bookId = mp_nameMapper->getIdForName(bookName); - reader = mp_library->getReaderById(bookId); + archive = mp_library->getArchiveById(bookId); } catch (const std::out_of_range&) { return Response::build_404(*this, request, bookName, ""); } - if (reader == nullptr) { + if (archive == nullptr) { return Response::build_404(*this, request, bookName, ""); } try { - auto entry = reader->getRandomPage(); - return build_redirect(bookName, entry.getFinalEntry()); - } catch(kiwix::NoEntry& e) { + auto entry = archive->getRandomEntry(); + return build_redirect(bookName, getFinalItem(*archive, entry)); + } catch(zim::EntryNotFound& e) { return Response::build_404(*this, request, bookName, ""); } } @@ -734,22 +803,10 @@ std::string searchSuggestionHTML(const std::string& searchURL, const std::string } // unnamed namespace -std::shared_ptr -InternalServer::get_reader(const std::string& bookName) const -{ - std::shared_ptr reader; - try { - const std::string bookId = mp_nameMapper->getIdForName(bookName); - reader = mp_library->getReaderById(bookId); - } catch (const std::out_of_range& e) { - } - return reader; -} - std::unique_ptr -InternalServer::build_redirect(const std::string& bookName, const kiwix::Entry& entry) const +InternalServer::build_redirect(const std::string& bookName, const zim::Item& item) const { - auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(entry.getPath()); + auto redirectUrl = m_root + "/" + bookName + "/" + kiwix::urlEncode(item.getPath()); return Response::build_redirect(*this, redirectUrl); } @@ -765,8 +822,13 @@ std::unique_ptr InternalServer::handle_content(const RequestContext& r if (bookName.empty()) return build_homepage(request); - const std::shared_ptr reader = get_reader(bookName); - if (reader == nullptr) { + std::shared_ptr archive; + try { + const std::string bookId = mp_nameMapper->getIdForName(bookName); + archive = mp_library->getArchiveById(bookId); + } catch (const std::out_of_range& e) {} + + if (archive == nullptr) { std::string searchURL = m_root+"/search?pattern="+pattern; // Make a full search on the entire library. const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); @@ -779,31 +841,31 @@ std::unique_ptr InternalServer::handle_content(const RequestContext& r } try { - auto entry = reader->getEntryFromPath(urlStr); + auto entry = getEntryFromPath(*archive, urlStr); if (entry.isRedirect() || urlStr.empty()) { // If urlStr is empty, we want to mainPage. // We must do a redirection to the real page. - return build_redirect(bookName, entry.getFinalEntry()); + return build_redirect(bookName, getFinalItem(*archive, entry)); } - auto response = ItemResponse::build(*this, request, entry.getZimEntry().getItem()); + auto response = ItemResponse::build(*this, request, entry.getItem()); try { - dynamic_cast(*response).set_taskbar(bookName, reader->getTitle()); + dynamic_cast(*response).set_taskbar(bookName, getArchiveTitle(*archive)); } catch (std::bad_cast& e) {} if (m_verbose.load()) { printf("Found %s\n", entry.getPath().c_str()); - printf("mimeType: %s\n", entry.getMimetype().c_str()); + printf("mimeType: %s\n", entry.getItem(true).getMimetype().c_str()); } return response; - } catch(kiwix::NoEntry& e) { + } catch(zim::EntryNotFound& e) { if (m_verbose.load()) printf("Failed to find %s\n", urlStr.c_str()); std::string searchURL = m_root+"/search?content="+bookName+"&pattern="+pattern; // Make a search on this specific book only. const std::string details = searchSuggestionHTML(searchURL, kiwix::urlDecode(pattern)); - return Response::build_404(*this, request, bookName, reader->getTitle(), details); + return Response::build_404(*this, request, bookName, getArchiveTitle(*archive), details); } } diff --git a/src/server/internalServer.h b/src/server/internalServer.h index 142d3fc37..ed40fea9c 100644 --- a/src/server/internalServer.h +++ b/src/server/internalServer.h @@ -69,7 +69,7 @@ class InternalServer { private: // functions std::unique_ptr handle_request(const RequestContext& request); - std::unique_ptr build_redirect(const std::string& bookName, const kiwix::Entry& entry) const; + std::unique_ptr build_redirect(const std::string& bookName, const zim::Item& item) const; std::unique_ptr build_homepage(const RequestContext& request); std::unique_ptr handle_skin(const RequestContext& request); std::unique_ptr handle_catalog(const RequestContext& request); @@ -89,7 +89,6 @@ class InternalServer { MustacheData get_default_data() const; - std::shared_ptr get_reader(const std::string& bookName) const; bool etag_not_needed(const RequestContext& r) const; ETag get_matching_if_none_match_etag(const RequestContext& request) const; diff --git a/src/tools/archiveTools.cpp b/src/tools/archiveTools.cpp new file mode 100644 index 000000000..3608357ab --- /dev/null +++ b/src/tools/archiveTools.cpp @@ -0,0 +1,119 @@ +/* + * Copyright 2021 Maneesh P M + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include +#include + +#include +#include + +namespace kiwix +{ +std::string getMetadata(const zim::Archive& archive, const std::string& name) { + try { + return archive.getMetadata(name); + } catch (zim::EntryNotFound& e) { + return ""; + } +} + +std::string getArchiveTitle(const zim::Archive& archive) { + std::string value = getMetadata(archive, "Title"); + if (value.empty()) { + value = getLastPathElement(archive.getFilename()); + std::replace(value.begin(), value.end(), '_', ' '); + size_t pos = value.find(".zim"); + value = value.substr(0, pos); + } + return value; +} + +std::string getMetaDescription(const zim::Archive& archive) { + std::string value; + value = getMetadata(archive, "Description"); + + /* Mediawiki Collection tends to use the "Subtitle" name */ + if (value.empty()) { + value = getMetadata(archive, "Subtitle"); + } + + return value; +} + +std::string getMetaTags(const zim::Archive& archive, bool original) { + std::string tags_str = getMetadata(archive, "Tags"); + if (original) { + return tags_str; + } + auto tags = convertTags(tags_str); + return join(tags, ";"); +} + +bool getArchiveFavicon(const zim::Archive& archive, + std::string& content, std::string& mimeType){ + try { + auto item = archive.getIllustrationItem(); + content = item.getData(); + mimeType = item.getMimetype(); + return true; + } catch(zim::EntryNotFound& e) {}; + + return false; +} + +std::string getMetaLanguage(const zim::Archive& archive) { + return getMetadata(archive, "Language"); +} + +std::string getMetaName(const zim::Archive& archive) { + return getMetadata(archive, "Name"); +} + +std::string getMetaDate(const zim::Archive& archive) { + return getMetadata(archive, "Date"); +} + +std::string getMetaCreator(const zim::Archive& archive) { + return getMetadata(archive, "Creator"); +} + +std::string getMetaPublisher(const zim::Archive& archive) { + return getMetadata(archive, "Publisher"); +} + +zim::Item getFinalItem(const zim::Archive& archive, const zim::Entry& entry) +{ + return entry.getItem(true); +} + +zim::Entry getEntryFromPath(const zim::Archive& archive, const std::string& path) +{ + try { + return archive.getEntryByPath(path); + } catch (zim::EntryNotFound& e) { + if (path.empty() || path == "/") { + return archive.getMainEntry(); + } + } + throw zim::EntryNotFound("Cannot find entry for non empty path"); +} + +} // kiwix diff --git a/src/tools/stringTools.cpp b/src/tools/stringTools.cpp index a0eac7ba2..55efc07f0 100644 --- a/src/tools/stringTools.cpp +++ b/src/tools/stringTools.cpp @@ -395,3 +395,11 @@ bool kiwix::startsWith(const std::string& base, const std::string& start) && std::equal(start.begin(), start.end(), base.begin()); } +std::vector kiwix::getTitleVariants(const std::string& title) { + std::vector variants; + variants.push_back(title); + variants.push_back(kiwix::ucFirst(title)); + variants.push_back(kiwix::lcFirst(title)); + variants.push_back(kiwix::toTitle(title)); + return variants; +} diff --git a/test/meson.build b/test/meson.build index 924202088..1502c12fb 100644 --- a/test/meson.build +++ b/test/meson.build @@ -9,6 +9,8 @@ tests = [ 'book', 'manager', 'opds_catalog', + 'reader', + 'searcher' ] if build_machine.system() != 'windows' diff --git a/test/reader.cpp b/test/reader.cpp new file mode 100644 index 000000000..2c25207dc --- /dev/null +++ b/test/reader.cpp @@ -0,0 +1,62 @@ + +#include "gtest/gtest.h" +#include "../include/reader.h" +#include "zim/archive.h" + +namespace kiwix +{ + /** + * This test file is written primarily to demonstrate how Reader is simply a + * wrapper over an archive. We will be dropping this wrapper soon. + **/ + TEST (Reader, archiveWrapper) { + Reader reader("./test/zimfile.zim"); + zim::Archive archive = *reader.getZimArchive(); + + std::ostringstream s; + s << archive.getUuid(); + + ASSERT_EQ(reader.getId(), s.str()); + ASSERT_EQ(reader.getGlobalCount(), archive.getEntryCount()); + ASSERT_EQ(reader.getMainPage().getTitle(), archive.getMainEntry().getTitle()); + ASSERT_EQ(reader.hasFulltextIndex(), archive.hasFulltextIndex()); + ASSERT_NO_THROW(reader.getRandomPage()); + } + + TEST (Reader, getFunctions) { + zim::Archive archive("./test/zimfile.zim"); + Reader reader("./test/zimfile.zim"); + + auto archiveEntry = archive.getRandomEntry(); + ASSERT_TRUE(reader.pathExists(archiveEntry.getPath())); + auto readerEntry = reader.getEntryFromPath(archiveEntry.getPath()); + ASSERT_EQ(readerEntry.getTitle(), archiveEntry.getTitle()); + + ASSERT_FALSE(reader.pathExists("invalidEntryPath")); + ASSERT_THROW(reader.getEntryFromPath("invalidEntryPath"), NoEntry); + + readerEntry = reader.getEntryFromTitle(archiveEntry.getTitle()); + ASSERT_EQ(readerEntry.getTitle(), archiveEntry.getTitle()); + } + + TEST (Reader, suggestions) { + Reader reader("./test/zimfile.zim"); + SuggestionsList_t suggestions; + reader.searchSuggestionsSmart("The Genius", 4, suggestions); + + std::vector suggestionResult, expectedResult; + std::string suggestionTitle; + for (auto it = suggestions.begin(); it != suggestions.end(); it++) { + suggestionResult.push_back(it->getTitle()); + } + + expectedResult = { + "The Genius After Hours", + "The Genius Hits the Road", + "The Genius Sings the Blues", + "The Genius of Ray Charles" + }; + + ASSERT_EQ(suggestionResult, expectedResult); + } +} \ No newline at end of file diff --git a/test/searcher.cpp b/test/searcher.cpp new file mode 100644 index 000000000..24beefc5d --- /dev/null +++ b/test/searcher.cpp @@ -0,0 +1,25 @@ +#include "gtest/gtest.h" +#include "../include/searcher.h" +#include "../include/reader.h" + +namespace kiwix +{ + +TEST(Searcher, search) { + Reader reader("./test/example.zim"); + + Searcher searcher; + searcher.add_reader(&reader); + ASSERT_EQ(searcher.get_reader(0)->getTitle(), reader.getTitle()); + + searcher.search("wiki", 0, 2); + searcher.restart_search(); + ASSERT_EQ(searcher.getEstimatedResultCount(), (unsigned int)2); + + auto result = searcher.getNextResult(); + ASSERT_EQ(result->get_title(), "FreedomBox for Communities/Offline Wikipedia - Wikibooks, open books for an open world"); + result = searcher.getNextResult(); + ASSERT_EQ(result->get_title(), "Wikibooks"); +} + +} \ No newline at end of file