Skip to content

Commit

Permalink
Retrieve Searcher and Search from LRU Cache
Browse files Browse the repository at this point in the history
We use the new cache template to implement two kinds of cache.
1: The Searcher cache is more general in terms of its usage. A Searcher
   can be used for multiple searches without much change to itself. We
   try to retrieve the searcher and perform searches using it whenever
   possible, and if not we put a searcher into the cache. User can
   specify a custom cache length by manipulating the environment
   variable SEARCHER_CACHE_SIZE. Its default value is 10% of all the
   books available (with a minimum of 1).
2: The search cache is much more restricted in terms of usage. Its main
   purpose is to avoid re-searching on the searcher during page changes
   to generate SearchResultSet of various ranges. User can specify a
   custom cache length using the environment variable SEARCH_CACHE_SIZE
   with a default value of 2.
  • Loading branch information
maneeshpm authored and mgautierfr committed Mar 8, 2022
1 parent a51f8d6 commit 7cb4c13
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 10 deletions.
38 changes: 28 additions & 10 deletions src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,6 @@ extern "C" {

#include <zim/uuid.h>
#include <zim/error.h>
#include <zim/search.h>
#include <zim/suggestion.h>
#include <zim/entry.h>
#include <zim/item.h>

Expand All @@ -80,6 +78,7 @@ extern "C" {

#define MAX_SEARCH_LEN 140
#define KIWIX_MIN_CONTENT_SIZE_TO_DEFLATE 100
#define DEFAULT_CACHE_SIZE 2

namespace kiwix {

Expand All @@ -96,6 +95,18 @@ inline std::string normalizeRootUrl(std::string rootUrl)
return rootUrl.empty() ? rootUrl : "/" + rootUrl;
}

// Reads a cache length from the environment variable `name`.
//
// Returns the value converted by extractFromString<unsigned int> when the
// variable is set and the conversion does not throw. Returns `defaultVal`
// when the variable is not set or when the conversion throws.
unsigned int getCacheLength(const char* name, unsigned int defaultVal) {
  const char* const rawValue = std::getenv(name);
  if (rawValue != nullptr) {
    try {
      return extractFromString<unsigned int>(rawValue);
    } catch (...) {
      // Conversion failed: deliberately fall through to the default below.
    }
  }

  return defaultVal;
}
} // unnamed namespace

static IdNameMapper defaultNameMapper;
Expand Down Expand Up @@ -134,7 +145,9 @@ InternalServer::InternalServer(Library* library,
m_ipConnectionLimit(ipConnectionLimit),
mp_daemon(nullptr),
mp_library(library),
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper)
mp_nameMapper(nameMapper ? nameMapper : &defaultNameMapper),
searcherCache(getCacheLength("SEARCHER_CACHE_SIZE", std::max((unsigned int) (mp_library->getBookCount(true, true)*0.1), 1U))),
searchCache(getCacheLength("SEARCH_CACHE_SIZE", DEFAULT_CACHE_SIZE))
{}

bool InternalServer::start() {
Expand Down Expand Up @@ -488,11 +501,11 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
} catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {}

std::string bookName;
std::string bookName, bookId;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
const std::string bookId = mp_nameMapper->getIdForName(bookName);
bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {}

Expand All @@ -509,7 +522,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re

std::shared_ptr<zim::Searcher> searcher;
if (archive) {
searcher = std::make_shared<zim::Searcher>(*archive);
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentArchive = mp_library->getArchiveById(bookId);
Expand Down Expand Up @@ -540,6 +553,7 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
}

/* Get the results */
std::string queryString;
try {
zim::Query query;
if (patternString.empty()) {
Expand All @@ -549,20 +563,24 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
}

query.setQuery("");
queryString = "GEO:" + to_string(latitude) + to_string(longitude) + to_string(distance);
query.setGeorange(latitude, longitude, distance);
} else {
// Execute Ft search
if (m_verbose.load()) {
cout << "Performing query `" << patternString << "'" << endl;
}

std::string queryString = removeAccents(patternString);
queryString = "FT:" + removeAccents(patternString);
query.setQuery(queryString);
}
queryString = bookId + queryString;

std::shared_ptr<zim::Search> search;
search = searchCache.getOrPut(queryString, [=](){ return make_shared<zim::Search>(searcher->search(query));});

zim::Search search = searcher->search(query);
SearchRenderer renderer(search.getResults(start, pageLength), mp_nameMapper, mp_library, start,
search.getEstimatedMatches());
SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start,
search->getEstimatedMatches());
renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName);
renderer.setProtocolPrefix(m_root + "/");
Expand Down
10 changes: 10 additions & 0 deletions src/server/internalServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ extern "C" {
#include "library.h"
#include "name_mapper.h"

#include <zim/search.h>
#include <zim/suggestion.h>

#include <mustache.hpp>

#include <atomic>
Expand All @@ -36,9 +39,13 @@ extern "C" {
#include "server/request_context.h"
#include "server/response.h"

#include "tools/concurrent_cache.h"

namespace kiwix {

typedef kainjow::mustache::data MustacheData;
typedef ConcurrentCache<string, std::shared_ptr<zim::Searcher>> SearcherCache;
typedef ConcurrentCache<string, std::shared_ptr<zim::Search>> SearchCache;

class Entry;
class OPDSDumper;
Expand Down Expand Up @@ -115,6 +122,9 @@ class InternalServer {
Library* mp_library;
NameMapper* mp_nameMapper;

SearcherCache searcherCache;
SearchCache searchCache;

std::string m_server_id;
std::string m_library_id;

Expand Down

0 comments on commit 7cb4c13

Please sign in to comment.