Skip to content

Commit

Permalink
Merge pull request #724 from kiwix/search_improvement
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Mar 29, 2022
2 parents 574c1ad + 311f783 commit 95d4dd6
Show file tree
Hide file tree
Showing 8 changed files with 321 additions and 114 deletions.
201 changes: 112 additions & 89 deletions src/server/internalServer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,54 @@ unsigned int getCacheLength(const char* name, unsigned int defaultVal) {
}
} // unnamed namespace

/**
 * Builds a pure full-text search description.
 *
 * The geo query is the default-constructed (invalid) GeoQuery and the book
 * name is left empty.
 *
 * @param pattern  The text pattern to search for.
 */
SearchInfo::SearchInfo(const std::string& pattern)
  : SearchInfo(pattern, GeoQuery())
{}

/**
 * Builds a search description from a text pattern and a geo query.
 *
 * The book name is left empty.
 *
 * @param pattern   The text pattern to search for (may be empty).
 * @param geoQuery  The geographical restriction to apply.
 */
SearchInfo::SearchInfo(const std::string& pattern, GeoQuery geoQuery)
  : pattern(pattern),
    geoQuery(geoQuery),
    bookName()
{}

/**
 * Builds a search description from the parameters of an HTTP request.
 *
 * Reads the optional "pattern" and "content" parameters (both default to an
 * empty string) and, when "latitude", "longitude" and "distance" can all be
 * read as floats, a geo query.
 *
 * @param request  The request whose arguments describe the search.
 * @throws std::invalid_argument  If the request carries neither a valid geo
 *                                query nor a non-empty pattern.
 */
SearchInfo::SearchInfo(const RequestContext& request)
  : pattern(request.get_optional_param<std::string>("pattern", "")),
    geoQuery(),
    bookName(request.get_optional_param<std::string>("content", ""))
{
  /* Retrieve geo search */
  try {
    const auto lat = request.get_argument<float>("latitude");
    const auto lon = request.get_argument<float>("longitude");
    const auto dist = request.get_argument<float>("distance");
    geoQuery = GeoQuery(lat, lon, dist);
  } catch (const std::out_of_range&) {
    // Geo search is optional: keep the default (invalid) geo query.
    // Presumably raised when one of the three arguments is absent.
  } catch (const std::invalid_argument&) {
    // Same fallback; presumably raised when an argument is not a valid float.
  }

  const bool hasGeoQuery = static_cast<bool>(geoQuery);
  if (pattern.empty() && !hasGeoQuery) {
    throw std::invalid_argument("No query provided.");
  }
}

/**
 * Converts this search description into a zim::Query.
 *
 * The text pattern is always set on the query; the geo range is set only when
 * the geo query is valid.
 *
 * @param verbose  When true, a one-line description of the query is written
 *                 to std::cout.
 * @return The query to hand over to a zim searcher.
 */
zim::Query SearchInfo::getZimQuery(bool verbose) const {
  const bool withGeo = static_cast<bool>(geoQuery);

  zim::Query zimQuery;
  if (verbose) {
    std::cout << "Performing query '" << pattern << "'";
  }
  zimQuery.setQuery(pattern);

  if (withGeo) {
    if (verbose) {
      std::cout << " with geo query '" << geoQuery.distance
                << "&(" << geoQuery.latitude
                << ";" << geoQuery.longitude << ")'";
    }
    zimQuery.setGeorange(geoQuery.latitude, geoQuery.longitude, geoQuery.distance);
  }

  // Terminate (and flush) the log line started above.
  if (verbose) {
    std::cout << std::endl;
  }
  return zimQuery;
}


static IdNameMapper defaultNameMapper;

static MHD_Result staticHandlerCallback(void* cls,
Expand Down Expand Up @@ -499,111 +547,86 @@ std::unique_ptr<Response> InternalServer::handle_search(const RequestContext& re
printf("** running handle_search\n");
}

std::string patternString;
try {
patternString = request.get_argument("pattern");
} catch (const std::out_of_range&) {}

/* Retrieve geo search */
bool has_geo_query = false;
float latitude = 0;
float longitude = 0;
float distance = 0;
try {
latitude = request.get_argument<float>("latitude");
longitude = request.get_argument<float>("longitude");
distance = request.get_argument<float>("distance");
has_geo_query = true;
} catch(const std::out_of_range&) {}
catch(const std::invalid_argument&) {}

std::string bookName, bookId;
std::shared_ptr<zim::Archive> archive;
try {
bookName = request.get_argument("content");
bookId = mp_nameMapper->getIdForName(bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {}

/* Make the search */
if ( (!archive && !bookName.empty())
|| (patternString.empty() && ! has_geo_query) ) {
auto data = get_default_data();
data.set("pattern", encodeDiples(patternString));
data.set("root", m_root);
auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8");
response->set_code(MHD_HTTP_NOT_FOUND);
return withTaskbarInfo(bookName, archive.get(), std::move(response));
}

std::shared_ptr<zim::Searcher> searcher;
if (archive) {
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentArchive = mp_library->getArchiveById(bookId);
if (currentArchive) {
if (! searcher) {
searcher = std::make_shared<zim::Searcher>(*currentArchive);
} else {
searcher->addArchive(*currentArchive);
}
auto searchInfo = SearchInfo(request);

std::string bookId;
std::shared_ptr<zim::Archive> archive;
if (!searchInfo.bookName.empty()) {
try {
bookId = mp_nameMapper->getIdForName(searchInfo.bookName);
archive = mp_library->getArchiveById(bookId);
} catch (const std::out_of_range&) {
throw std::invalid_argument("The requested book doesn't exist.");
}
}
}

auto start = 0;
try {
start = request.get_argument<unsigned int>("start");
} catch (const std::exception&) {}

auto pageLength = 25;
try {
pageLength = request.get_argument<unsigned int>("pageLength");
} catch (const std::exception&) {}
if (pageLength > MAX_SEARCH_LEN) {
pageLength = MAX_SEARCH_LEN;
}
if (pageLength == 0) {
pageLength = 25;
}
/* Make the search */
// Try to get a search from the searchInfo, else build it
std::shared_ptr<zim::Search> search;
try {
search = searchCache.getOrPut(searchInfo,
[=](){
std::shared_ptr<zim::Searcher> searcher;
if (archive) {
searcher = searcherCache.getOrPut(bookId, [=](){ return std::make_shared<zim::Searcher>(*archive);});
} else {
for (auto& bookId: mp_library->filter(kiwix::Filter().local(true).valid(true))) {
auto currentArchive = mp_library->getArchiveById(bookId);
if (currentArchive) {
if (! searcher) {
searcher = std::make_shared<zim::Searcher>(*currentArchive);
} else {
searcher->addArchive(*currentArchive);
}
}
}
}
return make_shared<zim::Search>(searcher->search(searchInfo.getZimQuery(m_verbose.load())));
}
);
} catch(std::runtime_error& e) {
// Searcher->search will throw a runtime error if there is no valid xapian database to do the search.
// (in case of a zim file not containing an index)
auto data = get_default_data();
data.set("pattern", encodeDiples(searchInfo.pattern));
auto response = ContentResponse::build(*this, RESOURCE::templates::no_search_result_html, data, "text/html; charset=utf-8");
response->set_code(MHD_HTTP_NOT_FOUND);
return withTaskbarInfo(searchInfo.bookName, archive.get(), std::move(response));
}

/* Get the results */
std::string queryString;
try {
zim::Query query;
if (patternString.empty()) {
// Execute geo-search
if (m_verbose.load()) {
cout << "Performing geo query `" << distance << "&(" << latitude << ";" << longitude << ")'" << endl;
}

query.setQuery("");
queryString = "GEO:" + to_string(latitude) + to_string(longitude) + to_string(distance);
query.setGeorange(latitude, longitude, distance);
} else {
// Execute Ft search
if (m_verbose.load()) {
cout << "Performing query `" << patternString << "'" << endl;
}
auto start = 0;
try {
start = request.get_argument<unsigned int>("start");
} catch (const std::exception&) {}

queryString = "FT:" + removeAccents(patternString);
query.setQuery(queryString);
auto pageLength = 25;
try {
pageLength = request.get_argument<unsigned int>("pageLength");
} catch (const std::exception&) {}
if (pageLength > MAX_SEARCH_LEN) {
pageLength = MAX_SEARCH_LEN;
}
if (pageLength == 0) {
pageLength = 25;
}
queryString = bookId + queryString;

std::shared_ptr<zim::Search> search;
search = searchCache.getOrPut(queryString, [=](){ return make_shared<zim::Search>(searcher->search(query));});

/* Get the results */
SearchRenderer renderer(search->getResults(start, pageLength), mp_nameMapper, mp_library, start,
search->getEstimatedMatches());
renderer.setSearchPattern(patternString);
renderer.setSearchContent(bookName);
renderer.setSearchPattern(searchInfo.pattern);
renderer.setSearchContent(searchInfo.bookName);
renderer.setProtocolPrefix(m_root + "/");
renderer.setSearchProtocolPrefix(m_root + "/search?");
renderer.setPageLength(pageLength);
auto response = ContentResponse::build(*this, renderer.getHtml(), "text/html; charset=utf-8");
return withTaskbarInfo(bookName, archive.get(), std::move(response));
return withTaskbarInfo(searchInfo.bookName, archive.get(), std::move(response));
} catch (const std::invalid_argument& e) {
return HTTP400HtmlResponse(*this, request)
+ invalidUrlMsg
+ std::string(e.what());
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
return Response::build_500(*this, e.what());
Expand Down
48 changes: 46 additions & 2 deletions src/server/internalServer.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,53 @@ extern "C" {

namespace kiwix {

/**
 * Geographical part of a search: a point and a distance around it.
 *
 * A negative distance marks the query as "not set"; this is the state
 * produced by the default constructor and reported by operator bool.
 */
struct GeoQuery {
  float latitude;
  float longitude;
  float distance;

  /** Builds a geo query from explicit coordinates and distance. */
  GeoQuery(float latitude, float longitude, float distance)
    : latitude(latitude), longitude(longitude), distance(distance)
  {}

  /** Builds an unset geo query (distance is -1). */
  GeoQuery()
    : GeoQuery(0, 0, -1)
  {}

  /** True when the query is set, i.e. its distance is not negative. */
  explicit operator bool() const {
    return distance >= 0;
  }

  /** Lexicographic ordering on (latitude, longitude, distance). */
  friend bool operator<(const GeoQuery& l, const GeoQuery& r)
  {
    const auto lhs = std::tie(l.latitude, l.longitude, l.distance);
    const auto rhs = std::tie(r.latitude, r.longitude, r.distance); // keep the same order
    return lhs < rhs;
  }
};

/**
 * Description of a search request: a text pattern, an optional geo query and
 * the name of the book to search in.
 *
 * Instances are ordered with operator< so they can be used as the key of the
 * SearchCache.
 */
class SearchInfo {
  public:
    /** Text-only search. */
    SearchInfo(const std::string& pattern);

    /** Text search combined with a geo query. */
    SearchInfo(const std::string& pattern, GeoQuery geoQuery);

    /** Search described by the arguments of an HTTP request. */
    SearchInfo(const RequestContext& request);

    /** Builds the zim::Query for this search; `verbose` enables logging. */
    zim::Query getZimQuery(bool verbose) const;

    /** Lexicographic ordering on (bookName, pattern, geoQuery). */
    friend bool operator<(const SearchInfo& l, const SearchInfo& r)
    {
      const auto lhs = std::tie(l.bookName, l.pattern, l.geoQuery);
      const auto rhs = std::tie(r.bookName, r.pattern, r.geoQuery); // keep the same order
      return lhs < rhs;
    }

  public: //data
    // Declaration order is also the initialisation order used by the
    // constructors; keep it in sync with their initialiser lists.
    std::string pattern;
    GeoQuery geoQuery;
    std::string bookName;
};


typedef kainjow::mustache::data MustacheData;
typedef ConcurrentCache<string, std::shared_ptr<zim::Searcher>> SearcherCache;
typedef ConcurrentCache<string, std::shared_ptr<zim::Search>> SearchCache;
typedef ConcurrentCache<SearchInfo, std::shared_ptr<zim::Search>> SearchCache;
typedef ConcurrentCache<string, std::shared_ptr<zim::SuggestionSearcher>> SuggestionSearcherCache;

class Entry;
Expand Down Expand Up @@ -77,7 +121,7 @@ class InternalServer {
bool start();
void stop();
std::string getAddress() { return m_addr; }
int getPort() { return m_port; }
int getPort() { return m_port; }

private: // functions
std::unique_ptr<Response> handle_request(const RequestContext& request);
Expand Down
31 changes: 31 additions & 0 deletions src/server/response.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ std::unique_ptr<ContentResponse> Response::build_404(const InternalServer& serve
}

extern const UrlNotFoundMsg urlNotFoundMsg;
extern const InvalidUrlMsg invalidUrlMsg;

std::unique_ptr<ContentResponse> ContentResponseBlueprint::generateResponseObject() const
{
Expand Down Expand Up @@ -145,6 +146,36 @@ HTTP404HtmlResponse& HTTP404HtmlResponse::operator+(const std::string& msg)
return *this;
}

/**
 * Prepares an HTTP 400 (MHD_HTTP_BAD_REQUEST) HTML response based on the
 * 400.html template.
 *
 * The template data starts with an empty "details" list; paragraphs are added
 * to it through the operator+ overloads.
 *
 * @param server   The server producing the response.
 * @param request  The request being answered.
 */
HTTP400HtmlResponse::HTTP400HtmlResponse(const InternalServer& server,
                                         const RequestContext& request)
  : ContentResponseBlueprint(&server,
                             &request,
                             MHD_HTTP_BAD_REQUEST,
                             "text/html",
                             RESOURCE::templates::_400_html)
{
  kainjow::mustache::list noDetails;
  m_data = kainjow::mustache::object{{"details", noDetails}};
}

/**
 * Appends the standard "invalid URL" paragraph to the response details.
 *
 * The paragraph quotes the full request URL followed, when present, by the
 * query string.
 *
 * @return *this, so that further details can be chained.
 */
HTTP400HtmlResponse& HTTP400HtmlResponse::operator+(InvalidUrlMsg /*unused*/)
{
  // The message template below inserts the URL through a triple mustache
  // ({{{url}}}), i.e. without HTML escaping. Both client-controlled parts of
  // the URL therefore go through encodeDiples before being inserted, not only
  // the query string.
  // NOTE(review): assumes encodeDiples neutralises markup characters the same
  // way for the path as it does for the query - confirm against its definition.
  std::string requestUrl = encodeDiples(m_request.get_full_url());
  const auto query = m_request.get_query();
  if (!query.empty()) {
    requestUrl += "?" + encodeDiples(query);
  }
  kainjow::mustache::mustache msgTmpl(R"(The requested URL "{{{url}}}" is not a valid request.)");
  return *this + msgTmpl.render({"url", requestUrl});
}

/**
 * Appends one entry to the "details" list of the template data.
 *
 * The entry exposes the message under the key "p".
 *
 * @param msg  The text of the detail paragraph.
 * @return *this, so that further details can be chained.
 */
HTTP400HtmlResponse& HTTP400HtmlResponse::operator+(const std::string& msg)
{
m_data["details"].push_back({"p", msg});
return *this;
}


ContentResponseBlueprint& ContentResponseBlueprint::operator+(const TaskbarInfo& taskbarInfo)
{
this->m_taskbarInfo.reset(new TaskbarInfo(taskbarInfo));
Expand Down
14 changes: 14 additions & 0 deletions src/server/response.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,20 @@ struct HTTP404HtmlResponse : ContentResponseBlueprint
HTTP404HtmlResponse& operator+(const std::string& errorDetails);
};

// Empty tag type: selects the HTTP400HtmlResponse::operator+ overload that
// appends the standard "invalid URL" message.
class InvalidUrlMsg {};

// Tag instance (declared here, defined in another translation unit) to be
// used as `response + invalidUrlMsg`.
extern const InvalidUrlMsg invalidUrlMsg;

/**
 * Blueprint of an HTTP 400 HTML error page.
 *
 * Error details are accumulated with operator+ before the response object is
 * generated.
 */
struct HTTP400HtmlResponse : ContentResponseBlueprint
{
  HTTP400HtmlResponse(const InternalServer& server, const RequestContext& request);

  // Keep the base-class operator+ overloads visible next to the ones below.
  using ContentResponseBlueprint::operator+;

  /** Appends the standard "invalid URL" message. */
  HTTP400HtmlResponse& operator+(InvalidUrlMsg /*unused*/);

  /** Appends a free-form detail paragraph. */
  HTTP400HtmlResponse& operator+(const std::string& errorDetails);
};

class ItemResponse : public Response {
public:
ItemResponse(bool verbose, const zim::Item& item, const std::string& mimetype, const ByteRange& byterange);
Expand Down
1 change: 1 addition & 0 deletions static/resources_list.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ skin/block_external.js
skin/search_results.css
templates/search_result.html
templates/no_search_result.html
templates/400.html
templates/404.html
templates/500.html
templates/index.html
Expand Down
15 changes: 15 additions & 0 deletions static/templates/400.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta content="text/html;charset=UTF-8" http-equiv="content-type" />
<title>Invalid request</title>
</head>
<body>
<h1>Invalid request</h1>
{{#details}}
<p>
{{{p}}}
</p>
{{/details}}
</body>
</html>
4 changes: 2 additions & 2 deletions static/templates/search_result.html
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,11 @@
</b> of <b>
{{count}}
</b> for <b>
{{searchPattern}}
"{{{searchPattern}}}"
</b>
{{/hasResults}}
{{^hasResults}}
No results were found for <b>{{searchPattern}}</b>
No results were found for <b>"{{{searchPattern}}}"</b>
{{/hasResults}}
</div>

Expand Down
Loading

0 comments on commit 95d4dd6

Please sign in to comment.