Skip to content

Commit

Permalink
Merge pull request #967 from kiwix/opdsFilters
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr authored Jul 26, 2023
2 parents 903dcd4 + 385931f commit 9c91fc7
Show file tree
Hide file tree
Showing 9 changed files with 348 additions and 60 deletions.
2 changes: 2 additions & 0 deletions include/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ headers = [
'manager.h',
'libxml_dumper.h',
'opds_dumper.h',
'library_dumper.h',
'html_dumper.h',
'downloader.h',
'search_renderer.h',
'server.h',
Expand Down
26 changes: 26 additions & 0 deletions include/tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@
#include <cstdint>

namespace kiwix {
typedef std::pair<std::string, std::string> LangNameCodePair;
typedef std::vector<LangNameCodePair> FeedLanguages;
typedef std::vector<std::string> FeedCategories;

/**
* Return the current directory.
Expand Down Expand Up @@ -226,5 +229,28 @@ std::string getBestPublicIp();
*/
std::string beautifyFileSize(uint64_t number);

/**
* Load languages stored in an OPDS stream.
*
* @param content the OPDS stream.
* @return vector containing pairs of language code and their corresponding full language name.
*/
FeedLanguages readLanguagesFromFeed(const std::string& content);

/**
* Load categories stored in an OPDS stream .
*
* @param content the OPDS stream.
* @return vector containing category strings.
*/
FeedCategories readCategoriesFromFeed(const std::string& content);

/**
* Retrieve the full language name associated with a given ISO 639-3 language code.
*
* @param lang ISO 639-3 language code.
* @return full language name.
*/
std::string getLanguageSelfName(const std::string& lang);
}
#endif // KIWIX_TOOLS_H
60 changes: 0 additions & 60 deletions src/library_dumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,65 +23,6 @@ void LibraryDumper::setOpenSearchInfo(int totalResults, int startIndex, int coun
m_count = count;
}

namespace {

std::map<std::string, std::string> iso639_3 = {
{"atj", "atikamekw"},
{"azb", "آذربایجان دیلی"},
{"bcl", "central bikol"},
{"bgs", "tagabawa"},
{"bxr", "буряад хэлэн"},
{"cbk", "chavacano"},
{"cdo", "閩東語"},
{"dag", "Dagbani"},
{"diq", "dimli"},
{"dty", "डोटेली"},
{"eml", "emiliân-rumagnōl"},
{"fbs", "српскохрватски"},
{"guw", "Gungbe"},
{"hbs", "srpskohrvatski"},
{"ido", "ido"},
{"kbp", "kabɩyɛ"},
{"kld", "Gamilaraay"},
{"lbe", "лакку маз"},
{"lbj", "ལ་དྭགས་སྐད་"},
{"map", "Austronesian"},
{"mhr", "марий йылме"},
{"mnw", "ဘာသာမန်"},
{"myn", "mayan"},
{"nah", "nahuatl"},
{"nai", "north American Indian"},
{"nds", "plattdütsch"},
{"nrm", "bhasa narom"},
{"olo", "livvi"},
{"pih", "Pitcairn-Norfolk"},
{"pnb", "Western Panjabi"},
{"rmr", "Caló"},
{"rmy", "romani shib"},
{"roa", "romance languages"},
{"twi", "twi"},
};

std::once_flag fillLanguagesFlag;

void fillLanguagesMap()
{
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
const ICULanguageInfo lang(*icuLangPtr);
iso639_3.insert({lang.iso3Code(), lang.selfName()});
}
}

std::string getLanguageSelfName(const std::string& lang) {
const auto itr = iso639_3.find(lang);
if (itr != iso639_3.end()) {
return itr->second;
}
return lang;
};

} // unnamed namespace

kainjow::mustache::list LibraryDumper::getCategoryData() const
{
const auto now = gen_date_str();
Expand All @@ -102,7 +43,6 @@ kainjow::mustache::list LibraryDumper::getLanguageData() const
{
const auto now = gen_date_str();
kainjow::mustache::list languageData;
std::call_once(fillLanguagesFlag, fillLanguagesMap);
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
const std::string languageCode = langAndBookCount.first;
const int bookCount = langAndBookCount.second;
Expand Down
2 changes: 2 additions & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ kiwix_sources = [
'tools/regexTools.cpp',
'tools/stringTools.cpp',
'tools/networkTools.cpp',
'tools/opdsParsingTools.cpp',
'tools/languageTools.cpp',
'tools/otherTools.cpp',
'tools/archiveTools.cpp',
'kiwixserve.cpp',
Expand Down
74 changes: 74 additions & 0 deletions src/tools/languageTools.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#include "tools.h"
#include "stringTools.h"
#include <mutex>

namespace kiwix
{

namespace
{

// These mappings are not provided by the ICU library, any such mappings can be manually added here
std::map<std::string, std::string> iso639_3 = {
{"atj", "atikamekw"},
{"azb", "آذربایجان دیلی"},
{"bcl", "central bikol"},
{"bgs", "tagabawa"},
{"bxr", "буряад хэлэн"},
{"cbk", "chavacano"},
{"cdo", "閩東語"},
{"dag", "Dagbani"},
{"diq", "dimli"},
{"dty", "डोटेली"},
{"eml", "emiliân-rumagnōl"},
{"fbs", "српскохрватски"},
{"guw", "Gungbe"},
{"hbs", "srpskohrvatski"},
{"ido", "ido"},
{"kbp", "kabɩyɛ"},
{"kld", "Gamilaraay"},
{"lbe", "лакку маз"},
{"lbj", "ལ་དྭགས་སྐད་"},
{"map", "Austronesian"},
{"mhr", "марий йылме"},
{"mnw", "ဘာသာမန်"},
{"myn", "mayan"},
{"nah", "nahuatl"},
{"nai", "north American Indian"},
{"nds", "plattdütsch"},
{"nrm", "bhasa narom"},
{"olo", "livvi"},
{"pih", "Pitcairn-Norfolk"},
{"pnb", "Western Panjabi"},
{"rmr", "Caló"},
{"rmy", "romani shib"},
{"roa", "romance languages"},
{"twi", "twi"},
// ICU for Ubuntu versions <= focal (20.04) returns "" for the language code ""
// unlike the later versions - which returns "und". We map this value to "Undetermined" for a common ground.
{"", "Undetermined"},
};

std::once_flag fillLanguagesFlag;

void fillLanguagesMap()
{
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
const kiwix::ICULanguageInfo lang(*icuLangPtr);
iso639_3.insert({lang.iso3Code(), lang.selfName()});
}
}

} // unnamed namespace

std::string getLanguageSelfName(const std::string& lang)
{
std::call_once(fillLanguagesFlag, fillLanguagesMap);
const auto itr = iso639_3.find(lang);
if (itr != iso639_3.end()) {
return itr->second;
}
return lang;
};

} // namespace kiwix
70 changes: 70 additions & 0 deletions src/tools/opdsParsingTools.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#include "tools.h"
#include <pugixml.hpp>

namespace kiwix
{

namespace
{
#define VALUE(name) entryNode.child(name).child_value()
FeedLanguages parseLanguages(const pugi::xml_document& doc)
{
pugi::xml_node feedNode = doc.child("feed");
FeedLanguages langs;

for (pugi::xml_node entryNode = feedNode.child("entry"); entryNode;
entryNode = entryNode.next_sibling("entry")) {
auto title = VALUE("title");
auto code = VALUE("dc:language");
langs.push_back({code, title});
}

return langs;
}

FeedCategories parseCategories(const pugi::xml_document& doc)
{
pugi::xml_node feedNode = doc.child("feed");
FeedCategories categories;

for (pugi::xml_node entryNode = feedNode.child("entry"); entryNode;
entryNode = entryNode.next_sibling("entry")) {
auto title = VALUE("title");
categories.push_back(title);
}

return categories;
}

} // unnamed namespace

FeedLanguages readLanguagesFromFeed(const std::string& content)
{
pugi::xml_document doc;
pugi::xml_parse_result result
= doc.load_buffer((void*)content.data(), content.size());

if (result) {
auto langs = parseLanguages(doc);
return langs;
}

return FeedLanguages();
}

FeedCategories readCategoriesFromFeed(const std::string& content)
{
pugi::xml_document doc;
pugi::xml_parse_result result
= doc.load_buffer((void*)content.data(), content.size());

FeedCategories categories;
if (result) {
categories = parseCategories(doc);
return categories;
}

return categories;
}

} // namespace kiwix
41 changes: 41 additions & 0 deletions test/languageTools.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (C) 2023 Nikhil Tanwar (2002nikhiltanwar@gmail.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#include "gtest/gtest.h"
#include "../include/tools.h"

namespace
{

TEST(LanguageToolsTest, englishTest)
{
EXPECT_EQ(kiwix::getLanguageSelfName("eng"), "English");
}

TEST(LanguageToolsTest, manualValuesTest)
{
EXPECT_EQ(kiwix::getLanguageSelfName("dty"), "डोटेली");
}

TEST(LanguageToolsTest, emptyStringTest)
{
EXPECT_EQ(kiwix::getLanguageSelfName(""), "Undetermined");
}

}
2 changes: 2 additions & 0 deletions test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ tests = [
'stringTools',
'pathTools',
'otherTools',
'opdsParsingTools',
'languageTools',
'kiwixserve',
'book',
'manager',
Expand Down
Loading

0 comments on commit 9c91fc7

Please sign in to comment.