Skip to content

Commit

Permalink
Move getLanguageSelfName to tools.h
Browse files Browse the repository at this point in the history
This is a general utility which other ports can get use of.
Added tests
  • Loading branch information
juuz0 committed Jul 21, 2023
1 parent 2fb51b7 commit bd1348f
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 60 deletions.
8 changes: 8 additions & 0 deletions include/tools.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,5 +234,13 @@ FeedLanguages readLanguagesFromFeed(const std::string& content);
* @return vector containing category strings.
*/
FeedCategories readCategoriesFromFeed(const std::string& content);

/**
* Retrieve the full language name associated with a given ISO 639-3 language code.
*
* @param lang ISO 639-3 language code.
* @return full language name.
*/
std::string getLanguageSelfName(const std::string& lang);
}
#endif // KIWIX_TOOLS_H
60 changes: 0 additions & 60 deletions src/library_dumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,65 +23,6 @@ void LibraryDumper::setOpenSearchInfo(int totalResults, int startIndex, int coun
m_count = count;
}

namespace {

std::map<std::string, std::string> iso639_3 = {
{"atj", "atikamekw"},
{"azb", "آذربایجان دیلی"},
{"bcl", "central bikol"},
{"bgs", "tagabawa"},
{"bxr", "буряад хэлэн"},
{"cbk", "chavacano"},
{"cdo", "閩東語"},
{"dag", "Dagbani"},
{"diq", "dimli"},
{"dty", "डोटेली"},
{"eml", "emiliân-rumagnōl"},
{"fbs", "српскохрватски"},
{"guw", "Gungbe"},
{"hbs", "srpskohrvatski"},
{"ido", "ido"},
{"kbp", "kabɩyɛ"},
{"kld", "Gamilaraay"},
{"lbe", "лакку маз"},
{"lbj", "ལ་དྭགས་སྐད་"},
{"map", "Austronesian"},
{"mhr", "марий йылме"},
{"mnw", "ဘာသာမန်"},
{"myn", "mayan"},
{"nah", "nahuatl"},
{"nai", "north American Indian"},
{"nds", "plattdütsch"},
{"nrm", "bhasa narom"},
{"olo", "livvi"},
{"pih", "Pitcairn-Norfolk"},
{"pnb", "Western Panjabi"},
{"rmr", "Caló"},
{"rmy", "romani shib"},
{"roa", "romance languages"},
{"twi", "twi"},
};

std::once_flag fillLanguagesFlag;

void fillLanguagesMap()
{
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
const ICULanguageInfo lang(*icuLangPtr);
iso639_3.insert({lang.iso3Code(), lang.selfName()});
}
}

std::string getLanguageSelfName(const std::string& lang) {
const auto itr = iso639_3.find(lang);
if (itr != iso639_3.end()) {
return itr->second;
}
return lang;
};

} // unnamed namespace

kainjow::mustache::list LibraryDumper::getCategoryData() const
{
const auto now = gen_date_str();
Expand All @@ -102,7 +43,6 @@ kainjow::mustache::list LibraryDumper::getLanguageData() const
{
const auto now = gen_date_str();
kainjow::mustache::list languageData;
std::call_once(fillLanguagesFlag, fillLanguagesMap);
for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
const std::string languageCode = langAndBookCount.first;
const int bookCount = langAndBookCount.second;
Expand Down
1 change: 1 addition & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ kiwix_sources = [
'tools/stringTools.cpp',
'tools/networkTools.cpp',
'tools/opdsParsingTools.cpp',
'tools/languageTools.cpp',
'tools/otherTools.cpp',
'tools/archiveTools.cpp',
'kiwixserve.cpp',
Expand Down
71 changes: 71 additions & 0 deletions src/tools/languageTools.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#include "tools.h"
#include "stringTools.h"
#include <mutex>

namespace kiwix
{

namespace
{

// These mappings are not provided by the ICU library, any such mappings can be manually added here
std::map<std::string, std::string> iso639_3 = {
{"atj", "atikamekw"},
{"azb", "آذربایجان دیلی"},
{"bcl", "central bikol"},
{"bgs", "tagabawa"},
{"bxr", "буряад хэлэн"},
{"cbk", "chavacano"},
{"cdo", "閩東語"},
{"dag", "Dagbani"},
{"diq", "dimli"},
{"dty", "डोटेली"},
{"eml", "emiliân-rumagnōl"},
{"fbs", "српскохрватски"},
{"guw", "Gungbe"},
{"hbs", "srpskohrvatski"},
{"ido", "ido"},
{"kbp", "kabɩyɛ"},
{"kld", "Gamilaraay"},
{"lbe", "лакку маз"},
{"lbj", "ལ་དྭགས་སྐད་"},
{"map", "Austronesian"},
{"mhr", "марий йылме"},
{"mnw", "ဘာသာမန်"},
{"myn", "mayan"},
{"nah", "nahuatl"},
{"nai", "north American Indian"},
{"nds", "plattdütsch"},
{"nrm", "bhasa narom"},
{"olo", "livvi"},
{"pih", "Pitcairn-Norfolk"},
{"pnb", "Western Panjabi"},
{"rmr", "Caló"},
{"rmy", "romani shib"},
{"roa", "romance languages"},
{"twi", "twi"},
};

std::once_flag fillLanguagesFlag;

void fillLanguagesMap()
{
for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
const kiwix::ICULanguageInfo lang(*icuLangPtr);
iso639_3.insert({lang.iso3Code(), lang.selfName()});
}
}

} // unnamed namespace

std::string getLanguageSelfName(const std::string& lang)
{
std::call_once(fillLanguagesFlag, fillLanguagesMap);
const auto itr = iso639_3.find(lang);
if (itr != iso639_3.end()) {
return itr->second;
}
return lang;
};

} // namespace kiwix
41 changes: 41 additions & 0 deletions test/languageTools.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright (C) 2023 Nikhil Tanwar (2002nikhiltanwar@gmail.com)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
* warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
* NON-INFRINGEMENT. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/

#include "gtest/gtest.h"
#include "../include/tools.h"

namespace
{

TEST(LanguageToolsTest, englishTest)
{
EXPECT_EQ(kiwix::getLanguageSelfName("eng"), "English");
}

TEST(LanguageToolsTest, manualValuesTest)
{
EXPECT_EQ(kiwix::getLanguageSelfName("dty"), "डोटेली");
}

TEST(LanguageToolsTest, emptyStringTest)
{
EXPECT_EQ(kiwix::getLanguageSelfName(""), "und");
}

}
1 change: 1 addition & 0 deletions test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ tests = [
'pathTools',
'otherTools',
'opdsParsingTools',
'languageTools',
'kiwixserve',
'book',
'manager',
Expand Down

0 comments on commit bd1348f

Please sign in to comment.