diff --git a/include/tools.h b/include/tools.h
index 6891a0852..6578fa5e5 100644
--- a/include/tools.h
+++ b/include/tools.h
@@ -243,5 +243,14 @@ FeedLanguages readLanguagesFromFeed(const std::string& content);
  * @return vector containing category strings.
  */
 FeedCategories readCategoriesFromFeed(const std::string& content);
+
+/**
+ * Retrieve the full language name associated with a given ISO 639-3 language code.
+ *
+ * @param lang ISO 639-3 language code.
+ * @return full language name; "und" if `lang` is empty, or `lang` itself
+ *         when no name is known for that code.
+ */
+std::string getLanguageSelfName(const std::string& lang);
 }
 #endif // KIWIX_TOOLS_H
diff --git a/src/library_dumper.cpp b/src/library_dumper.cpp
index 4fa4c6635..74cb157a7 100644
--- a/src/library_dumper.cpp
+++ b/src/library_dumper.cpp
@@ -23,65 +23,6 @@ void LibraryDumper::setOpenSearchInfo(int totalResults, int startIndex, int coun
   m_count = count;
 }
 
-namespace {
-
-std::map<std::string, std::string> iso639_3 = {
-  {"atj", "atikamekw"},
-  {"azb", "آذربایجان دیلی"},
-  {"bcl", "central bikol"},
-  {"bgs", "tagabawa"},
-  {"bxr", "буряад хэлэн"},
-  {"cbk", "chavacano"},
-  {"cdo", "閩東語"},
-  {"dag", "Dagbani"},
-  {"diq", "dimli"},
-  {"dty", "डोटेली"},
-  {"eml", "emiliân-rumagnōl"},
-  {"fbs", "српскохрватски"},
-  {"guw", "Gungbe"},
-  {"hbs", "srpskohrvatski"},
-  {"ido", "ido"},
-  {"kbp", "kabɩyɛ"},
-  {"kld", "Gamilaraay"},
-  {"lbe", "лакку маз"},
-  {"lbj", "ལ་དྭགས་སྐད་"},
-  {"map", "Austronesian"},
-  {"mhr", "марий йылме"},
-  {"mnw", "ဘာသာမန်"},
-  {"myn", "mayan"},
-  {"nah", "nahuatl"},
-  {"nai", "north American Indian"},
-  {"nds", "plattdütsch"},
-  {"nrm", "bhasa narom"},
-  {"olo", "livvi"},
-  {"pih", "Pitcairn-Norfolk"},
-  {"pnb", "Western Panjabi"},
-  {"rmr", "Caló"},
-  {"rmy", "romani shib"},
-  {"roa", "romance languages"},
-  {"twi", "twi"},
-};
-
-std::once_flag fillLanguagesFlag;
-
-void fillLanguagesMap()
-{
-  for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != NULL; ++icuLangPtr) {
-    const ICULanguageInfo lang(*icuLangPtr);
-    iso639_3.insert({lang.iso3Code(), lang.selfName()});
-  }
-}
-
-std::string getLanguageSelfName(const std::string& lang) {
-  const auto itr = iso639_3.find(lang);
-  if (itr != iso639_3.end()) {
-    return itr->second;
-  }
-  return lang;
-};
-
-} // unnamed namespace
-
 kainjow::mustache::list LibraryDumper::getCategoryData() const
 {
   const auto now = gen_date_str();
@@ -102,7 +43,6 @@ kainjow::mustache::list LibraryDumper::getLanguageData() const
 {
   const auto now = gen_date_str();
   kainjow::mustache::list languageData;
-  std::call_once(fillLanguagesFlag, fillLanguagesMap);
   for ( const auto& langAndBookCount : library->getBooksLanguagesWithCounts() ) {
     const std::string languageCode = langAndBookCount.first;
     const int bookCount = langAndBookCount.second;
diff --git a/src/meson.build b/src/meson.build
index a73fbc9e7..e68520e2f 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -18,6 +18,7 @@ kiwix_sources = [
   'tools/stringTools.cpp',
   'tools/networkTools.cpp',
   'tools/opdsParsingTools.cpp',
+  'tools/languageTools.cpp',
   'tools/otherTools.cpp',
   'tools/archiveTools.cpp',
   'kiwixserve.cpp',
diff --git a/src/tools/languageTools.cpp b/src/tools/languageTools.cpp
new file mode 100644
index 000000000..f4583c2e2
--- /dev/null
+++ b/src/tools/languageTools.cpp
@@ -0,0 +1,82 @@
+#include "tools.h"
+#include "stringTools.h"
+
+#include <map>
+#include <mutex>
+#include <string>
+
+namespace kiwix
+{
+
+namespace
+{
+
+// These mappings are not provided by the ICU library; any such mapping can be
+// added manually here.
+std::map<std::string, std::string> iso639_3 = {
+  {"atj", "atikamekw"},
+  {"azb", "آذربایجان دیلی"},
+  {"bcl", "central bikol"},
+  {"bgs", "tagabawa"},
+  {"bxr", "буряад хэлэн"},
+  {"cbk", "chavacano"},
+  {"cdo", "閩東語"},
+  {"dag", "Dagbani"},
+  {"diq", "dimli"},
+  {"dty", "डोटेली"},
+  {"eml", "emiliân-rumagnōl"},
+  {"fbs", "српскохрватски"},
+  {"guw", "Gungbe"},
+  {"hbs", "srpskohrvatski"},
+  {"ido", "ido"},
+  {"kbp", "kabɩyɛ"},
+  {"kld", "Gamilaraay"},
+  {"lbe", "лакку маз"},
+  {"lbj", "ལ་དྭགས་སྐད་"},
+  {"map", "Austronesian"},
+  {"mhr", "марий йылме"},
+  {"mnw", "ဘာသာမန်"},
+  {"myn", "mayan"},
+  {"nah", "nahuatl"},
+  {"nai", "north American Indian"},
+  {"nds", "plattdütsch"},
+  {"nrm", "bhasa narom"},
+  {"olo", "livvi"},
+  {"pih", "Pitcairn-Norfolk"},
+  {"pnb", "Western Panjabi"},
+  {"rmr", "Caló"},
+  {"rmy", "romani shib"},
+  {"roa", "romance languages"},
+  {"twi", "twi"},
+};
+
+// Guards the one-time merge of the ICU-provided languages into iso639_3.
+std::once_flag fillLanguagesFlag;
+
+// Adds every language listed by ICU to iso639_3. std::map::insert() keeps an
+// existing key, so the manual entries above are never overwritten.
+void fillLanguagesMap()
+{
+  for (auto icuLangPtr = icu::Locale::getISOLanguages(); *icuLangPtr != nullptr; ++icuLangPtr) {
+    const kiwix::ICULanguageInfo lang(*icuLangPtr);
+    iso639_3.insert({lang.iso3Code(), lang.selfName()});
+  }
+}
+
+} // unnamed namespace
+
+std::string getLanguageSelfName(const std::string& lang)
+{
+  std::call_once(fillLanguagesFlag, fillLanguagesMap);
+  // An empty language code is reported as "und".
+  if (lang.empty())
+    return "und";
+  const auto itr = iso639_3.find(lang);
+  if (itr != iso639_3.end()) {
+    return itr->second;
+  }
+  // No name known for this code: fall back to the code itself.
+  return lang;
+}
+
+} // namespace kiwix
diff --git a/test/languageTools.cpp b/test/languageTools.cpp
new file mode 100644
index 000000000..a57467944
--- /dev/null
+++ b/test/languageTools.cpp
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2023 Nikhil Tanwar (2002nikhiltanwar@gmail.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied
+ * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and
+ * NON-INFRINGEMENT. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include "gtest/gtest.h"
+#include "../include/tools.h"
+
+namespace
+{
+
+TEST(LanguageToolsTest, englishTest)
+{
+  EXPECT_EQ(kiwix::getLanguageSelfName("eng"), "English");
+}
+
+TEST(LanguageToolsTest, manualValuesTest)
+{
+  EXPECT_EQ(kiwix::getLanguageSelfName("dty"), "डोटेली");
+}
+
+TEST(LanguageToolsTest, emptyStringTest)
+{
+  EXPECT_EQ(kiwix::getLanguageSelfName(""), "und");
+}
+
+}
diff --git a/test/meson.build b/test/meson.build
index 6e89d2ad3..72d7b7331 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -6,6 +6,7 @@ tests = [
     'pathTools',
     'otherTools',
     'opdsParsingTools',
+    'languageTools',
     'kiwixserve',
     'book',
     'manager',