Skip to content
This repository has been archived by the owner on May 10, 2023. It is now read-only.

Commit

Permalink
feat: only enable languages also enabled on Pontoon (#558)
Browse files Browse the repository at this point in the history
* chore: only enable languages that are also enabled on Pontoon (fixes #534)
* chore: sort language dropdown by localized language name
* chore: always use same Language object structure
* chore: use language code as fallback for title if needed
* chore: add cache to language fetch
* chore: use language code as fallback for stats and profile as well
* chore: only return valid languages for user profile
  • Loading branch information
MichaelKohler authored Nov 20, 2021
1 parent 358d530 commit 0da87d0
Show file tree
Hide file tree
Showing 34 changed files with 380 additions and 600 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ The sentence collector is now accessible through `http://localhost:3333`. We're

We use the same languages as Common Voice does, and therefore rely on the information Common Voice stores in its repository. The `locales` JSON files in this repo only contain example data. These files get updated when deploying Sentence Collector.

If you want to work locally with the same info as in production/staging, you can run `npm run pull-cv-locales-info` in the root folder and it will download the required files. If you additionally want to have all the translations available locally, you can run `npm run pull-cv-translations` in the root folder.
If you want to work locally with the same info as in production/staging, you can run `npm run pull-cv-locales-info` in the root folder and it will download the required files. If you additionally want to have all the translations available locally, you can run `npm run pull-cv-translations` in the root folder. **Note that even after pulling this information, not all strings will be translated.** We do not fetch the complete English translation file, so the language selection will be broken in development. This is not ideal, but it is the expected behavior for now. It is a drawback of the current infrastructure: we keep the Sentence Collector EN FTL strings under version control, so we do not want the complete English translation file from Common Voice. We do not want to risk pushing the CV strings twice, and therefore we do not touch the English file. If you want to test the language selection, use a different UI language after running `npm run pull-cv-translations`.

## Getting required parameters from Auth0

Expand Down
345 changes: 23 additions & 322 deletions server/lib/languages.js
Original file line number Diff line number Diff line change
@@ -1,341 +1,42 @@
const ISO6391 = require('iso-639-1');
const debug = require('debug')('sentencecollector:languages');
const fetch = require('node-fetch');

const nativeNames = require('../../locales/native-names.json');
const { Sentence } = require('./models');

// Locale code exported by this module as the fallback locale.
// NOTE(review): the consumers of this value are outside this view.
const FALLBACK_LOCALE = 'en';
// Maximum age, in milliseconds (30 minutes), of the cached language list;
// getAllLanguages() compares it against the time since cache.lastCacheUpdate.
const MAX_CACHE_AGE = 30 * 60 * 1000;
// In-memory cache object; getAllLanguages() stores `languages` and
// `lastCacheUpdate` (a Date.now() timestamp) on it.
const cache = {};

// Names exported from this module.
module.exports = {
FALLBACK_LOCALE,
getAllLanguages,
getMissingLanguages,
getLanguagesNotInPontoon,
};

/**
 * Extra language entries that addAdditionalLanguages() concatenates onto a
 * language list. Every entry carries three string fields:
 *   id         - language/locale code
 *   name       - language name
 *   nativeName - language name as written in the language itself
 * Where present, the URL above an entry points at the related Common Voice issue.
 */
const ADDITIONAL_LANGUAGES = [
  { id: 'ace', name: 'Achinese', nativeName: 'بهسا اچيه' },
  { id: 'ady', name: 'Adyghe', nativeName: 'Адыгабзэ' },
  { id: 'arn', name: 'Mapudungun', nativeName: 'Mapudungun' },
  { id: 'ast', name: 'Asturian', nativeName: 'asturianu' },
  { id: 'bas', name: 'Basaa', nativeName: 'Ɓàsàa' },
  { id: 'bxr', name: 'Russia Buriat', nativeName: 'буряад хэлэн' },
  { id: 'cak', name: 'Kaqchikel', nativeName: 'Kaqchikel' },
  { id: 'ckb', name: 'Central Kurdish', nativeName: 'کوردی' },
  { id: 'cnh', name: 'Hakha Chin', nativeName: 'Lai' },
  { id: 'dsb', name: 'Lower Sorbian', nativeName: 'dolnoserbšćina' },
  { id: 'ff', name: 'Fulah', nativeName: 'Pulaar-Fulfulde' },
  { id: 'fy-NL', name: 'Frisian', nativeName: 'Frysk' },
  { id: 'ga-IE', name: 'Irish', nativeName: 'Irish' },
  { id: 'hsb', name: 'Upper Sorbian', nativeName: 'Hornjoserbšćina' },
  { id: 'izh', name: 'Izhorian', nativeName: 'Izhorian' },
  { id: 'kaa', name: 'Karakalpak', nativeName: 'Qaraqalpaq tili' },
  { id: 'kab', name: 'Kabyle', nativeName: 'Taqbaylit' },
  { id: 'kbd', name: 'Kabardian', nativeName: 'Адыгэбзэ' },
  { id: 'kmr', name: 'Northern Kurdish', nativeName: 'Kurdî (Kurmancî)' },
  { id: 'kpv', name: 'Komi-Zyrian', nativeName: 'Коми кыв' },
  { id: 'ks', name: 'Kashmiri', nativeName: 'كٲشُر' },
  { id: 'lij', name: 'Ligurian', nativeName: 'Lìgure' },
  // https://github.com/common-voice/common-voice/issues/3043
  { id: 'mai', name: 'Maithili', nativeName: 'मैथिली' },
  { id: 'mdf', name: 'Moksha', nativeName: 'мокшень кяль' },
  { id: 'mhr', name: 'Eastern Mari', nativeName: 'Eastern Mari' },
  // https://github.com/common-voice/common-voice/issues/3336
  { id: 'mni', name: 'Meetei Lon', nativeName: 'ꯃꯤꯇꯩꯂꯣꯟ' },
  // https://github.com/common-voice/common-voice/issues/2945
  { id: 'mos', name: 'Mossi', nativeName: 'Mooré' },
  { id: 'mrj', name: 'Western Mari', nativeName: 'Western Mari' },
  { id: 'myv', name: 'Erzya', nativeName: 'эрзянь кель' },
  // https://github.com/common-voice/common-voice/issues/3194
  { id: 'nan-tw', name: 'Taiwanese', nativeName: '臺語' },
  { id: 'ne-NP', name: 'Nepali', nativeName: 'नेपाली' },
  // https://github.com/common-voice/common-voice/issues/3160
  { id: 'nia', name: 'Nias', nativeName: 'Li Niha' },
  { id: 'nb-NO', name: 'Norwegian', nativeName: 'Norsk bokmål' },
  { id: 'nn-NO', name: 'Norwegian', nativeName: 'Norsk nynorsk' },
  { id: 'nyn', name: 'Runyankore', nativeName: 'Runyankore' },
  { id: 'pa-IN', name: 'Panjabi', nativeName: 'ਪੰਜਾਬੀ' },
  // https://github.com/common-voice/common-voice/issues/3044
  { id: 'pap-AW', name: 'Papiamento - Aruba', nativeName: 'Papiamento' },
  { id: 'quc', name: 'Kʼicheʼ', nativeName: 'Kʼicheʼ' },
  { id: 'rm-sursilv', name: 'Romansh Sursilvan', nativeName: 'romontsch sursilvan' },
  { id: 'rm-vallader', name: 'Romansh Vallader', nativeName: 'rumantsch vallader' },
  { id: 'sah', name: 'Sakha', nativeName: 'Саха тыла' },
  // https://github.com/common-voice/common-voice/issues/3214
  { id: 'sat', name: 'Santali', nativeName: 'ᱥᱟᱱᱛᱟᱲᱤ' },
  { id: 'scn', name: 'Sicilian', nativeName: 'sicilianu' },
  // https://github.com/common-voice/common-voice/issues/3032
  { id: 'shi', name: 'Shilha', nativeName: 'Taclḥit' },
  { id: 'sv-SE', name: 'Swedish', nativeName: 'Svenska' },
  { id: 'syr', name: 'Syriac', nativeName: 'ܣܘܼܪܝܝܐ' },
  { id: 'tig', name: 'Tigre', nativeName: 'ትግረ' },
  { id: 'uby', name: 'Ubykh', nativeName: 'Ubykh' },
  { id: 'udm', name: 'Udmurt', nativeName: 'удмурт кыл' },
  { id: 'vec', name: 'Venetian', nativeName: 'vèneto' },
  { id: 'vot', name: 'Votic', nativeName: 'maaceeli' },
  { id: 'zh-CN', name: 'Chinese - China', nativeName: '中文 (中国)' },
  { id: 'zh-TW', name: 'Chinese - Taiwan', nativeName: '中文 (台灣)' },
  { id: 'zh-HK', name: 'Chinese - Hong Kong', nativeName: '中文 (香港)' },
  { id: 'yue', name: 'Cantonese', nativeName: '粵語' },
];
async function getAllLanguages() {
if (typeof cache.lastCacheUpdate !== 'undefined' && Date.now() - cache.lastCacheUpdate <= MAX_CACHE_AGE) {
debug('RETURN_CACHED_LANGUAGES_LIST');
return cache.languages;
}

const LANGUAGES_TO_REMOVE = [
'ku',
'zh',
'nb', // covered by nb-NO
'nn', // covered by nn-NO
'no', // covered by nb-NO and nn-NO
'ks', // re-added above as native name is wrong
'ff', // re-added - https://discourse.mozilla.org/t/fulah-language-naming-consistency/78378
'ga', // covered by ga-IE
'sv', // covered by sv-SE
'ne', // covered by ne-NP
'pa', // covered by pa-IN
];

const isoLanguages = ISO6391.getLanguages(ISO6391.getAllCodes());
const languagesWithoutRemoved = removeLanguages(isoLanguages);
const codeIdReplaced = languagesWithoutRemoved.map((lang) => ({
id: lang.code,
name: lang.name,
nativeName: lang.nativeName,
}));
const allLanguages = addAdditionalLanguages(codeIdReplaced);
const allLanguagesSorted = allLanguages.sort((a, b) => {
if (a.name < b.name) return -1;
if (a.name > b.name) return 1;
return 0;
});
const loadedLanguages = allLanguagesSorted;

function getAllLanguages() {
return loadedLanguages;
debug('FETCHING_NEW_LANGUAGES_LIST');
const fetchedLanguages = await fetchAllLanguages();
cache.languages = fetchedLanguages;
cache.lastCacheUpdate = Date.now();
return fetchedLanguages;
}

/**
 * Returns a new array without the languages whose `code` is listed in
 * LANGUAGES_TO_REMOVE. The input array is not modified.
 *
 * @param {Array<{code: string}>} languages - language records keyed by `code`
 * @returns {Array<{code: string}>} the records that are not on the removal list
 */
function removeLanguages(languages) {
  const isKept = ({ code }) => !LANGUAGES_TO_REMOVE.includes(code);
  return languages.filter(isKept);
}

/**
 * Returns a new array consisting of the given languages followed by every
 * entry of ADDITIONAL_LANGUAGES. The input array is not modified.
 *
 * @param {Array<Object>} languages - language records to extend
 * @returns {Array<Object>} the given records plus the additional ones
 */
function addAdditionalLanguages(languages) {
  const withAdditions = languages.concat(ADDITIONAL_LANGUAGES);
  return withAdditions;
}

async function getMissingLanguages() {
async function fetchAllLanguages() {
const pontoonLanguages = await fetchPontoonLanguages();
const scLanguages = loadedLanguages.map(({ id }) => id);
const missingLanguages = pontoonLanguages.filter((lang) => !scLanguages.includes(lang));
return missingLanguages;
const allLanguages = pontoonLanguages.map((languageCode) => {
return {
id: languageCode,
nativeName: nativeNames[languageCode],
};
});

return allLanguages;
}

async function getLanguagesNotInPontoon() {
Expand Down
Loading

0 comments on commit 0da87d0

Please sign in to comment.