Skip to content

Commit

Permalink
new validator: isLocale, add support for validation of more valid lan…
Browse files Browse the repository at this point in the history
…guage tags (#2189)

Co-authored-by: Wahome Macharia <wahome@Wahomes-MacBook-Pro.local>
  • Loading branch information
kwahome and Wahome Macharia authored Mar 27, 2023
1 parent 698f4e6 commit fc49ad7
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 5 deletions.
110 changes: 105 additions & 5 deletions src/lib/isLocale.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,111 @@
import assertString from './util/assertString';

// Simple locale pattern: 2-4 letters, optionally followed by a 4-letter or
// 3-digit subtag, optionally followed by a 2-letter or 3-digit subtag.
// Subtags may be separated by either "_" or "-".
const localeReg = /^[A-Za-z]{2,4}([_-]([A-Za-z]{4}|[\d]{3}))?([_-]([A-Za-z]{2}|[\d]{3}))?$/;
/*
  Extended language subtags (ABNF):
    extlang = 3ALPHA              ; selected ISO 639 codes
              *2("-" 3ALPHA)      ; permanently reserved
  i.e. one three-letter subtag followed by up to two more three-letter
  subtags, each introduced by a hyphen.
*/
const extlang = '([A-Za-z]{3}(-[A-Za-z]{3}){0,2})';

/*
  Primary language subtag (ABNF):
    language = 2*3ALPHA           ; shortest ISO 639 code
               ["-" extlang]      ; sometimes followed by
                                  ; extended language subtags
             / 4ALPHA             ; or reserved for future use
             / 5*8ALPHA           ; or registered language subtag
  NOTE: the pattern below matches the 2-3 letter form (with an optional
  extlang joined by a literal "-") and the 5-8 letter form only; the
  reserved 4ALPHA alternative is not matched.
*/
const language = `(([a-zA-Z]{2,3}(-${extlang})?)|([a-zA-Z]{5,8}))`;

/*
  Script subtag (ABNF):
    script = 4ALPHA               ; ISO 15924 code
  Exactly four ASCII letters, in either case.
*/
const script = '([A-Za-z]{4})';

/*
  Region subtag (ABNF):
    region = 2ALPHA               ; ISO 3166-1 code
           / 3DIGIT               ; UN M.49 code
  Written with String.raw so the regex escape needs no double backslash;
  the resulting pattern string is unchanged.
*/
const region = String.raw`([A-Za-z]{2}|\d{3})`;

/*
  Variant subtag (ABNF):
    variant = 5*8alphanum         ; registered variants
            / (DIGIT 3alphanum)
  Either 5-8 alphanumerics, or a digit followed by exactly three
  alphanumerics. The character class must not contain "-": a hyphen is a
  subtag separator and can never be part of a variant.
*/
const variant = '([A-Za-z0-9]{5,8}|(\\d[A-Za-z0-9]{3}))';

/*
  Extension singleton (ABNF):
    singleton = DIGIT             ; 0 - 9
              / %x41-57           ; A - W
              / %x59-5A           ; Y - Z
              / %x61-77           ; a - w
              / %x79-7A           ; y - z
  Any single alphanumeric except "x"/"X". Written with String.raw so the
  regex escape needs no double backslash; the pattern string is unchanged.
*/
const singleton = String.raw`(\d|[A-W]|[Y-Z]|[a-w]|[y-z])`;

/*
  Extension sequence (ABNF):
    extension = singleton 1*("-" (2*8alphanum))
                ; Single alphanumerics
                ; "x" reserved for private use
  One singleton character followed by one or more hyphen-introduced
  subtags of 2-8 alphanumerics each.
*/
const extension = `(${singleton}(-[A-Za-z0-9]{2,8})+)`;

/*
  Private-use sequence (ABNF):
    privateuse = "x" 1*("-" (1*8alphanum))
  A lowercase "x" followed by one or more hyphen-introduced subtags of
  1-8 alphanumerics each. An uppercase "X" prefix is not matched by this
  pattern.
*/
const privateuse = '(x(-[A-Za-z0-9]{1,8})+)';

// Irregular grandfathered tags: they do not match the 'langtag' production
// and would not otherwise be considered 'well-formed'. These tags are all
// valid, but most are deprecated in favor of more modern subtags or subtag
// combinations. Each tag becomes its own group in one alternation.
const irregular = `(${[
  'en-GB-oed',
  'i-ami',
  'i-bnn',
  'i-default',
  'i-enochian',
  'i-hak',
  'i-klingon',
  'i-lux',
  'i-mingo',
  'i-navajo',
  'i-pwn',
  'i-tao',
  'i-tay',
  'i-tsu',
  'sgn-BE-FR',
  'sgn-BE-NL',
  'sgn-CH-DE',
].map(tag => `(${tag})`).join('|')})`;

// Regular grandfathered tags: they match the 'langtag' production, but
// their subtags are not extended language or variant subtags. Their meaning
// is defined by their registration, and all of them are deprecated in favor
// of a more modern subtag or sequence of subtags.
const regular = `(${[
  'art-lojban',
  'cel-gaulish',
  'no-bok',
  'no-nyn',
  'zh-guoyu',
  'zh-hakka',
  'zh-min',
  'zh-min-nan',
  'zh-xiang',
].map(tag => `(${tag})`).join('|')})`;

/*
  Grandfathered tags (ABNF):
    grandfathered = irregular     ; non-redundant tags registered
                  / regular       ; during the RFC 3066 era
  A single group alternating between the two tag lists.
*/
const grandfathered = `(${[irregular, regular].join('|')})`;

/*
  RFC 5646 defines delimitation of subtags via a hyphen:
    "Subtag" refers to a specific section of a tag, delimited by a
    hyphen, such as the subtags 'zh', 'Hant', and 'CN' in the tag
    "zh-Hant-CN". Examples of subtags in this document are enclosed in
    single quotes ('Hant').
  However, "_" is accepted as well in order to maintain the existing
  behaviour of this validator.
*/
const delimiter = '(-|_)';

/*
  Normal language tag (ABNF):
    langtag = language
              ["-" script]
              ["-" region]
              *("-" variant)
              *("-" extension)
              ["-" privateuse]
  Every part after the language is introduced by `delimiter`. The parts are
  listed one per line and concatenated in order.
*/
const langtag = [
  language,
  `(${delimiter}${script})?`,
  `(${delimiter}${region})?`,
  `(${delimiter}${variant})*`,
  `(${delimiter}${extension})*`,
  `(${delimiter}${privateuse})?`,
].join('');

/*
  Regex implementation based on BCP RFC 5646
  Tags for Identifying Languages
  https://www.rfc-editor.org/rfc/rfc5646.html

  A tag is accepted when the whole string is a private-use tag, a
  grandfathered tag, or a normal langtag; each alternative is anchored at
  both ends individually.
*/
const languageTagRegex = new RegExp(
  [privateuse, grandfathered, langtag]
    .map(production => `(^${production}$)`)
    .join('|')
);

/**
 * Check whether a string is a well-formed locale / language tag.
 *
 * The input is matched against `languageTagRegex`, which accepts
 * private-use tags, grandfathered tags and regular language tags, with
 * either "-" or "_" between the parts of a regular tag.
 *
 * 'en_US_POSIX' and 'ca_ES_VALENCIA' need no special-casing: both match
 * the langtag pattern as language + region + a 5-8 character variant.
 *
 * @param {string} str - Candidate locale string; passed to `assertString`
 *   first (presumably rejects non-string input - confirm in
 *   ./util/assertString).
 * @returns {boolean} `true` when `str` matches `languageTagRegex`.
 */
export default function isLocale(str) {
  assertString(str);
  // The earlier `return localeReg.test(str)` made this check unreachable,
  // so the RFC 5646 pattern was never consulted.
  return languageTagRegex.test(str);
}
35 changes: 35 additions & 0 deletions test/validators.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4816,16 +4816,51 @@ describe('Validators', () => {
'uz_Latn_UZ',
'en',
'gsw',
'en-US',
'es_ES',
'es-419',
'sw_KE',
'am_ET',
'zh-CHS',
'ca_ES_VALENCIA',
'en_US_POSIX',
'hak-CN',
'zh-Hant',
'zh-Hans',
'sr-Cyrl',
'sr-Latn',
'zh-cmn-Hans-CN',
'cmn-Hans-CN',
'zh-yue-HK',
'yue-HK',
'zh-Hans-CN',
'sr-Latn-RS',
'sl-rozaj',
'sl-rozaj-biske',
'sl-nedis',
'de-CH-1901',
'sl-IT-nedis',
'hy-Latn-IT-arevela',
'i-enochian',
'en-scotland-fonipa',
'sl-IT-rozaj-biske-1994',
'de-CH-x-phonebk',
'az-Arab-x-AZE-derbend',
'x-whatever',
'qaa-Qaaa-QM-x-southern',
'de-Qaaa',
'sr-Latn-QM',
'sr-Qaaa-RS',
'en-US-u-islamcal',
'zh-CN-a-myext-x-private',
'en-a-myext-b-another',
],
invalid: [
'lo_POP',
'12',
'12_DD',
'de-419-DE',
'a-DE',
],
});
});
Expand Down

0 comments on commit fc49ad7

Please sign in to comment.