From 5259593db56e602f8de4e5fbabfad695a0d6bce4 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Sun, 27 Nov 2022 17:26:28 +0700
Subject: [PATCH 1/6] Create lo.js

---
 server/lib/validation/languages/lo.js | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 server/lib/validation/languages/lo.js

diff --git a/server/lib/validation/languages/lo.js b/server/lib/validation/languages/lo.js
new file mode 100644
index 00000000..04b1a790
--- /dev/null
+++ b/server/lib/validation/languages/lo.js
@@ -0,0 +1,34 @@
+// Lao rules
+// use any rule from Thai rules https://github.com/wannaphong/sentence-collector/blob/main/server/lib/validation/languages/th.js
+const MIN_LENGTH = 2;
+const MAX_LENGTH = 100;
+
+const INVALIDATIONS = [{
+  fn: (sentence) => {
+    return sentence.length < MIN_LENGTH || sentence.length > MAX_LENGTH;
+  },
+  error: `Number of characters must be between ${MIN_LENGTH} and ${MAX_LENGTH} (inclusive)`,
+}, {
+  // Lao digits and Thai digits
+  regex: /[0-9໑໒໓໔໕໖໗໘໙໐๐-๙]/,
+  error: 'Sentence should not contain numbers',
+}, {
+  // English and Thai character is not allowed
+  regex: /[A-Za-zก-ฮ]/,
+  error: 'Sentence should not contain latin alphabet characters',
+}, {
+  // < > + * \ # @ ^ [ ] ( ) /
+  // ellipsis: \u0EAF ຯ
+  // repetition: \u0EC6 ໆ
+  regex: /[<>+*\\#@^[\]()/\u0EAF\u0EC6]/,
+  error: 'Sentence should not contain symbols, including ellipsis and repetition',
+}, {
+  // Emoji range from https://www.regextester.com/106421 and
+  // https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
+  regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/,
+  error: 'Sentence should not contain emojis or other special Unicode symbols',
+}];
+
+module.exports = {
+  INVALIDATIONS,
+};

From 5dde8f2ba22f56786e4ae7ae3856ad25898b7de2 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Sun, 27 Nov 2022 17:28:15 +0700
Subject: [PATCH 2/6] Update lo.js

---
 server/lib/validation/languages/lo.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/server/lib/validation/languages/lo.js b/server/lib/validation/languages/lo.js
index 04b1a790..e07545f2 100644
--- a/server/lib/validation/languages/lo.js
+++ b/server/lib/validation/languages/lo.js
@@ -1,5 +1,5 @@
 // Lao rules
-// use any rule from Thai rules https://github.com/wannaphong/sentence-collector/blob/main/server/lib/validation/languages/th.js
+// use any rule from Thai rules https://github.com/common-voice/sentence-collector/blob/main/server/lib/validation/languages/th.js
 const MIN_LENGTH = 2;
 const MAX_LENGTH = 100;
 

From 6d8027686eae11cc13e9a813e512df7be892520a Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Sun, 27 Nov 2022 17:32:10 +0700
Subject: [PATCH 3/6] Update lo.js

---
 server/lib/validation/languages/lo.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/lib/validation/languages/lo.js b/server/lib/validation/languages/lo.js
index e07545f2..0897a51d 100644
--- a/server/lib/validation/languages/lo.js
+++ b/server/lib/validation/languages/lo.js
@@ -13,9 +13,9 @@ const INVALIDATIONS = [{
   regex: /[0-9໑໒໓໔໕໖໗໘໙໐๐-๙]/,
   error: 'Sentence should not contain numbers',
 }, {
-  // English and Thai character is not allowed
+  // English and Thai characters are not allowed
   regex: /[A-Za-zก-ฮ]/,
-  error: 'Sentence should not contain latin alphabet characters',
+  error: 'Sentence should not contain latin alphabet characters or Thai characters',
 }, {
   // < > + * \ # @ ^ [ ] ( ) /
   // ellipsis: \u0EAF ຯ

From cf9cc151ea6cc5ca64a01b7b50a7f5e7e1041a9d Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Sun, 27 Nov 2022 17:34:07 +0700
Subject: [PATCH 4/6] Add lo to server/lib/validation/index.js

---
 server/lib/validation/index.js | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/server/lib/validation/index.js b/server/lib/validation/index.js
index cbead814..42bf424a 100644
--- a/server/lib/validation/index.js
+++ b/server/lib/validation/index.js
@@ -8,6 +8,7 @@ const ig = require('./languages/ig');
 const it = require('./languages/it');
 const kab = require( './languages/kab');
 const ko = require( './languages/ko');
+const lo = require( './languages/lo');
 const ne = require('./languages/ne');
 const or = require('./languages/or');
 const ru = require('./languages/ru');
@@ -27,6 +28,7 @@ const VALIDATORS = {
   it,
   kab,
   ko,
+  lo,
   ne,
   or,
   ru,

From 48ac385644dad2e899167be7b1f01e4e47ed204b Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Wed, 30 Nov 2022 22:27:17 +0700
Subject: [PATCH 5/6] Update lo.js

---
 server/lib/validation/languages/lo.js | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/server/lib/validation/languages/lo.js b/server/lib/validation/languages/lo.js
index 0897a51d..547f3e34 100644
--- a/server/lib/validation/languages/lo.js
+++ b/server/lib/validation/languages/lo.js
@@ -11,22 +11,22 @@ const INVALIDATIONS = [{
 }, {
   // Lao digits and Thai digits
   regex: /[0-9໑໒໓໔໕໖໗໘໙໐๐-๙]/,
-  error: 'Sentence should not contain numbers',
+  error: 'ປະໂຫຍກບໍ່ຄວນມີຕົວເລກ',
 }, {
   // English and Thai characters are not allowed
-  regex: /[A-Za-zก-ฮ]/,
-  error: 'Sentence should not contain latin alphabet characters or Thai characters',
+  regex: /[A-Za-z\u0E00-\u0E7F]/,
+  error: 'ປະໂຫຍກບໍ່ຄວນມີຕົວອັກສອນລາຕິນ ຫຼືຕົວອັກສອນໄທ',
 }, {
   // < > + * \ # @ ^ [ ] ( ) /
   // ellipsis: \u0EAF ຯ
   // repetition: \u0EC6 ໆ
   regex: /[<>+*\\#@^[\]()/\u0EAF\u0EC6]/,
-  error: 'Sentence should not contain symbols, including ellipsis and repetition',
+  error: 'ປະໂຫຍກບໍ່ຄວນມີສັນຍາລັກ, ລວມທັງ ຯ ແລະ ໆ',
 }, {
   // Emoji range from https://www.regextester.com/106421 and
   // https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
   regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/,
-  error: 'Sentence should not contain emojis or other special Unicode symbols',
+  error: 'ປະໂຫຍກບໍ່ຄວນມີ ອີໂມຈິ ຫຼືສັນຍາລັກຂອງ Unicode ພິເສດອື່ນໆ',
 }];
 
 module.exports = {

From a11f4d10b25309ad729ae64675d88a8c1b594b03 Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Wed, 30 Nov 2022 22:29:48 +0700
Subject: [PATCH 6/6] Change MAX_LENGTH in lo.js

---
 server/lib/validation/languages/lo.js | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/server/lib/validation/languages/lo.js b/server/lib/validation/languages/lo.js
index 547f3e34..7aa02eee 100644
--- a/server/lib/validation/languages/lo.js
+++ b/server/lib/validation/languages/lo.js
@@ -1,13 +1,13 @@
 // Lao rules
 // use any rule from Thai rules https://github.com/common-voice/sentence-collector/blob/main/server/lib/validation/languages/th.js
 const MIN_LENGTH = 2;
-const MAX_LENGTH = 100;
+const MAX_LENGTH = 140;
 
 const INVALIDATIONS = [{
   fn: (sentence) => {
     return sentence.length < MIN_LENGTH || sentence.length > MAX_LENGTH;
   },
-  error: `Number of characters must be between ${MIN_LENGTH} and ${MAX_LENGTH} (inclusive)`,
+  error: `ຈຳນວນຕົວອັກສອນຕ້ອງຢູ່ລະຫວ່າງ ${MIN_LENGTH} ຫາ ${MAX_LENGTH} (ລວມ)`,
 }, {
   // Lao digits and Thai digits
   regex: /[0-9໑໒໓໔໕໖໗໘໙໐๐-๙]/,