Skip to content
This repository has been archived by the owner on May 10, 2023. It is now read-only.

Commit

Permalink
feat: add validator for Lao (lo) (#651)
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong authored Dec 1, 2022
1 parent c79fcfe commit f56d8a0
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 0 deletions.
2 changes: 2 additions & 0 deletions server/lib/validation/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const ig = require('./languages/ig');
const it = require('./languages/it');
const kab = require( './languages/kab');
const ko = require( './languages/ko');
const lo = require( './languages/lo');
const ne = require('./languages/ne');
const or = require('./languages/or');
const ru = require('./languages/ru');
Expand All @@ -27,6 +28,7 @@ const VALIDATORS = {
it,
kab,
ko,
lo,
ne,
or,
ru,
Expand Down
34 changes: 34 additions & 0 deletions server/lib/validation/languages/lo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Validation rules for Lao (lo) sentences.
// Based on the Thai rules: https://github.com/common-voice/sentence-collector/blob/main/server/lib/validation/languages/th.js

// Inclusive bounds on sentence length, measured with String#length
// (UTF-16 code units).
const MIN_LENGTH = 2;
const MAX_LENGTH = 140;

// Each rule flags a sentence as invalid either through a predicate (`fn`
// returning true) or through a pattern (`regex` matching anywhere in the
// sentence). `error` is the Lao message associated with the rule.

// Length must fall within [MIN_LENGTH, MAX_LENGTH].
const lengthRule = {
  fn: (sentence) => {
    const { length } = sentence;
    return length < MIN_LENGTH || length > MAX_LENGTH;
  },
  error: `ຈຳນວນຕົວອັກສອນຕ້ອງຢູ່ລະຫວ່າງ ${MIN_LENGTH} ຫາ ${MAX_LENGTH} (ລວມ)`,
};

// No digits: ASCII digits, Lao digits and Thai digits.
const digitRule = {
  regex: /[0-9໑໒໓໔໕໖໗໘໙໐๐-๙]/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີຕົວເລກ',
};

// No Latin letters and nothing from the Thai Unicode block (U+0E00-U+0E7F).
const foreignScriptRule = {
  regex: /[A-Za-z\u0E00-\u0E7F]/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີຕົວອັກສອນລາຕິນ ຫຼືຕົວອັກສອນໄທ',
};

// No symbols: < > + * \ # @ ^ [ ] ( ) /
// plus the Lao ellipsis (U+0EAF ຯ) and the Lao repetition mark (U+0EC6 ໆ).
const symbolRule = {
  regex: /[<>+*\\#@^[\]()/\u0EAF\u0EC6]/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີສັນຍາລັກ, ລວມທັງ ຯ ແລະ ໆ',
};

// No emoji or other special Unicode symbols. Ranges taken from
// https://www.regextester.com/106421 and
// https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
const emojiRule = {
  regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີ ອີໂມຈິ ຫຼືສັນຍາລັກຂອງ Unicode ພິເສດອື່ນໆ',
};

// Order matters to consumers that report rules in sequence: length, digits,
// foreign scripts, symbols, emoji.
const INVALIDATIONS = [
  lengthRule,
  digitRule,
  foreignScriptRule,
  symbolRule,
  emojiRule,
];

// Public interface of the Lao validator: exposes the INVALIDATIONS rule list.
module.exports = {
INVALIDATIONS,
};

0 comments on commit f56d8a0

Please sign in to comment.