Skip to content
This repository has been archived by the owner on May 10, 2023. It is now read-only.

Add validator for Lao (lo) #651

Merged
merged 6 commits into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions server/lib/validation/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const ig = require('./languages/ig');
const it = require('./languages/it');
const kab = require( './languages/kab');
const ko = require( './languages/ko');
const lo = require( './languages/lo');
const ne = require('./languages/ne');
const or = require('./languages/or');
const ru = require('./languages/ru');
Expand All @@ -27,6 +28,7 @@ const VALIDATORS = {
it,
kab,
ko,
lo,
ne,
or,
ru,
Expand Down
34 changes: 34 additions & 0 deletions server/lib/validation/languages/lo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Validation rules for Lao (lo) sentences.
// Adapted from the Thai rules: https://github.com/common-voice/sentence-collector/blob/main/server/lib/validation/languages/th.js
// Inclusive bounds on `sentence.length` (which counts UTF-16 code units),
// enforced by the length check in INVALIDATIONS below.
const MIN_LENGTH = 2;
const MAX_LENGTH = 100;

const INVALIDATIONS = [{
fn: (sentence) => {
return sentence.length < MIN_LENGTH || sentence.length > MAX_LENGTH;
},
error: `Number of characters must be between ${MIN_LENGTH} and ${MAX_LENGTH} (inclusive)`,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As with other languages, I think it would make sense to translate these error messages into Lao. What do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> As with other languages, I think it would make sense to translate these error messages into Lao. What do you think?

I agree with you. I will translate these messages into Lao.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

}, {
// ASCII digits, Lao digits and Thai digits
regex: /[0-9໑໒໓໔໕໖໗໘໙໐๐-๙]/,
error: 'Sentence should not contain numbers',
}, {
// Latin letters (A-Z, a-z) and Thai consonants (ก-ฮ) are not allowed
regex: /[A-Za-zก-ฮ]/,
error: 'Sentence should not contain latin alphabet characters or Thai characters',
}, {
// < > + * \ # @ ^ [ ] ( ) /
// ellipsis: \u0EAF ຯ
// repetition: \u0EC6 ໆ
regex: /[<>+*\\#@^[\]()/\u0EAF\u0EC6]/,
error: 'Sentence should not contain symbols, including ellipsis and repetition',
}, {
// Emoji range from https://www.regextester.com/106421 and
// https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/,
error: 'Sentence should not contain emojis or other special Unicode symbols',
}];

// Public surface of the Lao validator: the ordered list of invalidation rules.
module.exports = { INVALIDATIONS };