diff --git a/__tests__/constants.ts b/__tests__/constants.ts new file mode 100644 index 0000000..78b959e --- /dev/null +++ b/__tests__/constants.ts @@ -0,0 +1,41 @@ +import type { UnicodeRange } from '../src'; + +export const MockKatakanaRange: UnicodeRange = { + start: '\u30a0', + end: '\u30ff', +}; + +export const MockKatakanaPhoneticExtensionRange: UnicodeRange = { + start: '\u31f0', + end: '\u31FF', +}; + +export const MockHiraganaRange: UnicodeRange = { + start: '\u3040', + end: '\u309f', +}; + +export const MockRareKanjiRange: UnicodeRange = { + start: '\u3400', + end: '\u4dbf', +}; + +export const MockCJKPunctuationsRange: UnicodeRange = { + start: '\u3000', + end: '\u303f', +}; + +export const MockKanjiCompatibilityIdeographsRange: UnicodeRange = { + start: '\uf900', + end: '\ufaff', +}; + +export const MockCommonUncommonKanjiRange: UnicodeRange = { + start: '\u4e00', + end: '\u9fdf', +}; + +export const MockRomanHalfwidthKatakanaRange: UnicodeRange = { + start: '\uff00', + end: '\uffef', +}; diff --git a/__tests__/create-match-score-calculator.test.ts b/__tests__/create-match-score-calculator.test.ts new file mode 100644 index 0000000..5ec9927 --- /dev/null +++ b/__tests__/create-match-score-calculator.test.ts @@ -0,0 +1,134 @@ +import { CharacterSet, createMatchScoreCalculator, CreateValidatorOptions } from '../src'; +import { + escapeUnicode, + escapeUnicodeRange, + generateCharactersFromRange, + getRandomString, + toFixedNumber, +} from './utils'; +import { MockHiraganaRange, MockKatakanaRange } from './constants'; + +describe('createMatchScoreCalculator', () => { + const mockOptions: CreateValidatorOptions = { + characterSets: [CharacterSet.Katakana], + }; + + const fullKanaString = generateCharactersFromRange( + MockKatakanaRange.start, + MockKatakanaRange.end, + ); + + const fullHiraganaString = generateCharactersFromRange( + MockHiraganaRange.start, + MockHiraganaRange.end, + ); + + const invalidString = getRandomString(100); + + describe('errors', 
() => { + test('should throw an error if options is not provided', () => { + // @ts-ignore + expect(() => createMatchScoreCalculator()).toThrow( + '[japanese-moji]: createMatchScoreCalculator: requires "options" to be supplied', + ); + }); + test('should throw an error if options.characterSets is not provided', () => { + // @ts-ignore + expect(() => createMatchScoreCalculator({})).toThrow( + '[japanese-moji]: createMatchScoreCalculator: requires "options.characterSets" to be supplied', + ); + }); + }); + describe('characterSets', () => { + test('should return a function', () => { + const result = createMatchScoreCalculator(mockOptions); + expect(typeof result).toBe('function'); + }); + + test('should throw an error when invalid character sets are provided', () => { + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createMatchScoreCalculator({ characterSets: ['invalid'] })).toThrow( + '[japanese-moji]: createRegexGroups: key "invalid" not found in the "characterSetMap', + ); + }); + + test('should validate a string with 100% for the provided character sets', () => { + const isValidKana = createMatchScoreCalculator(mockOptions); + const result = isValidKana(fullKanaString); + expect(toFixedNumber(result)).toBe(100); + }); + }); + describe('customRanges', () => { + test('should add custom unicode range and return 100% for fully valid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [escapeUnicodeRange(MockHiraganaRange)], + }; + const isValidKanaWithHiragana = createMatchScoreCalculator(mergedOptions); + const result = isValidKanaWithHiragana(fullKanaString + fullHiraganaString); + expect(toFixedNumber(result)).toBe(100); + }); + + test('should add custom unicode ranges and return 49% for invalid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [escapeUnicodeRange(MockHiraganaRange)], + }; + const isValidKanaWithHiragana = 
createMatchScoreCalculator(mergedOptions); + const result = isValidKanaWithHiragana(fullHiraganaString + invalidString); + expect(toFixedNumber(result)).toBe(49); + }); + + test('should throw an error when invalid custom unicode ranges are provided', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [ + { + start: 'invalid stuff', + // This will not get printed since "start" throws + end: 'invalid stuff', + }, + ], + }; + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createMatchScoreCalculator(mergedOptions)).toThrow( + '[japanese-moji]: createRange: invalid unicode value supplied for start of the range', + ); + }); + }); + describe('customUnicodes', () => { + test('should add custom unicode and return 100% for fully valid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [escapeUnicode(MockHiraganaRange.start)], + }; + const isValidKanaWithHiragana = createMatchScoreCalculator(mergedOptions); + const result = isValidKanaWithHiragana(MockHiraganaRange.start); + expect(toFixedNumber(result)).toBe(100); + }); + + test('should add custom unicode and return 33% for invalid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [escapeUnicode(MockHiraganaRange.start)], + }; + const calculateScore = createMatchScoreCalculator(mergedOptions); + const result = calculateScore(fullKanaString + fullHiraganaString + invalidString); + expect(toFixedNumber(result)).toBe(33); + }); + + test('should throw an error when invalid custom unicodes are provided', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [''], + }; + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createMatchScoreCalculator(mergedOptions)).toThrow( + '[japanese-moji]: createUnicode: invalid unicode value supplied', + ); + }); + }); +}); diff --git 
a/__tests__/create-strict-validator.test.ts b/__tests__/create-strict-validator.test.ts new file mode 100644 index 0000000..9ec8ffa --- /dev/null +++ b/__tests__/create-strict-validator.test.ts @@ -0,0 +1,133 @@ +import { CharacterSet, createStrictValidator, CreateValidatorOptions } from '../src'; +import { + escapeUnicode, + escapeUnicodeRange, + generateCharactersFromRange, + getRandomString, +} from './utils'; +import { MockHiraganaRange, MockKatakanaRange } from './constants'; + +describe('createStrictValidator', () => { + const mockOptions: CreateValidatorOptions = { + characterSets: [CharacterSet.Katakana], + }; + + const fullKanaString = generateCharactersFromRange( + MockKatakanaRange.start, + MockKatakanaRange.end, + ); + + const fullHiraganaString = generateCharactersFromRange( + MockHiraganaRange.start, + MockHiraganaRange.end, + ); + + const invalidString = getRandomString(100); + + describe('errors', () => { + test('should throw an error if options is not provided', () => { + // @ts-ignore + expect(() => createStrictValidator()).toThrow( + '[japanese-moji]: createStrictValidator: requires "options" to be supplied', + ); + }); + test('should throw an error if options.characterSets is not provided', () => { + // @ts-ignore + expect(() => createStrictValidator({})).toThrow( + '[japanese-moji]: createStrictValidator: requires "options.characterSets" to be supplied', + ); + }); + }); + describe('characterSets', () => { + test('should return a function', () => { + const result = createStrictValidator(mockOptions); + expect(typeof result).toBe('function'); + }); + + test('should throw an error when invalid character sets are provided', () => { + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createStrictValidator({ characterSets: ['invalid'] })).toThrow( + '[japanese-moji]: createRegexGroups: key "invalid" not found in the "characterSetMap', + ); + }); + + test('should validate a string with the provided character sets', 
() => { + const isValidKana = createStrictValidator(mockOptions); + const result = isValidKana(fullKanaString); + expect(result).toBe(true); + }); + }); + describe('customRanges', () => { + test('should add custom unicode range and return true for valid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [escapeUnicodeRange(MockHiraganaRange)], + }; + const isValidKanaWithHiragana = createStrictValidator(mergedOptions); + const result = isValidKanaWithHiragana(fullKanaString + fullHiraganaString); + expect(result).toBe(true); + }); + + test('should add custom unicode ranges and return false for invalid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [escapeUnicodeRange(MockHiraganaRange)], + }; + const isValidKanaWithHiragana = createStrictValidator(mergedOptions); + const result = isValidKanaWithHiragana(invalidString); + expect(result).toBe(false); + }); + + test('should throw an error when invalid custom unicode ranges are provided', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [ + { + start: 'invalid stuff', + // This will not get printed since "start" throws + end: 'invalid stuff', + }, + ], + }; + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createStrictValidator(mergedOptions)).toThrow( + '[japanese-moji]: createRange: invalid unicode value supplied for start of the range', + ); + }); + }); + describe('customUnicodes', () => { + test('should add custom unicode and return true for valid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [escapeUnicode(MockHiraganaRange.start)], + }; + const isValidKanaWithHiragana = createStrictValidator(mergedOptions); + const result = isValidKanaWithHiragana(MockHiraganaRange.start); + expect(result).toBe(true); + }); + + test('should add custom unicode and return false for invalid 
string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [escapeUnicode(MockHiraganaRange.start)], + }; + const isValidKanaWithHiragana = createStrictValidator(mergedOptions); + const result = isValidKanaWithHiragana(invalidString); + expect(result).toBe(false); + }); + + test('should throw an error when invalid custom unicodes are provided', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [''], + }; + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createStrictValidator(mergedOptions)).toThrow( + '[japanese-moji]: createUnicode: invalid unicode value supplied', + ); + }); + }); +}); diff --git a/__tests__/create-threshold-validator.test.ts b/__tests__/create-threshold-validator.test.ts new file mode 100644 index 0000000..6af5387 --- /dev/null +++ b/__tests__/create-threshold-validator.test.ts @@ -0,0 +1,155 @@ +import { CharacterSet, createThresholdBasedValidator, CreateValidatorOptions } from '../src'; +import { + escapeUnicode, + escapeUnicodeRange, + generateCharactersFromRange, + getRandomString, +} from './utils'; +import { MockHiraganaRange, MockKatakanaRange } from './constants'; + +describe('createThresholdBasedValidator', () => { + const mockOptions: CreateValidatorOptions = { + characterSets: [CharacterSet.Katakana], + }; + + const fullKanaString = generateCharactersFromRange( + MockKatakanaRange.start, + MockKatakanaRange.end, + ); + + const fullHiraganaString = generateCharactersFromRange( + MockHiraganaRange.start, + MockHiraganaRange.end, + ); + + const invalidString = getRandomString(50); + + describe('normalize threshold', () => { + test('should normalize threshold to maximum 100 if threshold is greater than 100', () => { + const threshold = 150; + const validator = createThresholdBasedValidator(mockOptions); + const result = validator(fullKanaString, threshold); + expect(result).toBe(true); + }); + test('should 
normalize threshold to minimum 0 if threshold is less than 0', () => { + const threshold = -150; + const validator = createThresholdBasedValidator(mockOptions); + const result = validator(invalidString, threshold); + // This will pass because the score is 0 and threshold is 0 + // It compares it with ">=" to operator + expect(result).toBe(true); + }); + }); + describe('errors', () => { + test('should throw an error if options is not provided', () => { + // @ts-ignore + expect(() => createThresholdBasedValidator()).toThrow( + '[japanese-moji]: createThresholdBasedValidator: requires "options" to be supplied', + ); + }); + test('should throw an error if options.characterSets is not provided', () => { + // @ts-ignore + expect(() => createThresholdBasedValidator({})).toThrow( + '[japanese-moji]: createThresholdBasedValidator: requires "options.characterSets" to be supplied', + ); + }); + }); + describe('characterSets', () => { + test('should return a function', () => { + const result = createThresholdBasedValidator(mockOptions); + expect(typeof result).toBe('function'); + }); + + test('should throw an error when invalid character sets are provided', () => { + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createThresholdBasedValidator({ characterSets: ['invalid'] })).toThrow( + '[japanese-moji]: createRegexGroups: key "invalid" not found in the "characterSetMap', + ); + }); + + test('should validate a string with the provided character sets with default threshold', () => { + const isKanaPresent = createThresholdBasedValidator(mockOptions); + const result = isKanaPresent(fullKanaString); + expect(result).toBe(true); + }); + + test('validator should return false for the provided character sets when its below threshold', () => { + const isKanaPresent = createThresholdBasedValidator(mockOptions); + const result = isKanaPresent(fullKanaString + invalidString, 70); + expect(result).toBe(false); + }); + }); + describe('customRanges', () 
=> { + test('should add custom unicode range and return true for valid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [escapeUnicodeRange(MockHiraganaRange)], + }; + const isValidKanaWithHiragana = createThresholdBasedValidator(mergedOptions); + const result = isValidKanaWithHiragana(fullKanaString + fullHiraganaString); + expect(result).toBe(true); + }); + + test('should add custom unicode ranges and return false for invalid string when its below threshold', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [escapeUnicodeRange(MockHiraganaRange)], + }; + const isValidKanaWithHiragana = createThresholdBasedValidator(mergedOptions); + const result = isValidKanaWithHiragana(invalidString, 85); + expect(result).toBe(false); + }); + + test('should throw an error when invalid custom unicode ranges are provided', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customRanges: [ + { + start: 'invalid stuff', + // This will not get printed since "start" throws + end: 'invalid stuff', + }, + ], + }; + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createThresholdBasedValidator(mergedOptions)).toThrow( + '[japanese-moji]: createRange: invalid unicode value supplied for start of the range', + ); + }); + }); + describe('customUnicodes', () => { + test('should add custom unicode and return true for valid string', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [escapeUnicode(MockHiraganaRange.start)], + }; + const isValidKanaWithHiragana = createThresholdBasedValidator(mergedOptions); + const result = isValidKanaWithHiragana(MockHiraganaRange.start); + expect(result).toBe(true); + }); + + test('should add custom unicode and return false for invalid string when its below threshold', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + 
customUnicodes: [escapeUnicode(MockHiraganaRange.start)], + }; + const isValidKanaWithHiragana = createThresholdBasedValidator(mergedOptions); + const result = isValidKanaWithHiragana(invalidString, 85); + expect(result).toBe(false); + }); + + test('should throw an error when invalid custom unicodes are provided', () => { + const mergedOptions: CreateValidatorOptions = { + ...mockOptions, + customUnicodes: [''], + }; + // Disabled ts intentionally to test the error message + // @ts-ignore + expect(() => createThresholdBasedValidator(mergedOptions)).toThrow( + '[japanese-moji]: createUnicode: invalid unicode value supplied', + ); + }); + }); +}); diff --git a/__tests__/utils.ts b/__tests__/utils.ts new file mode 100644 index 0000000..7ace25c --- /dev/null +++ b/__tests__/utils.ts @@ -0,0 +1,37 @@ +import type { UnicodeRange } from '../src'; +import { makeString } from '../src/utils'; + +export const generateCharactersFromRange = (start: string, end: string): string => { + const startCode = start.charCodeAt(0); + const endCode = end.charCodeAt(0); + const length = endCode - startCode + 1; + return Array.from({ length }, (_, i) => String.fromCharCode(startCode + i)).join(''); +}; + +export const generateCharactersFromRanges = (ranges: UnicodeRange[]): string => { + return ranges.reduce((acc, range) => { + return acc + generateCharactersFromRange(range.start, range.end); + }, ''); +}; + +export const getRandomString = (length = 0): string => { + return Array.from(Array(length), () => Math.floor(Math.random() * 36).toString(36)).join(''); +}; + +export const escapeUnicode = (str: string): string => { + return str.replace(/[^\\0-~]/g, (ch) => { + return makeString('\\u', `000${ch.charCodeAt(0).toString(16)}`.slice(-4)); + }); +}; + +export const escapeUnicodeRange = (range: UnicodeRange): UnicodeRange => { + return { + start: escapeUnicode(range.start), + end: escapeUnicode(range.end), + }; +}; + +export const toFixedNumber = (num: number, precision = 0): number => { + 
const fixed = num.toFixed(precision); + return +fixed; +}; diff --git a/__tests__/validators/japanese.test.ts b/__tests__/validators/japanese.test.ts new file mode 100644 index 0000000..5934bb4 --- /dev/null +++ b/__tests__/validators/japanese.test.ts @@ -0,0 +1,73 @@ +import { generateCharactersFromRanges, getRandomString, toFixedNumber } from '../utils'; +import { howMuchJapaneseIsPresent, isJapanesePresent, isValidJapanese } from '../../src'; +import { + MockCJKPunctuationsRange, + MockCommonUncommonKanjiRange, + MockHiraganaRange, + MockKanjiCompatibilityIdeographsRange, + MockKatakanaPhoneticExtensionRange, + MockKatakanaRange, + MockRareKanjiRange, + MockRomanHalfwidthKatakanaRange, +} from '../constants'; + +describe('validator - Japanese', () => { + const fullJapaneseString = generateCharactersFromRanges([ + MockRareKanjiRange, + MockCommonUncommonKanjiRange, + MockKanjiCompatibilityIdeographsRange, + MockKatakanaRange, + MockKatakanaPhoneticExtensionRange, + MockHiraganaRange, + MockCJKPunctuationsRange, + MockRomanHalfwidthKatakanaRange, + ]); + + const invalidString = getRandomString(500); + + describe('isValidJapanese', () => { + test('should return true when only Kanji characters are supplied', () => { + const result = isValidJapanese(fullJapaneseString); + expect(result).toBe(true); + }); + + test('should return false when only Kanji and non Kanji characters are supplied', () => { + const result = isValidJapanese(fullJapaneseString + invalidString); + expect(result).toBe(false); + }); + }); + + describe('isValidJapanesePresent', () => { + test('should return true when only Kanji characters are supplied', () => { + const result = isJapanesePresent(fullJapaneseString); + expect(result).toBe(true); + }); + + test('should return true when string has valid characters above threshold', () => { + const result = isJapanesePresent(fullJapaneseString + invalidString, 85); + expect(result).toBe(true); + }); + + test('should return false when string has valid 
characters below threshold', () => { + const result = isJapanesePresent(invalidString, 85); + expect(result).toBe(false); + }); + }); + + describe('howMuchKanjiPresent', () => { + test('should return 100 when only Kanji characters are supplied', () => { + const result = howMuchJapaneseIsPresent(fullJapaneseString); + expect(result).toBe(100); + }); + + test('should return 98 when only Kanji characters are supplied', () => { + const result = +howMuchJapaneseIsPresent(invalidString + fullJapaneseString); + expect(toFixedNumber(result)).toBe(98); + }); + + test('should return 0 when no Kanji characters are supplied', () => { + const result = +howMuchJapaneseIsPresent(invalidString); + expect(toFixedNumber(result)).toBe(0); + }); + }); +}); diff --git a/__tests__/validators/kana.test.ts b/__tests__/validators/kana.test.ts new file mode 100644 index 0000000..c00822e --- /dev/null +++ b/__tests__/validators/kana.test.ts @@ -0,0 +1,63 @@ +import { generateCharactersFromRanges, toFixedNumber } from '../utils'; +import { howMuchKanaIsPresent, isKanaPresent, isValidKana } from '../../src'; +import { + MockHiraganaRange, + MockKatakanaPhoneticExtensionRange, + MockKatakanaRange, +} from '../constants'; + +describe('validator - kana', () => { + const fullKanaString = generateCharactersFromRanges([ + MockKatakanaRange, + MockKatakanaPhoneticExtensionRange, + MockHiraganaRange, + ]); + + const invalidString = 'abcdefghijklmnopqrstuvwxyz'; + + describe('isValidKana', () => { + test('should return true when only kana characters are supplied', () => { + const result = isValidKana(fullKanaString); + expect(result).toBe(true); + }); + + test('should return false when only kana and non kana characters are supplied', () => { + const result = isValidKana(fullKanaString + invalidString); + expect(result).toBe(false); + }); + }); + + describe('isValidKanaPresent', () => { + test('should return true when only kana characters are supplied', () => { + const result = 
isKanaPresent(fullKanaString); + expect(result).toBe(true); + }); + + test('should return true when string has valid characters above threshold', () => { + const result = isKanaPresent(fullKanaString + invalidString, 85); + expect(result).toBe(true); + }); + + test('should return false when string has valid characters below threshold', () => { + const result = isKanaPresent(invalidString, 85); + expect(result).toBe(false); + }); + }); + + describe('howMuchKanaPresent', () => { + test('should return 100 when only kana characters are supplied', () => { + const result = howMuchKanaIsPresent(fullKanaString); + expect(result).toBe(100); + }); + + test('should return 89 when only kana characters are supplied', () => { + const result = +howMuchKanaIsPresent(fullKanaString + invalidString); + expect(toFixedNumber(result)).toBe(89); + }); + + test('should return 0 when no kana characters are supplied', () => { + const result = +howMuchKanaIsPresent(invalidString); + expect(toFixedNumber(result)).toBe(0); + }); + }); +}); diff --git a/__tests__/validators/kanji.test.ts b/__tests__/validators/kanji.test.ts new file mode 100644 index 0000000..ee71fbf --- /dev/null +++ b/__tests__/validators/kanji.test.ts @@ -0,0 +1,73 @@ +import { + generateCharactersFromRange, + generateCharactersFromRanges, + getRandomString, + toFixedNumber, +} from '../utils'; +import { howMuchKanjiIsPresent, isKanjiPresent, isValidKanji } from '../../src'; +import { + MockCommonUncommonKanjiRange, + MockKanjiCompatibilityIdeographsRange, + MockRareKanjiRange, +} from '../constants'; + +describe('validator - Kanji', () => { + const rareKanjiRange: string = generateCharactersFromRange( + MockRareKanjiRange.start, + MockRareKanjiRange.end, + ); + + const fullKanjiString = generateCharactersFromRanges([ + MockRareKanjiRange, + MockCommonUncommonKanjiRange, + MockKanjiCompatibilityIdeographsRange, + ]); + + const invalidString = getRandomString(300); + + describe('isValidKanji', () => { + test('should return 
true when only Kanji characters are supplied', () => { + const result = isValidKanji(fullKanjiString); + expect(result).toBe(true); + }); + + test('should return false when only Kanji and non Kanji characters are supplied', () => { + const result = isValidKanji(fullKanjiString + invalidString); + expect(result).toBe(false); + }); + }); + + describe('isValidKanjiPresent', () => { + test('should return true when only Kanji characters are supplied', () => { + const result = isKanjiPresent(fullKanjiString); + expect(result).toBe(true); + }); + + test('should return true when string has valid characters above threshold', () => { + const result = isKanjiPresent(fullKanjiString + invalidString, 85); + expect(result).toBe(true); + }); + + test('should return false when string has valid characters below threshold', () => { + const result = isKanjiPresent(invalidString, 85); + expect(result).toBe(false); + }); + }); + + describe('howMuchKanjiPresent', () => { + test('should return 100 when only Kanji characters are supplied', () => { + const result = howMuchKanjiIsPresent(fullKanjiString); + expect(result).toBe(100); + }); + + test('should return 96 when only Kanji characters are supplied', () => { + const result = howMuchKanjiIsPresent(invalidString + rareKanjiRange); + expect(toFixedNumber(result)).toBe(96); + }); + + test('should return 0 when no Kanji characters are supplied', () => { + const result = howMuchKanjiIsPresent(invalidString); + expect(toFixedNumber(result)).toBe(0); + }); + }); +}); diff --git a/src/constants/built-ranges.ts b/src/constants/built-ranges.ts new file mode 100644 index 0000000..2bbcda5 --- /dev/null +++ b/src/constants/built-ranges.ts @@ -0,0 +1,33 @@ +import { + CJKPunctuationsRange, + CommonUncommonKanjiRange, + RomanHalfwidthKatakanaRange, + HiraganaRange, + KatakanaPhoneticExtensionRange, + KatakanaRange, + RareKanjiRange, + KanjiCompatibilityIdeographsRange, +} from './raw-ranges'; +import { createRange } from '../utils/create-range'; + 
+export const CJKPunctuations = createRange(CJKPunctuationsRange.start, CJKPunctuationsRange.end); +export const Hiragana = createRange(HiraganaRange.start, HiraganaRange.end); +export const Katakana = createRange(KatakanaRange.start, KatakanaRange.end); +export const KatakanaPhoneticExtension = createRange( + KatakanaPhoneticExtensionRange.start, + KatakanaPhoneticExtensionRange.end, +); +export const RareKanji = createRange(RareKanjiRange.start, RareKanjiRange.end); +export const CommonUncommonKanji = createRange( + CommonUncommonKanjiRange.start, + CommonUncommonKanjiRange.end, +); + +export const KanjiCompatibilityIdeographs = createRange( + KanjiCompatibilityIdeographsRange.start, + KanjiCompatibilityIdeographsRange.end, +); +export const RomanHalfwidthKatakana = createRange( + RomanHalfwidthKatakanaRange.start, + RomanHalfwidthKatakanaRange.end, +); diff --git a/src/constants/index.ts b/src/constants/index.ts new file mode 100644 index 0000000..36fee18 --- /dev/null +++ b/src/constants/index.ts @@ -0,0 +1,28 @@ +import { CharacterDict, CharacterSet } from '../types'; +import { + CJKPunctuations, + CommonUncommonKanji, + RomanHalfwidthKatakana, + Hiragana, + Katakana, + KatakanaPhoneticExtension, + RareKanji, + KanjiCompatibilityIdeographs, +} from './built-ranges'; + +/** + * Frozen lookup from every built-in {@link CharacterSet} key to the regex range built for it in `./built-ranges` + */ +export const characterSetMap: Readonly<CharacterDict> = Object.freeze({ + [CharacterSet.CJKPunctuations]: CJKPunctuations, + [CharacterSet.Hiragana]: Hiragana, + [CharacterSet.Katakana]: Katakana, + [CharacterSet.KatakanaPhoneticExtension]: KatakanaPhoneticExtension, + [CharacterSet.RareKanji]: RareKanji, + [CharacterSet.KanjiCompatibilityIdeographs]: KanjiCompatibilityIdeographs, + [CharacterSet.CommonUncommonKanji]: CommonUncommonKanji, + [CharacterSet.RomanHalfwidthKatakana]: RomanHalfwidthKatakana, +}); + +export const defaultValidationThreshold = 85; +export const isProduction: boolean = process.env.NODE_ENV === 'production'; diff --git a/src/constants/raw-ranges.ts b/src/constants/raw-ranges.ts new file
mode 100644 index 0000000..70f7951 --- /dev/null +++ b/src/constants/raw-ranges.ts @@ -0,0 +1,45 @@ +import type { UnicodeRange } from '../types'; + +export const CJKPunctuationsRange: UnicodeRange = { + start: '\\u3000', + end: '\\u303f', +}; + +export const HiraganaRange: UnicodeRange = { + start: '\\u3040', + end: '\\u309f', +}; + +export const KatakanaRange: UnicodeRange = { + start: '\\u30a0', + end: '\\u30ff', +}; + +export const KatakanaPhoneticExtensionRange: UnicodeRange = { + start: '\\u31f0', + end: '\\u31FF', +}; + +// CJK Unified Ideographs Extension A +export const RareKanjiRange: UnicodeRange = { + start: '\\u3400', + end: '\\u4dbf', +}; + +// CJK Compatibility Ideographs +export const KanjiCompatibilityIdeographsRange: UnicodeRange = { + start: '\\uf900', + end: '\\ufaff', +}; + +// CJK Unified Ideographs +export const CommonUncommonKanjiRange: UnicodeRange = { + start: '\\u4e00', + end: '\\u9fdf', +}; + +// Half width and Fullwidth Forms +export const RomanHalfwidthKatakanaRange: UnicodeRange = { + start: '\\uff00', + end: '\\uffef', +}; diff --git a/src/index.ts b/src/index.ts index 9163cc4..d842de4 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,3 +1,7 @@ -export const sayHello = () => { - console.info(`Hello`); -}; +export { + createMatchScoreCalculator, + createStrictValidator, + createThresholdBasedValidator, +} from './utils'; +export * from './validators'; +export * from './types'; diff --git a/src/types.ts b/src/types.ts new file mode 100644 index 0000000..b497f16 --- /dev/null +++ b/src/types.ts @@ -0,0 +1,32 @@ +export enum CharacterSet { + CJKPunctuations = 'CJKPunctuations', + Hiragana = 'Hiragana', + Katakana = 'Katakana', + KatakanaPhoneticExtension = 'KatakanaPhoneticExtension', + RareKanji = 'RareKanji', + KanjiCompatibilityIdeographs = 'KanjiCompatibilityIdeographs', + CommonUncommonKanji = 'CommonUncommonKanji', + RomanHalfwidthKatakana = 'RomanHalfwidthKatakana', +} + +// Maps every CharacterSet member to a regex range string such as "[\\u3040-\\u309f]" +export type CharacterDict = Record<CharacterSet, string>; + +export interface
UnicodeRange { + start: string; + end: string; +} + +export interface CreateValidatorOptions { + characterSets: CharacterSet[]; + customRanges?: UnicodeRange[]; + customUnicodes?: string[]; +} + +export type StrictValidator = (arg: string) => boolean; +export type ThresholdBasedValidator = ( + arg: string, + // between 0 and 100 + threshold?: number, +) => boolean; +export type ThresholdCalculator = (arg: string) => number; diff --git a/src/utils/create-match-score-calculator.ts b/src/utils/create-match-score-calculator.ts new file mode 100644 index 0000000..667ba7a --- /dev/null +++ b/src/utils/create-match-score-calculator.ts @@ -0,0 +1,41 @@ +import type { CreateValidatorOptions, ThresholdCalculator } from '../types'; +import { makeString } from './make-string'; +import { optionsToRegex } from './options-to-regex'; +import { matchString } from './match-string'; +import { invariant } from './invariant'; + +/** + * Creates a function that calculates what percentage of a string matches the + * pattern built from `options` + * + * @param options - Check {@link CreateValidatorOptions} for all the available options + * @returns Calculator that yields `matches.length / str.length` as a percentage, or 0 for an empty string + * @throws When `options` or `options.characterSets` is not supplied + */ +export const createMatchScoreCalculator = ( + options: CreateValidatorOptions, +): ThresholdCalculator => { + invariant(options, `createMatchScoreCalculator: requires "options" to be supplied`); + invariant( + options?.characterSets, + `createMatchScoreCalculator: requires "options.characterSets" to be supplied`, + ); + + const regexGroup = optionsToRegex(options); + + const finalRegexPattern = makeString(regexGroup, '*'); + + const customCalculator: ThresholdCalculator = (str: string): number => { + // Guard: an empty string would otherwise score 0 / 0 = NaN + if (str.length === 0) { + return 0; + } + + const matches = matchString(str, finalRegexPattern, 'gui'); + + const score = (matches.length / str.length) * 100; + return score; + }; + + return customCalculator; +}; diff --git a/src/utils/create-range.ts b/src/utils/create-range.ts new file mode 100644 index 0000000..d0fc909 --- /dev/null +++ b/src/utils/create-range.ts @@ -0,0 +1,22 @@ +import { isValidUnicode } from
// ---- src/utils/create-range.ts ----
import { isValidUnicode } from './is-valid-unicode';
import { makeString } from './make-string';
import { invariant } from './invariant';

/**
 * Builds a regex character unicode range
 *
 * For example - [\\u1234-\\uFAFF]
 *
 * @param start - Valid unicode with escaped "\\u" unicode
 * @param end - Valid unicode value with escaped "\\u" unicode
 * @throws Error when either bound fails `isValidUnicode`
 */
export const createRange = (start: string, end: string): string => {
  const isStartValid = isValidUnicode(start);
  const isEndValid = isValidUnicode(end);

  invariant(isStartValid, `createRange: invalid unicode value supplied for start of the range`);

  invariant(isEndValid, `createRange: invalid unicode value supplied for end of the range`);

  return makeString('[', start, '-', end, ']');
};

// ---- src/utils/create-ranges.ts ----
import type { UnicodeRange } from '../types';
import { createRange } from './create-range';

/**
 * Converts every {@link UnicodeRange} into its `[start-end]` regex class.
 *
 * `createRange` either throws or returns a non-empty string, so the result
 * never needs filtering.
 */
export const createRanges = (unicodeRanges: UnicodeRange[]): string[] =>
  unicodeRanges.map((currentRange: UnicodeRange) =>
    createRange(currentRange.start, currentRange.end),
  );

// ---- src/utils/create-regex-groups.ts ----
import type { CharacterDict, CharacterSet } from '../types';
import { makeString } from './make-string';
import { invariant } from './invariant';

/**
 * Builds one capturing group that alternates between all requested sets.
 *
 * @param characterSetMap - lookup from a character set key to its regex source
 * @param characterSets - keys to include; each one must exist in the map
 * @param customRanges - already-built regex classes appended after the sets
 * @returns the alternatives joined with `|` and wrapped in parentheses
 * @throws Error when a key has no entry in `characterSetMap`
 */
export const createRegexGroups = (
  characterSetMap: CharacterDict,
  characterSets: CharacterSet[],
  customRanges: string[],
): string => {
  const mappedStrings = characterSets.map((key: CharacterSet) => {
    const foundSet = characterSetMap[key];
    invariant(foundSet, `createRegexGroups: key "${key}" not found in the "characterSetMap"`);

    return makeString(foundSet);
  });

  // Drop entries that stringified to '' so they cannot add an empty alternative.
  const finalCharacterRanges = mappedStrings.filter(Boolean);
  finalCharacterRanges.push(...customRanges);
  const joinByOr = finalCharacterRanges.join('|');

  return makeString('(', joinByOr, ')');
};

// ---- src/utils/create-strict-validator.ts ----
import type { CreateValidatorOptions, StrictValidator } from '../types';
import { makeString } from './make-string';
import { optionsToRegex } from './options-to-regex';
import { invariant } from './invariant';

/**
 * Creates a validator that accepts a string only when every character
 * belongs to the configured sets (pattern `^(group)+$`, so '' is rejected).
 *
 * The expression is compiled once, when the validator is created. It carries
 * no `g`/`y` flag, so `test` keeps no state between calls. A pattern the
 * engine rejects therefore throws here rather than on first validation.
 *
 * NOTE(review): the score calculator compiles the same group with 'gui';
 * confirm whether the `u` flag is wanted here as well.
 *
 * @param options - Check {@link CreateValidatorOptions} for all the available options
 * @throws Error when `options` or `options.characterSets` is missing
 */
export const createStrictValidator = (options: CreateValidatorOptions): StrictValidator => {
  invariant(options, `createStrictValidator: requires "options" to be supplied`);
  invariant(
    options?.characterSets,
    `createStrictValidator: requires "options.characterSets" to be supplied`,
  );

  const regexGroup = optionsToRegex(options);

  const finalRegexPattern = makeString('^', regexGroup, '+$');
  const strictRegex = new RegExp(finalRegexPattern, 'i');

  const customValidator: StrictValidator = (str: string): boolean => strictRegex.test(str);

  return customValidator;
};

// ---- src/utils/create-threshold-validator.ts ----
import type { CreateValidatorOptions, ThresholdBasedValidator } from '../types';
import { defaultValidationThreshold } from '../constants';
import { createMatchScoreCalculator } from './create-match-score-calculator';
import { invariant } from './invariant';

/**
 * Creates a validator that passes when the match score of a string is at
 * least `threshold`.
 *
 * `threshold` defaults to `defaultValidationThreshold` and is clamped to the
 * 0..100 range before the comparison.
 *
 * @param options - Check {@link CreateValidatorOptions} for all the available options
 * @throws Error when `options` or `options.characterSets` is missing
 */
export const createThresholdBasedValidator = (
  options: CreateValidatorOptions,
): ThresholdBasedValidator => {
  invariant(options, `createThresholdBasedValidator: requires "options" to be supplied`);
  invariant(
    options?.characterSets,
    `createThresholdBasedValidator: requires "options.characterSets" to be supplied`,
  );

  const calculateScore = createMatchScoreCalculator(options);

  const customValidator: ThresholdBasedValidator = (
    str: string,
    threshold: number = defaultValidationThreshold,
  ): boolean => {
    const normalizedThreshold = Math.max(0, Math.min(100, threshold));
    const score = calculateScore(str);
    return score >= normalizedThreshold;
  };

  return customValidator;
};

// ---- src/utils/create-unicode.ts ----
import { isValidUnicode } from './is-valid-unicode';
import { makeString } from './make-string';
import { invariant } from './invariant';

/**
 * Wraps one escaped unicode value in a regex character class: `[value]`.
 *
 * @throws Error when `str` fails `isValidUnicode`
 */
export const createUnicode = (str: string): string => {
  const isValidCode = isValidUnicode(str);

  invariant(isValidCode, `createUnicode: invalid unicode value supplied`);

  return makeString('[', str, ']');
};

// ---- src/utils/create-unicodes.ts ----
import { createUnicode } from './create-unicode';

/**
 * Converts every escaped unicode value into its `[value]` regex class.
 *
 * `createUnicode` either throws or returns a non-empty string, so the result
 * never needs filtering. A missing list is treated as empty.
 */
export const createUnicodes = (unicodes: string[] = []): string[] =>
  unicodes.map((unicode: string) => createUnicode(unicode));

// ---- src/utils/index.ts ----
export * from './create-regex-groups';
export * from './is-valid-unicode';
export * from './create-range';
export * from './create-ranges';
export * from './make-string';
export * from './create-unicode';
export * from './create-unicodes';
export * from './options-to-regex';
export * from './create-strict-validator';
export * from './create-threshold-validator';
export * from './create-match-score-calculator';
export * from './match-string';
export * from './invariant';
// ---- src/utils/invariant.ts ----
// (uses `makeString`, declared further down in this block)

/**
 * Throws when `condition` is falsy; otherwise does nothing.
 *
 * The thrown message is always `message` prefixed with "[japanese-moji]: ".
 *
 * @param condition - any value; it is only checked for truthiness
 * @param message - description of the violated requirement
 * @throws Error when `condition` is falsy
 */
export function invariant(
  condition: any, // eslint-disable-line @typescript-eslint/no-explicit-any
  message: string,
): asserts condition {
  if (condition) {
    return;
  }
  const value: string = makeString('[japanese-moji]: ', message);
  throw new Error(value);
}

// ---- src/utils/is-valid-unicode.ts ----

// A backslash, a lowercase "u", then 4 to 6 hex digits of either case.
// Only the lowercase form is an escape in a regular expression, so "\U1234"
// is rejected. The pattern has no `g`/`y` flag, so sharing one instance
// between calls is safe.
const VALID_UNICODE_PATTERN = /^\\u[0-9A-Fa-f]{4,6}$/;

/**
 * Checks that `str` is an escaped unicode value such as "\\u30a0".
 *
 * NOTE(review): 5- and 6-digit values pass this check, but a `\u` escape in a
 * JavaScript regex takes exactly four digits (longer code points need
 * `\u{...}` with the `u` flag) - confirm how the range builders should treat
 * them.
 *
 * @returns `false` for an empty or missing value
 */
export const isValidUnicode = (str: string): boolean => {
  if (!str) {
    return false;
  }

  return VALID_UNICODE_PATTERN.test(str);
};

// ---- src/utils/make-string.ts ----

/** Concatenates all arguments, in order, without a separator. */
export const makeString = (...args: (string | number)[]): string => {
  return args.join('');
};
// ---- src/utils/match-string.ts ----

/**
 * Returns the text of `str` matched by `regexPattern`.
 *
 * With the `g` flag every match is concatenated in order. Without it only
 * the first full match is returned: a non-global `match` also lists the
 * capture groups, and joining those in would repeat parts of the match.
 *
 * @param str - input; coerced with `String()` before matching
 * @param regexPattern - regular expression source
 * @param flags - regular expression flags
 * @returns the matched text, or '' when nothing matches
 */
export const matchString = (str: string, regexPattern: string, flags?: string): string => {
  const regExp = new RegExp(regexPattern, flags);
  const result = String(str).match(regExp);

  if (!result) {
    return '';
  }

  return regExp.global ? result.join('') : result[0];
};

// ---- src/utils/options-to-regex.ts ----
import type { CreateValidatorOptions } from '../types';
import { createRanges } from './create-ranges';
import { createRegexGroups } from './create-regex-groups';
import { createUnicodes } from './create-unicodes';
import { characterSetMap } from '../constants';

/**
 * Turns validator options into a single regex group.
 *
 * Custom ranges and custom unicode values are converted to character classes
 * and appended after the built-in sets named in `options.characterSets`.
 */
export const optionsToRegex = (options: CreateValidatorOptions): string => {
  const customRangesRegex = createRanges(options?.customRanges || []);
  const customUnicodeRegex = createUnicodes(options?.customUnicodes);
  const mergedCustomRanges = [...customRangesRegex, ...customUnicodeRegex];

  return createRegexGroups(characterSetMap, options.characterSets, mergedCustomRanges);
};

// ---- src/validators/index.ts ----
export * from './kanji';
export * from './kana';
export * from './japanese';

// ---- src/validators/japanese.ts ----
import { CharacterSet, CreateValidatorOptions } from '../types';
import {
  createMatchScoreCalculator,
  createStrictValidator,
  createThresholdBasedValidator,
} from '../utils';

const options: CreateValidatorOptions = {
  characterSets: [
    CharacterSet.CJKPunctuations,
    CharacterSet.Hiragana,
    CharacterSet.Katakana,
    CharacterSet.KatakanaPhoneticExtension,
    CharacterSet.RareKanji,
    CharacterSet.CommonUncommonKanji,
    CharacterSet.KanjiCompatibilityIdeographs,
    CharacterSet.RomanHalfwidthKatakana,
  ],
};

/**
 * Validates Japanese characters in a string
 * Includes Kanji, Kana, Hiragana, Half width kana and CJK punctuations
 */
export const isValidJapanese = createStrictValidator(options);
/** Passes when the Japanese match score reaches the threshold. */
export const isJapanesePresent = createThresholdBasedValidator(options);
/** Match score, as a percentage, for the Japanese character sets. */
export const howMuchJapaneseIsPresent = createMatchScoreCalculator(options);

// ---- src/validators/kana.ts ----
import { CharacterSet, CreateValidatorOptions } from '../types';
import {
  createMatchScoreCalculator,
  createStrictValidator,
  createThresholdBasedValidator,
} from '../utils';

const options: CreateValidatorOptions = {
  characterSets: [
    CharacterSet.KatakanaPhoneticExtension,
    CharacterSet.Katakana,
    CharacterSet.Hiragana,
  ],
};

/**
 * Validates Kana characters in a string
 * Only includes regular characters without punctuations
 */
export const isValidKana = createStrictValidator(options);
/** Passes when the Kana match score reaches the threshold. */
export const isKanaPresent = createThresholdBasedValidator(options);
/** Match score, as a percentage, for the Kana character sets. */
export const howMuchKanaIsPresent = createMatchScoreCalculator(options);

// ---- src/validators/kanji.ts ----
import { CharacterSet, CreateValidatorOptions } from '../types';
import {
  createMatchScoreCalculator,
  createStrictValidator,
  createThresholdBasedValidator,
} from '../utils';

const options: CreateValidatorOptions = {
  characterSets: [
    CharacterSet.RareKanji,
    CharacterSet.CommonUncommonKanji,
    CharacterSet.KanjiCompatibilityIdeographs,
  ],
};

/**
 * Validates Kanji characters in a string
 */
export const isValidKanji = createStrictValidator(options);
/** Passes when the Kanji match score reaches the threshold. */
export const isKanjiPresent = createThresholdBasedValidator(options);
/** Match score, as a percentage, for the Kanji character sets. */
export const howMuchKanjiIsPresent = createMatchScoreCalculator(options);