From f835b53da4d7285c0ce00933ca9e84286517b3d4 Mon Sep 17 00:00:00 2001 From: Miles Johnson Date: Sat, 8 Jun 2024 17:59:28 -0700 Subject: [PATCH] fix: Improve regex patterns. (#175) * Update regex. * Go back. * Add more tests. * Fix. --- bin/generateRegex.mjs | 11 +++++++++++ package.json | 1 + packages/regex/tests/regex.test.ts | 31 +++++++++++++++++++++++++++--- 3 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 bin/generateRegex.mjs diff --git a/bin/generateRegex.mjs b/bin/generateRegex.mjs new file mode 100644 index 00000000..4dfba8df --- /dev/null +++ b/bin/generateRegex.mjs @@ -0,0 +1,11 @@ +#!/usr/bin/env node + +import { generateRegex } from 'emojibase-generator'; + +function handleError(error) { + console.error(error.message, error.stack); +} + +process.on('unhandledRejection', handleError); + +generateRegex().catch(handleError); diff --git a/package.json b/package.json index e0678755..17f461a8 100644 --- a/package.json +++ b/package.json @@ -13,6 +13,7 @@ "generate": "yarn run generate:emojis && yarn run generate:shortcodes && node ./bin/buildFilesizeTable.mjs && yarn run generate:tests", "generate:emojis": "rm -rf ./cache/final && node ./bin/generateEmoji.mjs", "generate:po": "node ./bin/generatePo.mjs", + "generate:regex": "node ./bin/generateRegex.mjs", "generate:shortcodes": "node ./bin/generateShortcodes.mjs", "generate:tests": "cp ./packages/data/en/data.json ./packages/test-utils/test-data.json && cp ./packages/data/en/shortcodes/emojibase.json ./packages/test-utils/test-shortcodes.json && cp ./packages/data/en/messages.json ./packages/test-utils/test-messages.json", "generate:types": "node ./bin/generateTypes.mjs", diff --git a/packages/regex/tests/regex.test.ts b/packages/regex/tests/regex.test.ts index 560bc3bc..4968fd5f 100644 --- a/packages/regex/tests/regex.test.ts +++ b/packages/regex/tests/regex.test.ts @@ -72,7 +72,8 @@ describe('regex', () => { tests.push({ type: 'emoji', unicode: emoji.emoji, - pass: [...BASE_PATTERNS, 'emojiLoose', 'emojiCodepoint', 'emojiCodepointLoose'], + pass: [...BASE_PATTERNS, 'emoji', 'emojiLoose', 'emojiCodepoint', 'emojiCodepointLoose'], + // fail: ['text', 'textLoose', 'textCodepoint', 'textCodepointLoose'], }); } @@ -81,11 +82,12 @@ describe('regex', () => { tests.push({ type: 'text', unicode: emoji.text, - pass: [...BASE_PATTERNS, 'textLoose', 'textCodepoint', 'textCodepointLoose'], + pass: [...BASE_PATTERNS, 'text', 'textLoose', 'textCodepoint', 'textCodepointLoose'], + // fail: ['emoji', 'emojiLoose', 'emojiCodepoint', 'emojiCodepointLoose'], }); } - tests.forEach(({ unicode, type, pass }) => { + tests.forEach(({ unicode, type, pass, fail }) => { describe(`${VARIATION_DESCRIPTIONS[type]}`, () => { pass.forEach((passType) => { const pattern = PATTERNS[passType]; @@ -113,6 +115,17 @@ describe('regex', () => { }); }); }); + + fail?.forEach((passType) => { + const pattern = PATTERNS[passType]; + + describe(`fails ${PATTERN_DESCRIPTIONS[passType]}`, () => { + it(`doesnt match unicode by itself for ${unicode}`, () => { + const match = unicode.match(pattern)!; + expect(match).toBeNull(); + }); + }); + }); }); }); @@ -194,4 +207,16 @@ describe('regex', () => { }); }); }); + + it('manual cases', () => { + expect('👍🏻'.match(PATTERNS.combo)).not.toBeNull(); + expect('👍🏻'.match(PATTERNS.emoji)).not.toBeNull(); + expect('👍🏻'.match(PATTERNS.emojiLoose)).not.toBeNull(); + // expect('👍🏻'.match(PATTERNS.text)).toBeNull(); + + expect('👍'.match(PATTERNS.combo)).not.toBeNull(); + expect('👍'.match(PATTERNS.emoji)).toBeNull(); + expect('👍'.match(PATTERNS.emojiLoose)).not.toBeNull(); + // expect('👍'.match(PATTERNS.text)).toBeNull(); + }); });