Skip to content

Commit

Permalink
Normalize emoji name to NFKC to find it in EMOJI_DATA
Browse files Browse the repository at this point in the history
Put the regular expression in a global variable _EMOJI_NAME_PATTERN, so it can be used by the tests
Escape characters in the regular expression and remove some unused characters
  • Loading branch information
cvzi committed Oct 5, 2022
1 parent fff013b commit 5c19561
Showing 1 changed file with 14 additions and 4 deletions.
18 changes: 14 additions & 4 deletions emoji/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
"""

import sys
import unicodedata
import re

from emoji import unicode_codes
Expand All @@ -21,6 +23,14 @@

# NOTE(review): initialised to None here; where it is populated is not
# visible in this excerpt.
_SEARCH_TREE = None
# Delimiter wrapped around an emoji name to build the key that emojize()
# looks up in the language pack.
_DEFAULT_DELIMITER = ':'
# Characters allowed inside an emoji name. This is the *contents* of a regex
# character class: emojize() interpolates it into u'(%s[%s]+%s)' between the
# escaped delimiters. The trailing \uXXXX escapes are combining marks
# (U+0300..U+0327 and U+064B..U+0654).
_EMOJI_NAME_PATTERN = u'\\w\\-&.’”“()!#*+?–,/«»\u0300\u0301\u0302\u0303\u0308\u030a\u0327\u064b\u064e\u064f\u0650\u0653\u0654'
# True when the interpreter's major version is 2.
_PY2 = sys.version_info[0] == 2


def _normalize(form, s):
    """Return ``s`` normalized to the Unicode normalization form ``form``.

    :param form: Name of the normalization form handed to
        ``unicodedata.normalize`` (for example ``'NFKC'``).
    :param s: The text to normalize.
    :return: The normalized text.
    """
    # On Python 2 the input is converted with ``unicode()`` before it is
    # normalized; on other versions it is passed through unchanged.
    text = unicode(s) if _PY2 else s
    return unicodedata.normalize(form, text)


def emojize(
Expand Down Expand Up @@ -78,12 +88,12 @@ def emojize(
else:
language_pack = unicode_codes.get_emoji_unicode_dict(language)

pattern = re.compile(u'(%s[\\w\\-&.’”“()!#*+?–,/ًٌٍَُِّْؤئيإأآةك‌ٔء«»]+%s)' %
(re.escape(delimiters[0]), re.escape(delimiters[1])), flags=re.UNICODE)
pattern = re.compile(u'(%s[%s]+%s)' %
(re.escape(delimiters[0]), _EMOJI_NAME_PATTERN, re.escape(delimiters[1])), flags=re.UNICODE)

def replace(match):
mg = match.group(1)[len(delimiters[0]):-len(delimiters[1])]
emj = language_pack.get(_DEFAULT_DELIMITER + mg + _DEFAULT_DELIMITER)
name = match.group(1)[len(delimiters[0]):-len(delimiters[1])]
emj = language_pack.get(_DEFAULT_DELIMITER + _normalize('NFKC', name) + _DEFAULT_DELIMITER)
if emj is None:
return match.group(1)

Expand Down

0 comments on commit 5c19561

Please sign in to comment.