Skip to content

Commit

Permalink
use_aliases=True overrides language='...' (this restores the behaviour of previous versions)
Browse files Browse the repository at this point in the history

Bugfix: Default delimiters were used instead of the custom delimiters in emojize() when an unknown emoji was found.
  • Loading branch information
cvzi committed Nov 26, 2021
1 parent 148d531 commit 424acc5
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 22 deletions.
31 changes: 15 additions & 16 deletions emoji/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def emojize(
:param use_aliases: (optional) Enable emoji aliases. See ``emoji.UNICODE_EMOJI_ALIAS``.
:param delimiters: (optional) Use delimiters other than _DEFAULT_DELIMITER
:param variant: (optional) Choose variation selector between "base"(None), VS-15 ("text_type") and VS-16 ("emoji_type")
:param language: Choose language of emoji name
:param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias'
to use English aliases
:param version: (optional) Max version. If set to an Emoji Version,
all emoji above this version will be ignored.
:param handle_version: (optional) Replace the emoji above ``version``
Expand All @@ -77,23 +78,21 @@ def emojize(
"""

if use_aliases and (language not in ('en', 'alias')):
warnings.warn("use_aliases=True is only supported for language='en'. "
"It is recommended to use emojize(string, language='alias') instead", stacklevel=2)

use_aliases = (use_aliases and language == 'en') or language == 'alias'
if use_aliases or language == 'alias':
if language not in ('en', 'alias'):
warnings.warn("use_aliases=True is only supported for language='en'. "
"It is recommended to use emojize(string, language='alias') instead", stacklevel=2)
use_aliases = True
language = 'en'

EMOJI_UNICODE = unicode_codes.EMOJI_ALIAS_UNICODE_ENGLISH if use_aliases else unicode_codes.EMOJI_UNICODE[language]
pattern = re.compile(u'(%s[\\w\\-&.’”“()!#*+?–,/]+%s)' % delimiters, flags=re.UNICODE)

def replace(match):
mg = match.group(1).replace(delimiters[0], _DEFAULT_DELIMITER).replace(
delimiters[1], _DEFAULT_DELIMITER
)

emj = EMOJI_UNICODE.get(mg)
mg = match.group(1)[len(delimiters[0]):-len(delimiters[1])]
emj = EMOJI_UNICODE.get(_DEFAULT_DELIMITER + mg + _DEFAULT_DELIMITER)
if emj is None:
return mg
return match.group(1)

if version is not None:
if emj in unicode_codes.EMOJI_DATA and unicode_codes.EMOJI_DATA[emj]['E'] > version:
Expand Down Expand Up @@ -140,7 +139,8 @@ def demojize(
:param string: String contains unicode characters. MUST BE UNICODE.
:param use_aliases: (optional) Return emoji aliases. See ``emoji.UNICODE_EMOJI_ALIAS``.
:param delimiters: (optional) User delimiters other than ``_DEFAULT_DELIMITER``
:param language: (optional) Choose language of emoji name
:param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias'
to use English aliases
:param version: (optional) Max version. If set to an Emoji Version,
all emoji above this version will be removed.
:param handle_version: (optional) Replace the emoji above ``version``
Expand All @@ -165,11 +165,10 @@ def demojize(
if language == 'alias':
language = 'en'
use_aliases = True
else:
if use_aliases and language != 'en':
elif use_aliases and language != 'en':
warnings.warn("use_aliases=True is only supported for language='en'. "
"It is recommended to use demojize(string, language='alias') instead", stacklevel=2)
use_aliases = use_aliases and language == 'en'
language = 'en'

tree = _get_search_tree()
result = []
Expand Down
62 changes: 56 additions & 6 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
import pytest


def ascii(s):
    """Return ``s`` re-encoded with the ``unicode-escape`` codec as text.

    Non-ASCII code points come back as escape sequences such as
    ``\\U000AB123``, which keeps assertion messages readable on terminals
    that cannot display the characters themselves.

    :param s: Unicode string to escape.
    :return: The escaped representation of ``s`` as a string.
    """
    escaped_bytes = s.encode("unicode-escape")
    return escaped_bytes.decode()


def test_emojize_name_only():
for lang_code, emoji_pack in emoji.EMOJI_UNICODE.items():
for name in emoji_pack.keys():
Expand Down Expand Up @@ -108,6 +113,8 @@ def test_emojize_invalid_emoji():
string = '__---___--Invalid__--__-Name'
assert emoji.emojize(string, False) == string

string = ':: baby:: :_: : : : : :-: :+:'
assert emoji.emojize(string, False) == string

def test_alias():
# When use_aliases=False aliases should be passed through untouched
Expand Down Expand Up @@ -162,20 +169,28 @@ def test_invalid_alias():
variant="text_type") == ':socer:'


@pytest.mark.filterwarnings("ignore")
def test_alias_wrong_language():
# Alias with wrong languages
thailand = u'🇹🇭'
with pytest.warns(UserWarning) as w:
emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es")
assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es") == ':flag_for_Thailand:'
with pytest.warns(UserWarning) as w:
assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="de") == thailand
with pytest.warns(UserWarning) as w:
assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es") == thailand
assert emoji.emojize(':flag_for_Thailand:', use_aliases=False, language="es") == ':flag_for_Thailand:'
assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="en") == thailand
assert emoji.emojize(':flag_for_Thailand:', use_aliases=False, language="alias") == thailand
assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="alias") == thailand

with pytest.warns(UserWarning) as w:
emoji.demojize(thailand, use_aliases=True, language="es")
assert emoji.demojize(thailand, use_aliases=True, language="es") == ':bandera_tailandia:'
with pytest.warns(UserWarning) as w:
assert emoji.demojize(thailand, use_aliases=True, language="es") == ':flag_for_Thailand:'
assert emoji.demojize(thailand, use_aliases=False, language="es") == ':bandera_tailandia:'
assert emoji.demojize(thailand, use_aliases=True, language="en") == ':flag_for_Thailand:'

assert emoji.demojize(thailand, use_aliases=False, language="alias") == ':flag_for_Thailand:'
assert emoji.demojize(thailand, use_aliases=True, language="alias") == ':flag_for_Thailand:'

def test_demojize_name_only():
for emj, item in emoji.EMOJI_DATA.items():
Expand All @@ -200,11 +215,23 @@ def test_demojize_complicated_string():

def test_demojize_delimiters():
for e in [u'\U000026BD', u'\U0001f44d', u'\U0001F3C8']:
for d in [(":", ":"), ("a", "b"), ("123", "456"), (u"😁", u"👌")]:
for d in [(":", ":"), ("a", "b"), ("!", "!!"), ("123", "456"), (u"😁", u"👌")]:
s = emoji.demojize(e, delimiters=d)
assert s.startswith(d[0])
assert s.endswith(d[1])

text = u"Example of a text with an emoji%sin a sentence"
for e in [u'\U000026BD', u'\U0001f44d', u'\U0001F3C8']:
for d in [(":", ":"), ("!", "-!-"), ("-", "-"), (":", "::"), ("::", "::"), (u"😁", u"👌")]:
text_with_unicode = text % e
demojized_text = emoji.demojize(text_with_unicode, delimiters=d)
assert text_with_unicode != demojized_text
assert e not in demojized_text
assert emoji.emojize(demojized_text, delimiters=d) == text_with_unicode
text_with_emoji = text % emoji.demojize(e, delimiters=d)
assert demojized_text == text_with_emoji
assert emoji.emojize(text_with_emoji, delimiters=d) == text_with_unicode


def test_emoji_lis():
assert emoji.emoji_lis('Hi, I am 👌 test')[0]['location'] == 9
Expand Down Expand Up @@ -266,7 +293,7 @@ def test_untranslated():
# untranslated
value = emoji.emojize(item['en'], language='en')
roundtrip = emoji.demojize(value, language='es')
assert roundtrip == value, '%s != %s (from %s)' % (roundtrip.encode("unicode-escape").decode(), value.encode("unicode-escape").decode(), item['en'])
assert roundtrip == value, '%s != %s (from %s)' % (ascii(roundtrip), ascii(value), item['en'])
else:
# translated
value = emoji.emojize(item['en'], language='en')
Expand Down Expand Up @@ -377,5 +404,28 @@ def clean(s):


def test_text_multiple_times():
    """Call test_text() 100 times, because each run relies on a random text."""
    for _ in range(100):
        test_text()


def test_invalid_chars():
    """Check that demojize() returns non-emoji input unchanged.

    Two inputs are checked: the single code point U+1F20F, and a short
    string built from U+2302 written once as an escape and once literally.
    """
    samples = (
        u"\U0001F20F",
        # The original literal was u"u\2302 ⌂": "\230" is an octal escape
        # followed by a literal "2", not the intended "\u2302" escape.
        u"\u2302 ⌂",
    )
    for invalidchar in samples:
        # NOTE(review): the original asserted each input twice in a row;
        # presumably to confirm a repeated call gives the same answer —
        # confirm before collapsing this to a single call.
        for _ in range(2):
            result = emoji.demojize(invalidchar)
            assert result == invalidchar, "%r != %r" % (ascii(result), ascii(invalidchar))


def test_combine_with_component():
    """Check that a combined emoji name and its component names emojize alike.

    Each case pairs one name for the whole emoji with the same emoji spelled
    as adjacent component names (joined by U+200D where the case includes
    it). Both spellings must yield the same text after emojize().
    """
    template = u"Example of a combined emoji%sin a sentence"
    cases = (
        (u":woman_dark_skin_tone:", u":woman::dark_skin_tone:"),
        (u":woman_dark_skin_tone_white_hair:", u":woman::dark_skin_tone:\u200d:white_hair:"),
    )
    for combined_name, component_names in cases:
        combined = emoji.emojize(template % combined_name)
        separated = emoji.emojize(template % component_names)
        assert combined == separated, "%r != %r" % (ascii(combined), ascii(separated))

0 comments on commit 424acc5

Please sign in to comment.