From 424acc5cd91cdc3ab7d001d7078bdd23e5deb977 Mon Sep 17 00:00:00 2001 From: cvzi Date: Fri, 26 Nov 2021 23:48:13 +0100 Subject: [PATCH] use_aliases=True overrides language='...' (this restores the behaviour of previous versions) Bugfix: Default delimiters were used instead of the custom delimiters in emojize() when an unknown emoji was found. --- emoji/core.py | 31 +++++++++++------------ tests/test_core.py | 62 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 71 insertions(+), 22 deletions(-) diff --git a/emoji/core.py b/emoji/core.py index b085f88b..d9967a31 100644 --- a/emoji/core.py +++ b/emoji/core.py @@ -54,7 +54,8 @@ def emojize( :param use_aliases: (optional) Enable emoji aliases. See ``emoji.UNICODE_EMOJI_ALIAS``. :param delimiters: (optional) Use delimiters other than _DEFAULT_DELIMITER :param variant: (optional) Choose variation selector between "base"(None), VS-15 ("text_type") and VS-16 ("emoji_type") - :param language: Choose language of emoji name + :param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias' + to use English aliases :param version: (optional) Max version. If set to an Emoji Version, all emoji above this version will be ignored. :param handle_version: (optional) Replace the emoji above ``version`` @@ -77,23 +78,21 @@ def emojize( """ - if use_aliases and (language not in ('en', 'alias')): - warnings.warn("use_aliases=True is only supported for language='en'. " - "It is recommended to use emojize(string, language='alias') instead", stacklevel=2) - - use_aliases = (use_aliases and language == 'en') or language == 'alias' + if use_aliases or language == 'alias': + if language not in ('en', 'alias'): + warnings.warn("use_aliases=True is only supported for language='en'. " + "It is recommended to use emojize(string, language='alias') instead", stacklevel=2) + use_aliases = True + language = 'en' EMOJI_UNICODE = unicode_codes.EMOJI_ALIAS_UNICODE_ENGLISH if use_aliases else unicode_codes.EMOJI_UNICODE[language] pattern = re.compile(u'(%s[\\w\\-&.’”“()!#*+?–,/]+%s)' % delimiters, flags=re.UNICODE) def replace(match): - mg = match.group(1).replace(delimiters[0], _DEFAULT_DELIMITER).replace( - delimiters[1], _DEFAULT_DELIMITER - ) - - emj = EMOJI_UNICODE.get(mg) + mg = match.group(1)[len(delimiters[0]):-len(delimiters[1])] + emj = EMOJI_UNICODE.get(_DEFAULT_DELIMITER + mg + _DEFAULT_DELIMITER) if emj is None: - return mg + return match.group(1) if version is not None: if emj in unicode_codes.EMOJI_DATA and unicode_codes.EMOJI_DATA[emj]['E'] > version: @@ -140,7 +139,8 @@ def demojize( :param string: String contains unicode characters. MUST BE UNICODE. :param use_aliases: (optional) Return emoji aliases. See ``emoji.UNICODE_EMOJI_ALIAS``. :param delimiters: (optional) User delimiters other than ``_DEFAULT_DELIMITER`` - :param language: (optional) Choose language of emoji name + :param language: Choose language of emoji name: language code 'es', 'de', etc. or 'alias' + to use English aliases :param version: (optional) Max version. If set to an Emoji Version, all emoji above this version will be removed. :param handle_version: (optional) Replace the emoji above ``version`` @@ -165,11 +165,10 @@ def demojize( if language == 'alias': language = 'en' use_aliases = True - else: - if use_aliases and language != 'en': + elif use_aliases and language != 'en': warnings.warn("use_aliases=True is only supported for language='en'. " "It is recommended to use demojize(string, language='alias') instead", stacklevel=2) - use_aliases = use_aliases and language == 'en' + language = 'en' tree = _get_search_tree() result = [] diff --git a/tests/test_core.py b/tests/test_core.py index 03d93de1..7cef4b08 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -11,6 +11,11 @@ import pytest +def ascii(s): + # return escaped Code points \U000AB123 + return s.encode("unicode-escape").decode() + + def test_emojize_name_only(): for lang_code, emoji_pack in emoji.EMOJI_UNICODE.items(): for name in emoji_pack.keys(): @@ -108,6 +113,8 @@ def test_emojize_invalid_emoji(): string = '__---___--Invalid__--__-Name' assert emoji.emojize(string, False) == string + string = ':: baby:: :_: : : : : :-: :+:' + assert emoji.emojize(string, False) == string def test_alias(): # When use_aliases=False aliases should be passed through untouched @@ -162,20 +169,28 @@ def test_invalid_alias(): variant="text_type") == ':socer:' -@pytest.mark.filterwarnings("ignore") def test_alias_wrong_language(): # Alias with wrong languages thailand = u'🇹🇭' with pytest.warns(UserWarning) as w: emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es") - assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es") == ':flag_for_Thailand:' + with pytest.warns(UserWarning) as w: + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="de") == thailand + with pytest.warns(UserWarning) as w: + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="es") == thailand + assert emoji.emojize(':flag_for_Thailand:', use_aliases=False, language="es") == ':flag_for_Thailand:' assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="en") == thailand + assert emoji.emojize(':flag_for_Thailand:', use_aliases=False, language="alias") == thailand + assert emoji.emojize(':flag_for_Thailand:', use_aliases=True, language="alias") == thailand with pytest.warns(UserWarning) as w: emoji.demojize(thailand, use_aliases=True, language="es") - assert emoji.demojize(thailand, use_aliases=True, language="es") == ':bandera_tailandia:' + with pytest.warns(UserWarning) as w: + assert emoji.demojize(thailand, use_aliases=True, language="es") == ':flag_for_Thailand:' + assert emoji.demojize(thailand, use_aliases=False, language="es") == ':bandera_tailandia:' assert emoji.demojize(thailand, use_aliases=True, language="en") == ':flag_for_Thailand:' - + assert emoji.demojize(thailand, use_aliases=False, language="alias") == ':flag_for_Thailand:' + assert emoji.demojize(thailand, use_aliases=True, language="alias") == ':flag_for_Thailand:' def test_demojize_name_only(): for emj, item in emoji.EMOJI_DATA.items(): @@ -200,11 +215,23 @@ def test_demojize_complicated_string(): def test_demojize_delimiters(): for e in [u'\U000026BD', u'\U0001f44d', u'\U0001F3C8']: - for d in [(":", ":"), ("a", "b"), ("123", "456"), (u"😁", u"👌")]: + for d in [(":", ":"), ("a", "b"), ("!", "!!"), ("123", "456"), (u"😁", u"👌")]: s = emoji.demojize(e, delimiters=d) assert s.startswith(d[0]) assert s.endswith(d[1]) + text = u"Example of a text with an emoji%sin a sentence" + for e in [u'\U000026BD', u'\U0001f44d', u'\U0001F3C8']: + for d in [(":", ":"), ("!", "-!-"), ("-", "-"), (":", "::"), ("::", "::"), (u"😁", u"👌")]: + text_with_unicode = text % e + demojized_text = emoji.demojize(text_with_unicode, delimiters=d) + assert text_with_unicode != demojized_text + assert e not in demojized_text + assert emoji.emojize(demojized_text, delimiters=d) == text_with_unicode + text_with_emoji = text % emoji.demojize(e, delimiters=d) + assert demojized_text == text_with_emoji + assert emoji.emojize(text_with_emoji, delimiters=d) == text_with_unicode + def test_emoji_lis(): assert emoji.emoji_lis('Hi, I am 👌 test')[0]['location'] == 9 @@ -266,7 +293,7 @@ def test_untranslated(): # untranslated value = emoji.emojize(item['en'], language='en') roundtrip = emoji.demojize(value, language='es') - assert roundtrip == value, '%s != %s (from %s)' % (roundtrip.encode("unicode-escape").decode(), value.encode("unicode-escape").decode(), item['en']) + assert roundtrip == value, '%s != %s (from %s)' % (ascii(roundtrip), ascii(value), item['en']) else: # translated value = emoji.emojize(item['en'], language='en') @@ -377,5 +404,28 @@ def clean(s): def test_text_multiple_times(): + # Run test_text() multiple times because it relies on a random text for i in range(100): test_text() + + +def test_invalid_chars(): + invalidchar = u"\U0001F20F" + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + + invalidchar = u"u\2302 ⌂" + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + assert emoji.demojize(invalidchar) == invalidchar, "%r != %r" % (ascii(emoji.demojize(invalidchar)), ascii(invalidchar)) + + +def test_combine_with_component(): + text = u"Example of a combined emoji%sin a sentence" + + combined = emoji.emojize(text % u":woman_dark_skin_tone:") + seperated = emoji.emojize(text % u":woman::dark_skin_tone:") + assert combined == seperated, "%r != %r" % (ascii(combined), ascii(seperated)) + + combined = emoji.emojize(text % u":woman_dark_skin_tone_white_hair:") + seperated = emoji.emojize(text % u":woman::dark_skin_tone:\u200d:white_hair:") + assert combined == seperated, "%r != %r" % (ascii(combined), ascii(seperated))