From 0dec496727ff57c569de17a3f6ffe611abec118e Mon Sep 17 00:00:00 2001 From: dgw Date: Sat, 7 Aug 2021 00:30:17 -0700 Subject: [PATCH] Merge pull request #2162 from sopel-irc/tell-stripping tell: do not lstrip leading formatting Cherry-picked from master: 8eee9ab08023ba7e979a1d6b7b1c18a5e9f90814 --- sopel/modules/tell.py | 42 +++++++++++- test/modules/test_modules_tell.py | 102 ++++++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 2 deletions(-) diff --git a/sopel/modules/tell.py b/sopel/modules/tell.py index f96f3111bb..2a2717511a 100644 --- a/sopel/modules/tell.py +++ b/sopel/modules/tell.py @@ -15,8 +15,9 @@ import os import threading import time +import unicodedata -from sopel import plugin, tools +from sopel import formatting, plugin, tools from sopel.config import types from sopel.tools.time import format_time, get_timezone @@ -130,6 +131,43 @@ def shutdown(bot): pass +def _format_safe_lstrip(text): + """``str.lstrip()`` but without eating IRC formatting. + + :param str text: text to clean + :rtype: str + :raises TypeError: if the passed ``text`` is not a string + + Stolen and tweaked from the ``choose`` plugin's ``_format_safe()`` + function by the person who wrote it. 
+ """ + if not isinstance(text, str): + raise TypeError("A string is required.") + elif not text: + # unnecessary optimization + return '' + + start = 0 + + # strip left + pos = 0 + while pos < len(text): + is_whitespace = unicodedata.category(text[pos]) == 'Zs' + is_non_printing = ( + text[pos] in formatting.CONTROL_NON_PRINTING and + text[pos] not in formatting.CONTROL_FORMATTING + ) + if not is_whitespace and not is_non_printing: + start = pos + break + pos += 1 + else: + # skipped everything; string is all whitespace + return '' + + return text[start:] + + @plugin.command('tell', 'ask') @plugin.nickname_command('tell', 'ask') @plugin.example('$nickname, tell dgw he broke something again.') @@ -143,7 +181,7 @@ def f_remind(bot, trigger): return tellee = trigger.group(3).rstrip('.,:;') - msg = trigger.group(2).lstrip(tellee).lstrip() + msg = _format_safe_lstrip(trigger.group(2).split(' ', 1)[1]) if not msg: bot.reply("%s %s what?" % (verb, tellee)) diff --git a/test/modules/test_modules_tell.py b/test/modules/test_modules_tell.py index 66c113f0d0..9c4d6de3c7 100644 --- a/test/modules/test_modules_tell.py +++ b/test/modules/test_modules_tell.py @@ -6,6 +6,9 @@ import io import os +import pytest + +from sopel import formatting from sopel.modules import tell @@ -229,3 +232,102 @@ def test_get_reminders(): 'Exirel: ' '%s - 14:35:55UTC ' ' tell Exirel Thanks for the review.' 
% today) + + +# Test custom lstrip implementation + +UNICODE_ZS_CATEGORY = [ + '\u0020', # SPACE + '\u00A0', # NO-BREAK SPACE + '\u1680', # OGHAM SPACE MARK + '\u2000', # EN QUAD + '\u2001', # EM QUAD + '\u2002', # EN SPACE + '\u2003', # EM SPACE + '\u2004', # THREE-PER-EM SPACE + '\u2005', # FOUR-PER-EM SPACE + '\u2006', # SIX-PER-EM SPACE + '\u2007', # FIGURE SPACE + '\u2008', # PUNCTUATION SPACE + '\u2009', # THIN SPACE + '\u200A', # HAIR SPACE + '\u202F', # NARROW NO-BREAK SPACE + '\u205F', # MEDIUM MATHEMATICAL SPACE + '\u3000', # IDEOGRAPHIC SPACE +] + +SAFE_PAIRS = ( + # regression checks vs. old string.lstrip() + ('', + ''), + ('a', # one iteration of this code returned '' for one-char strings + 'a'), + ('aa', + 'aa'), + # basic whitespace + (' leading space', # removed + 'leading space'), + ('trailing space ', # kept + 'trailing space '), + (' leading AND trailing space ', # removed AND kept + 'leading AND trailing space '), + # advanced whitespace + ('\tleading tab', # removed + 'leading tab'), + ('trailing tab\t', # kept + 'trailing tab\t'), + # whitespace inside formatting (kept) + ('\x02 leading space inside formatting\x02', + '\x02 leading space inside formatting\x02'), + ('\x02trailing space inside formatting \x02', + '\x02trailing space inside formatting \x02'), + ('\x02 leading AND trailing inside formatting \x02', + '\x02 leading AND trailing inside formatting \x02'), + # whitespace outside formatting + (' \x02leading space outside formatting\x02', # removed + '\x02leading space outside formatting\x02'), + ('\x02trailing space outside formatting\x02 ', # kept + '\x02trailing space outside formatting\x02 '), + # whitespace both inside and outside formatting + (' \x02 leading space inside AND outside\x02', # outside removed + '\x02 leading space inside AND outside\x02'), + ('\x02trailing space inside AND outside \x02 ', # left alone + '\x02trailing space inside AND outside \x02 '), + (' \x02 leading AND trailing inside AND outside \x02 ', # only 
leading removed + '\x02 leading AND trailing inside AND outside \x02 '), +) + + +def test_format_safe_lstrip_basic(): + """Test handling of basic whitespace.""" + assert tell._format_safe_lstrip( + ''.join(UNICODE_ZS_CATEGORY)) == '' + + +def test_format_safe_lstrip_control(): + """Test handling of non-printing control characters.""" + all_formatting = ''.join(formatting.CONTROL_FORMATTING) + + # no formatting chars should be stripped, + # and nothing (e.g. a reset) should be appended to the end + assert tell._format_safe_lstrip(all_formatting) == all_formatting + + # control characters not recognized as formatting should be stripped + assert tell._format_safe_lstrip( + ''.join( + c + for c in formatting.CONTROL_NON_PRINTING + if c not in formatting.CONTROL_FORMATTING + )) == '' + + +def test_format_safe_lstrip_invalid_arg(): + """Test for correct exception if non-string is passed.""" + with pytest.raises(TypeError): + tell._format_safe_lstrip(None) + + +@pytest.mark.parametrize('text, cleaned', SAFE_PAIRS) +def test_format_safe_lstrip_pairs(text, cleaned): + """Test expected formatting-safe string sanitization.""" + assert tell._format_safe_lstrip(text) == cleaned