From f97f25ef5dfcdfec0d9a359fd970abd139cf3428 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 5 Mar 2024 17:49:01 +0200 Subject: [PATCH] gh-76511: Fix email.Message.as_string() for non-ASCII message with ASCII charset (GH-116125) --- Lib/email/generator.py | 2 +- Lib/email/message.py | 2 +- Lib/test/test_email/test_email.py | 15 +++++++++++++++ .../2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst | 4 ++++ 4 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst diff --git a/Lib/email/generator.py b/Lib/email/generator.py index 7ccbe10eb76856..c8056ad47baa0f 100644 --- a/Lib/email/generator.py +++ b/Lib/email/generator.py @@ -243,7 +243,7 @@ def _handle_text(self, msg): # existing message. msg = deepcopy(msg) del msg['content-transfer-encoding'] - msg.set_payload(payload, charset) + msg.set_payload(msg._payload, charset) payload = msg.get_payload() self._munge_cte = (msg['content-transfer-encoding'], msg['content-type']) diff --git a/Lib/email/message.py b/Lib/email/message.py index fe769580fed5d0..a14cca56b3745a 100644 --- a/Lib/email/message.py +++ b/Lib/email/message.py @@ -340,7 +340,7 @@ def set_payload(self, payload, charset=None): return if not isinstance(charset, Charset): charset = Charset(charset) - payload = payload.encode(charset.output_charset) + payload = payload.encode(charset.output_charset, 'surrogateescape') if hasattr(payload, 'decode'): self._payload = payload.decode('ascii', 'surrogateescape') else: diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index 39d4ace8d4a1d8..d9af05c306eb30 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -337,6 +337,21 @@ def test_nonascii_as_string_without_cte(self): msg = email.message_from_bytes(source) self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_with_ascii_charset(self): + m = textwrap.dedent("""\ + MIME-Version: 1.0 + 
Content-type: text/plain; charset="us-ascii" + Content-Transfer-Encoding: 8bit + + Test if non-ascii messages with no Content-Transfer-Encoding set + can be as_string'd: + Föö bär + """) + source = m.encode('iso-8859-1') + expected = source.decode('ascii', 'replace') + msg = email.message_from_bytes(source) + self.assertEqual(msg.as_string(), expected) + def test_nonascii_as_string_without_content_type_and_cte(self): m = textwrap.dedent("""\ MIME-Version: 1.0 diff --git a/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst new file mode 100644 index 00000000000000..da62f8a2450711 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-29-17-06-54.gh-issue-76511.WqjRLP.rst @@ -0,0 +1,4 @@ +Fix UnicodeEncodeError in :meth:`email.message.Message.as_string` that results when +a message that claims to be in the ascii character set actually has non-ascii +characters. Non-ascii characters are now replaced with the U+FFFD replacement +character, like in the ``replace`` error handler.