Skip to content

Commit

Permalink
Fix for email.generator.Generator with whitespace between encoded words.
Browse files Browse the repository at this point in the history
email.generator.Generator currently does not handle whitespace between
encoded words correctly when the encoded words span multiple lines.  The
current generator will create an encoded word for each line.  If the end
of the line happens to correspond with the end of a real word in the
plaintext, the generator will place an unencoded space at the start of
the subsequent lines to represent the whitespace between the plaintext
words.

A compliant decoder will strip all the whitespace from between two
encoded words, which leads to missing spaces in the round-tripped
output.

The fix for this is to make sure that whitespace between two encoded
words ends up inside of one or the other of the encoded words.  This
fix places the space inside of the second encoded word.

Test case from python#92081
  • Loading branch information
abadger committed May 4, 2022
1 parent feca9bb commit eb1cdc1
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
14 changes: 10 additions & 4 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2766,10 +2766,11 @@ def _refold_parse_tree(parse_tree, *, policy):
# max_line_length 0/None means no limit, ie: infinitely long.
maxlen = policy.max_line_length or sys.maxsize
encoding = 'utf-8' if policy.utf8 else 'us-ascii'
lines = ['']
last_ew = None
lines = [''] # Folded lines to be output
last_ew = None # Points to the last encoded character if there's an ew on
# the line
wrap_as_ew_blocked = 0
want_encoding = False
want_encoding = False # True if we need to encode this part
end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked')
parts = list(parse_tree)
while parts:
Expand All @@ -2793,10 +2794,12 @@ def _refold_parse_tree(parse_tree, *, policy):
# 'charset' property on the policy.
charset = 'utf-8'
want_encoding = True

if part.token_type == 'mime-parameters':
# Mime parameter folding (using RFC2231) is extra special.
_fold_mime_parameters(part, lines, maxlen, encoding)
continue

if want_encoding and not wrap_as_ew_blocked:
if not part.as_ew_allowed:
want_encoding = False
Expand Down Expand Up @@ -2826,6 +2829,7 @@ def _refold_parse_tree(parse_tree, *, policy):
part.ew_combine_allowed, charset)
want_encoding = False
continue

if len(tstr) <= maxlen - len(lines[-1]):
lines[-1] += tstr
continue
Expand Down Expand Up @@ -2860,6 +2864,7 @@ def _refold_parse_tree(parse_tree, *, policy):
else:
# We can't fold it onto the next line either...
lines[-1] += tstr

return policy.linesep.join(lines) + policy.linesep

def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
Expand All @@ -2877,14 +2882,15 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset):
to_encode = str(
get_unstructured(lines[-1][last_ew:] + to_encode))
lines[-1] = lines[-1][:last_ew]
if to_encode[0] in WSP:
elif to_encode[0] in WSP:
# We're joining this to non-encoded text, so don't encode
# the leading blank.
leading_wsp = to_encode[0]
to_encode = to_encode[1:]
if (len(lines[-1]) == maxlen):
lines.append(_steal_trailing_WSP_if_exists(lines))
lines[-1] += leading_wsp

trailing_wsp = ''
if to_encode[-1] in WSP:
# Likewise for the trailing space.
Expand Down
13 changes: 13 additions & 0 deletions Lib/test/test_email/test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,19 @@ class TestBytesGenerator(TestGeneratorBase, TestEmailBase):
ioclass = io.BytesIO
typ = lambda self, x: x.encode('ascii')

def test_defaults_handle_spaces_between_encoded_words_when_wrapped(self):
# Checks that a non-ASCII Subject which has to be folded over several
# lines keeps the space that separates two words when the fold lands
# exactly on a word boundary.  No policy is passed to EmailMessage or
# BytesGenerator, so whatever defaults they use are what is exercised.
source = ("Уведомление о принятии в работу обращения для"
" подключения услуги")
# Expected output is three base64 encoded words, one per folded line.
# The first line ends at the end of a word; the second encoded word's
# payload begins with "INCy", i.e. an encoded space followed by the next
# letter, so the inter-word space lives inside the encoded word rather
# than as bare whitespace between two encoded words.
expected = ('Subject: =?utf-8?b?0KPQstC10LTQvtC80LvQtdC90LjQtSDQviDQv9GA0LjQvdGP0YLQuNC4?=\n'
' =?utf-8?b?INCyINGA0LDQsdC+0YLRgyDQvtCx0YDQsNGJ0LXQvdC40Y8g0LTQu9GPINC/0L4=?=\n'
' =?utf-8?b?0LTQutC70Y7Rh9C10L3QuNGPINGD0YHQu9GD0LPQuA==?=\n\n').encode('ascii')
msg = EmailMessage()
msg['Subject'] = source
# Serialize the message to bytes and compare the whole output, including
# the blank line that terminates the header block.
s = io.BytesIO()
g = BytesGenerator(s)
g.flatten(msg)
self.assertEqual(s.getvalue(), expected)

def test_cte_type_7bit_handles_unknown_8bit(self):
source = ("Subject: Maintenant je vous présente mon "
"collègue\n\n").encode('utf-8')
Expand Down

0 comments on commit eb1cdc1

Please sign in to comment.