From 0a03e3d1cf557541a7937f8603821a868c5302fe Mon Sep 17 00:00:00 2001
From: Guillaume Ayoub
Date: Mon, 29 May 2023 14:57:14 +0200
Subject: [PATCH] Use UTF8 indices instead of unicode indices for line split

Related to ietf-tools/datatracker#5507
---
 tests/layout/test_inline.py    | 28 ++++++++++++++++++++++++++++
 weasyprint/layout/preferred.py | 10 +++++-----
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/tests/layout/test_inline.py b/tests/layout/test_inline.py
index 5c5659d15..49b0eddaa 100644
--- a/tests/layout/test_inline.py
+++ b/tests/layout/test_inline.py
@@ -464,6 +464,34 @@ def test_breaking_linebox_regression_14():
     assert line2.children[1].children[0].text == 'c'
 
 
+@assert_no_logs
+def test_breaking_linebox_regression_15():
+    # Regression test for https://github.com/ietf-tools/datatracker/issues/5507
+    page, = render_pages(
+        '<style>'
+        '  @font-face {src: url(weasyprint.otf); font-family: weasyprint}'
+        '  body {font-family: weasyprint; font-size: 4px}'
+        '  pre {float: left}'
+        '</style>'
+        '<pre>ab©\n'
+        'déf\n'
+        'ghïj\n'
+        'klm</pre>')
+    html, = page.children
+    body, = html.children
+    pre, = body.children
+    line1, line2, line3, line4 = pre.children
+    assert line1.children[0].text == 'ab©'
+    assert line2.children[0].text == 'déf'
+    assert line3.children[0].text == 'ghïj'
+    assert line4.children[0].text == 'klm'
+    assert line1.children[0].width == 4 * 3
+    assert line2.children[0].width == 4 * 3
+    assert line3.children[0].width == 4 * 4
+    assert line4.children[0].width == 4 * 3
+    assert pre.width == 4 * 4
+
+
 @assert_no_logs
 def test_linebox_text():
     page, = render_pages('''
diff --git a/weasyprint/layout/preferred.py b/weasyprint/layout/preferred.py
index 917e42705..c508399de 100644
--- a/weasyprint/layout/preferred.py
+++ b/weasyprint/layout/preferred.py
@@ -289,10 +289,10 @@ def inline_line_widths(context, box, outer, is_line_start, minimum,
             else:
                 (skip, skip_stack), = skip_stack.items()
                 assert skip_stack is None
-            child_text = child.text[(skip or 0):]
+            child_text = child.text.encode()[(skip or 0):]
             if is_line_start and space_collapse:
-                child_text = child_text.lstrip(' ')
-            if minimum and child_text == ' ':
+                child_text = child_text.lstrip(b' ')
+            if minimum and child_text == b' ':
                 lines = [0, 0]
             else:
                 max_width = 0 if minimum else None
@@ -302,8 +302,8 @@ def inline_line_widths(context, box, outer, is_line_start, minimum,
                     resume_index += new_resume_index
                     _, _, new_resume_index, width, _, _ = (
                         split_first_line(
-                            child_text[resume_index:], child.style, context,
-                            max_width, child.justification_spacing,
+                            child_text[resume_index:].decode(), child.style,
+                            context, max_width, child.justification_spacing,
                             is_line_start=is_line_start, minimum=True))
                     lines.append(width)
                     if first_line: