Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-91810: ElementTree: Use text file's encoding by default in XML declaration #91903

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 15 additions & 16 deletions Lib/test/test_xml_etree.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import html
import io
import itertools
import locale
import operator
import os
import pickle
Expand Down Expand Up @@ -978,15 +977,13 @@ def test_tostring_xml_declaration(self):

def test_tostring_xml_declaration_unicode_encoding(self):
elem = ET.XML('<body><tag/></body>')
preferredencoding = locale.getpreferredencoding()
self.assertEqual(
f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>",
ET.tostring(elem, encoding='unicode', xml_declaration=True)
ET.tostring(elem, encoding='unicode', xml_declaration=True),
"<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
)

def test_tostring_xml_declaration_cases(self):
elem = ET.XML('<body><tag>ø</tag></body>')
preferredencoding = locale.getpreferredencoding()
TESTCASES = [
# (expected_retval, encoding, xml_declaration)
# ... xml_declaration = None
Expand All @@ -1013,7 +1010,7 @@ def test_tostring_xml_declaration_cases(self):
b"<body><tag>&#248;</tag></body>", 'US-ASCII', True),
(b"<?xml version='1.0' encoding='ISO-8859-1'?>\n"
b"<body><tag>\xf8</tag></body>", 'ISO-8859-1', True),
(f"<?xml version='1.0' encoding='{preferredencoding}'?>\n"
("<?xml version='1.0' encoding='utf-8'?>\n"
"<body><tag>ø</tag></body>", 'unicode', True),

]
Expand Down Expand Up @@ -1051,11 +1048,10 @@ def test_tostringlist_xml_declaration(self):
b"<?xml version='1.0' encoding='us-ascii'?>\n<body><tag /></body>"
)

preferredencoding = locale.getpreferredencoding()
stringlist = ET.tostringlist(elem, encoding='unicode', xml_declaration=True)
self.assertEqual(
''.join(stringlist),
f"<?xml version='1.0' encoding='{preferredencoding}'?>\n<body><tag /></body>"
"<?xml version='1.0' encoding='utf-8'?>\n<body><tag /></body>"
)
self.assertRegex(stringlist[0], r"^<\?xml version='1.0' encoding='.+'?>")
self.assertEqual(['<body', '>', '<tag', ' />', '</body>'], stringlist[1:])
Expand Down Expand Up @@ -3740,17 +3736,16 @@ def test_write_to_filename_as_unicode(self):
encoding = f.encoding
os_helper.unlink(TESTFN)

try:
'\xf8'.encode(encoding)
except UnicodeEncodeError:
self.skipTest(f'default file encoding {encoding} not supported')

tree = ET.ElementTree(ET.XML('''<site>\xf8</site>'''))
tree.write(TESTFN, encoding='unicode')
with open(TESTFN, 'rb') as f:
data = f.read()
expected = "<site>\xf8</site>".encode(encoding, 'xmlcharrefreplace')
self.assertEqual(data, expected)
if encoding.lower() in ('utf-8', 'ascii'):
self.assertEqual(data, expected)
else:
self.assertIn(b"<?xml version='1.0' encoding=", data)
self.assertIn(expected, data)

def test_write_to_text_file(self):
self.addCleanup(os_helper.unlink, TESTFN)
Expand All @@ -3765,13 +3760,17 @@ def test_write_to_text_file(self):
tree.write(f, encoding='unicode')
self.assertFalse(f.closed)
with open(TESTFN, 'rb') as f:
self.assertEqual(f.read(), b'''<site>&#248;</site>''')
self.assertEqual(f.read(), convlinesep(
b'''<?xml version='1.0' encoding='ascii'?>\n'''
b'''<site>&#248;</site>'''))

with open(TESTFN, 'w', encoding='ISO-8859-1') as f:
tree.write(f, encoding='unicode')
self.assertFalse(f.closed)
with open(TESTFN, 'rb') as f:
self.assertEqual(f.read(), b'''<site>\xf8</site>''')
self.assertEqual(f.read(), convlinesep(
b'''<?xml version='1.0' encoding='ISO-8859-1'?>\n'''
b'''<site>\xf8</site>'''))

def test_write_to_binary_file(self):
self.addCleanup(os_helper.unlink, TESTFN)
Expand Down
23 changes: 9 additions & 14 deletions Lib/xml/etree/ElementTree.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,16 +728,10 @@ def write(self, file_or_filename,
encoding = "utf-8"
else:
encoding = "us-ascii"
enc_lower = encoding.lower()
with _get_writer(file_or_filename, enc_lower) as write:
with _get_writer(file_or_filename, encoding) as (write, declared_encoding):
if method == "xml" and (xml_declaration or
(xml_declaration is None and
enc_lower not in ("utf-8", "us-ascii", "unicode"))):
declared_encoding = encoding
if enc_lower == "unicode":
# Retrieve the default encoding for the xml declaration
import locale
declared_encoding = locale.getpreferredencoding()
declared_encoding.lower() not in ("utf-8", "us-ascii"))):
write("<?xml version='1.0' encoding='%s'?>\n" % (
declared_encoding,))
if method == "text":
Expand All @@ -762,19 +756,20 @@ def _get_writer(file_or_filename, encoding):
write = file_or_filename.write
except AttributeError:
# file_or_filename is a file name
if encoding == "unicode":
file = open(file_or_filename, "w")
if encoding.lower() == "unicode":
file = open(file_or_filename, "w",
errors="xmlcharrefreplace")
else:
file = open(file_or_filename, "w", encoding=encoding,
errors="xmlcharrefreplace")
with file:
yield file.write
yield file.write, file.encoding
else:
# file_or_filename is a file-like object
# encoding determines if it is a text or binary writer
if encoding == "unicode":
if encoding.lower() == "unicode":
# use a text writer as is
yield write
yield write, getattr(file_or_filename, "encoding", None) or "utf-8"
else:
# wrap a binary writer with TextIOWrapper
with contextlib.ExitStack() as stack:
Expand Down Expand Up @@ -805,7 +800,7 @@ def _get_writer(file_or_filename, encoding):
# Keep the original file open when the TextIOWrapper is
# destroyed
stack.callback(file.detach)
yield file.write
yield file.write, encoding

def _namespaces(elem, default_namespace=None):
# identify namespaces used in this tree
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
The :class:`~xml.etree.ElementTree.ElementTree` method
:meth:`~xml.etree.ElementTree.ElementTree.write` and the function
:func:`~xml.etree.ElementTree.tostring` now use the text file's encoding
("UTF-8" if not available) instead of the locale encoding in the XML
declaration when ``encoding="unicode"`` is specified.