From 9877f4c6249ac7f374dc48beaf21ea2bf3ee6996 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 23 Jun 2022 12:09:57 +0900 Subject: [PATCH] gh-85308: argparse: Use filesystem encoding for arguments file (GH-93277) --- Doc/library/argparse.rst | 11 ++++++++++- Doc/whatsnew/3.12.rst | 6 ++++++ Lib/argparse.py | 4 +++- .../2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst | 4 ++++ 4 files changed, 23 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index 0e62e99d706d4c..b2fa0b3c23c3a1 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -562,7 +562,7 @@ at the command line. If the ``fromfile_prefix_chars=`` argument is given to the specified characters will be treated as files, and will be replaced by the arguments they contain. For example:: - >>> with open('args.txt', 'w') as fp: + >>> with open('args.txt', 'w', encoding=sys.getfilesystemencoding()) as fp: ... fp.write('-f\nbar') >>> parser = argparse.ArgumentParser(fromfile_prefix_chars='@') >>> parser.add_argument('-f') @@ -575,9 +575,18 @@ were in the same place as the original file referencing argument on the command line. So in the example above, the expression ``['-f', 'foo', '@args.txt']`` is considered equivalent to the expression ``['-f', 'foo', '-f', 'bar']``. +:class:`ArgumentParser` uses :term:`filesystem encoding and error handler` +to read the file containing arguments. + The ``fromfile_prefix_chars=`` argument defaults to ``None``, meaning that arguments will never be treated as file references. +.. versionchanged:: 3.12 + :class:`ArgumentParser` changed encoding and errors to read arguments files + from default (e.g. :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>` and + ``"strict"``) to :term:`filesystem encoding and error handler`. + Arguments files should be encoded in UTF-8 instead of ANSI Codepage on Windows. 
+ argument_default ^^^^^^^^^^^^^^^^ diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 2439479b458d37..8dde1350a7b166 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -233,6 +233,12 @@ Changes in the Python API select from a larger range than ``randrange(10**25)``. (Originally suggested by Serhiy Storchaka gh-86388.) +* :class:`argparse.ArgumentParser` changed encoding and error handler + for reading arguments from file (e.g. ``fromfile_prefix_chars`` option) + from default text encoding (e.g. :func:`locale.getpreferredencoding(False) <locale.getpreferredencoding>`) + to :term:`filesystem encoding and error handler`. + Argument files should be encoded in UTF-8 instead of ANSI Codepage on Windows. + Build Changes ============= diff --git a/Lib/argparse.py b/Lib/argparse.py index 1c5520c4b41bd1..02e98bbf920cf1 100644 --- a/Lib/argparse.py +++ b/Lib/argparse.py @@ -2161,7 +2161,9 @@ def _read_args_from_files(self, arg_strings): # replace arguments referencing files with the file content else: try: - with open(arg_string[1:]) as args_file: + with open(arg_string[1:], + encoding=_sys.getfilesystemencoding(), + errors=_sys.getfilesystemencodeerrors()) as args_file: arg_strings = [] for arg_line in args_file.read().splitlines(): for arg in self.convert_arg_line_to_args(arg_line): diff --git a/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst b/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst new file mode 100644 index 00000000000000..4574264dd4d433 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-05-27-10-52-06.gh-issue-85308.K6r-tJ.rst @@ -0,0 +1,4 @@ +Changed :class:`argparse.ArgumentParser` to use :term:`filesystem encoding +and error handler` instead of default text encoding to read arguments from +file (e.g. ``fromfile_prefix_chars`` option). This change affects Windows; +argument files should be encoded in UTF-8 instead of ANSI Codepage.