From ca777747cb19cf61b3be7c71ecb30882bf8155a6 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 30 Aug 2022 02:02:36 -0700 Subject: [PATCH 01/21] Backport CVE-2020-10735 to 3.7 from 3.8. This is based off of psrt/CVE-2020-10735-3.8backport branch at cd54fc39876a1de6853a8cf436b4b5ae6778f5a3. --- Doc/library/functions.rst | 8 + Doc/library/json.rst | 11 ++ Doc/library/stdtypes.rst | 162 ++++++++++++++++++ Doc/library/sys.rst | 59 +++++-- Doc/library/test.rst | 10 ++ Doc/using/cmdline.rst | 13 ++ Include/internal/pycore_long.h | 49 ++++++ Include/internal/pystate.h | 6 + Lib/test/support/__init__.py | 10 ++ Lib/test/test_ast.py | 8 + Lib/test/test_cmd_line.py | 34 ++++ Lib/test/test_compile.py | 13 ++ Lib/test/test_decimal.py | 18 ++ Lib/test/test_int.py | 114 ++++++++++++ Lib/test/test_json/test_decode.py | 8 + Lib/test/test_sys.py | 10 +- Lib/test/test_xmlrpc.py | 10 ++ ...22-08-07-16-53.gh-issue-95778.ch010gps.rst | 14 ++ Modules/main.c | 56 ++++++ Objects/longobject.c | 45 ++++- Python/ast.c | 27 ++- Python/clinic/sysmodule.c.h | 50 +++++- Python/pylifecycle.c | 4 + Python/sysmodule.c | 44 ++++- 24 files changed, 764 insertions(+), 19 deletions(-) create mode 100644 Include/internal/pycore_long.h create mode 100644 Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index d4553e045d6030..c0a22d5f16b453 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -774,6 +774,14 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.7 *x* is now a positional-only parameter. + .. versionchanged:: 3.7.14 + :class:`int` string inputs and string representations can be limited to + help avoid denial of service attacks. A :exc:`ValueError` is raised when + the limit is exceeded while converting a string *x* to an :class:`int` or + when converting an :class:`int` into a string would exceed the limit. 
+ See the :ref:`integer string conversion length limitation + ` documentation. + .. function:: isinstance(object, classinfo) diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 510e30733fed0a..feec0d0725fb52 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -18,6 +18,11 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. warning:: + Be cautious when parsing JSON data from untrusted sources. A malicious + JSON string may cause the decoder to consume considerable CPU and memory + resources. Limiting the size of data to be parsed is recommended. + :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. @@ -243,6 +248,12 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). + .. versionchanged:: 3.7.14 + The default *parse_int* of :func:`int` now limits the maximum length of + the integer string via the interpreter's :ref:`integer string + conversion length limitation ` to help avoid denial + of service attacks. + *parse_constant*, if specified, will be called with one of the following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to raise an exception if invalid JSON numbers diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index c35cb2e11d32d4..482465825958db 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -556,6 +556,13 @@ class`. float also has the following additional methods. :exc:`OverflowError` on infinities and a :exc:`ValueError` on NaNs. + .. note:: + + The values returned by ``as_integer_ratio()`` can be huge. Attempts + to render such integers into decimal strings may bump into the + :ref:`integer string conversion length limitation + `. + .. 
method:: float.is_integer() Return ``True`` if the float instance is finite with integral @@ -4741,6 +4748,161 @@ types, where they are relevant. Some of these are not reported by the [] +.. _int_max_str_digits: + +Integer string conversion length limitation +=========================================== + +CPython has a global limit for converting between :class:`int` and :class:`str` +to mitigate denial of service attacks. This limit *only* applies to decimal or +other non-power-of-two number bases. Hexadecimal, octal, and binary conversions +are unlimited. The limit can be configured. + +The :class:`int` type in CPython is an arbitrary length number stored in binary +form (commonly known as a "bignum"). There exists no algorithm that can convert +a string to a binary integer or a binary integer to a string in linear time, +*unless* the base is a power of 2. Even the best known algorithms for base 10 +have sub-quadratic complexity. Converting a large value such as ``int('1' * +500_000)`` can take over a second on a fast CPU. + +Limiting conversion size offers a practical way to avoid `CVE-2020-10735 +<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. + +The limit is applied to the number of digit characters in the input or output +string when a non-linear conversion algorithm would be involved. Underscores +and the sign are not counted towards the limit. + +When an operation would exceed the limit, a :exc:`ValueError` is raised:: + + >>> import sys + >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. + >>> _ = int('2' * 5432) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits. + >>> i = int('2' * 4300) + >>> len(str(i)) + 4300 + >>> i_squared = i*i + >>> len(str(i_squared)) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. 
+ >>> len(hex(i_squared)) + 7144 + >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. + +The default limit is 4300 digits as provided in +:data:`sys.int_info.default_max_str_digits <sys.int_info>`. +The lowest limit that can be configured is 640 digits as provided in +:data:`sys.int_info.str_digits_check_threshold <sys.int_info>`. + +Verification:: + + >>> import sys + >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info + >>> assert sys.int_info.str_digits_check_threshold == 640, sys.int_info + >>> msg = int('578966293710682886880994035146873798396722250538762761564' + ... '9252925514383915483333812743580549779436104706260696366600' + ... '571186405732').to_bytes(53, 'big') + ... + +.. versionadded:: 3.7.14 + +Affected APIs +------------- + +The limitation only applies to potentially slow conversions between :class:`int` +and :class:`str` or :class:`bytes`: + +* ``int(string)`` with default base 10. +* ``int(string, base)`` for all bases that are not a power of 2. +* ``str(integer)``. +* ``repr(integer)`` +* any other string conversion to base 10, for example ``f"{integer}"``, + ``"{}".format(integer)``, or ``b"%d" % integer``. + +The limitations do not apply to functions with a linear algorithm: + +* ``int(string, base)`` with base 2, 4, 8, 16, or 32. +* :func:`int.from_bytes` and :func:`int.to_bytes`. +* :func:`hex`, :func:`oct`, :func:`bin`. +* :ref:`formatspec` for hex, octal, and binary numbers. +* :class:`str` to :class:`float`. +* :class:`str` to :class:`decimal.Decimal`. + +Configuring the limit +--------------------- + +Before Python starts up you can use an environment variable or an interpreter +command line flag to configure the limit: + +* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. + ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to 640 or + ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation. +* :option:`-X int_max_str_digits <-X>`, e.g. 
+ ``python3 -X int_max_str_digits=640`` +* :data:`sys.flags.int_max_str_digits` contains the value of + :envvar:`PYTHONINTMAXSTRDIGITS` or :option:`-X int_max_str_digits <-X>`. + If both the env var and the ``-X`` option are set, the ``-X`` option takes + precedence. A value of *-1* indicates that both were unset, thus a value of + :data:`sys.int_info.default_max_str_digits` was used during initialization. + +From code, you can inspect the current limit and set a new one using these +:mod:`sys` APIs: + +* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are + a getter and setter for the interpreter-wide limit. Subinterpreters have + their own limit. + +Information about the default and minimum can be found in :attr:`sys.int_info`: + +* :data:`sys.int_info.default_max_str_digits <sys.int_info>` is the compiled-in + default limit. +* :data:`sys.int_info.str_digits_check_threshold <sys.int_info>` is the lowest + accepted value for the limit (other than 0 which disables it). + +.. versionadded:: 3.7.14 + +.. caution:: + + Setting a low limit *can* lead to problems. While rare, code exists that + contains integer constants in decimal in their source that exceed the + minimum threshold. A consequence of setting the limit is that Python source + code containing decimal integer literals longer than the limit will + encounter an error during parsing, usually at startup time or import time or + even at installation time - anytime an up to date ``.pyc`` does not already + exist for the code. A workaround for source that contains such large + constants is to convert them to ``0x`` hexadecimal form as it has no limit. + + Test your application thoroughly if you use a low limit. Ensure your tests + run with the limit set early via the environment or flag so that it applies + during startup and even during any installation step that may invoke Python + to precompile ``.py`` sources to ``.pyc`` files. 
+ +Recommended configuration +------------------------- + +The default :data:`sys.int_info.default_max_str_digits` is expected to be +reasonable for most applications. If your application requires a different +limit, set it from your main entry point using Python version agnostic code as +these APIs were added in security patch releases in versions before 3.11. + +Example:: + + >>> import sys + >>> if hasattr(sys, "set_int_max_str_digits"): + ... upper_bound = 68000 + ... lower_bound = 4004 + ... current_limit = sys.get_int_max_str_digits() + ... if current_limit == 0 or current_limit > upper_bound: + ... sys.set_int_max_str_digits(upper_bound) + ... elif current_limit < lower_bound: + ... sys.set_int_max_str_digits(lower_bound) + +If you need to disable it entirely, set it to ``0``. + + .. rubric:: Footnotes .. [1] Additional information on these special methods may be found in the Python diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 1760ae3cd50f09..c636b739254f02 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -327,9 +327,9 @@ always available. The :term:`named tuple` *flags* exposes the status of command line flags. The attributes are read only. - ============================= ============================= + ============================= ============================================================================================================== attribute flag - ============================= ============================= + ============================= ============================================================================================================== :const:`debug` :option:`-d` :const:`inspect` :option:`-i` :const:`interactive` :option:`-i` @@ -345,7 +345,8 @@ always available. 
:const:`hash_randomization` :option:`-R` :const:`dev_mode` :option:`-X` ``dev`` :const:`utf8_mode` :option:`-X` ``utf8`` - ============================= ============================= + :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation `) + ============================= ============================================================================================================== .. versionchanged:: 3.2 Added ``quiet`` attribute for the new :option:`-q` flag. @@ -363,6 +364,9 @@ always available. Added ``dev_mode`` attribute for the new :option:`-X` ``dev`` flag and ``utf8_mode`` attribute for the new :option:`-X` ``utf8`` flag. + .. versionchanged:: 3.7.14 + Added the ``int_max_str_digits`` attribute. + .. data:: float_info @@ -539,6 +543,15 @@ always available. .. versionadded:: 3.6 + +.. function:: get_int_max_str_digits() + + Returns the current value for the :ref:`integer string conversion length + limitation `. See also :func:`set_int_max_str_digits`. + + .. versionadded:: 3.7.14 + + .. function:: getrefcount(object) Return the reference count of the *object*. The count returned is generally one @@ -821,19 +834,31 @@ always available. .. tabularcolumns:: |l|L| - +-------------------------+----------------------------------------------+ - | Attribute | Explanation | - +=========================+==============================================+ - | :const:`bits_per_digit` | number of bits held in each digit. 
Python | - | | integers are stored internally in base | - | | ``2**int_info.bits_per_digit`` | - +-------------------------+----------------------------------------------+ - | :const:`sizeof_digit` | size in bytes of the C type used to | - | | represent a digit | - +-------------------------+----------------------------------------------+ + +----------------------------------------+-----------------------------------------------+ + | Attribute | Explanation | + +========================================+===============================================+ + | :const:`bits_per_digit` | number of bits held in each digit. Python | + | | integers are stored internally in base | + | | ``2**int_info.bits_per_digit`` | + +----------------------------------------+-----------------------------------------------+ + | :const:`sizeof_digit` | size in bytes of the C type used to | + | | represent a digit | + +----------------------------------------+-----------------------------------------------+ + | :const:`default_max_str_digits` | default value for | + | | :func:`sys.get_int_max_str_digits` when it | + | | is not otherwise explicitly configured. | + +----------------------------------------+-----------------------------------------------+ + | :const:`str_digits_check_threshold` | minimum non-zero value for | + | | :func:`sys.set_int_max_str_digits`, | + | | :envvar:`PYTHONINTMAXSTRDIGITS`, or | + | | :option:`-X int_max_str_digits <-X>`. | + +----------------------------------------+-----------------------------------------------+ .. versionadded:: 3.1 + .. versionchanged:: 3.7.14 + Added ``default_max_str_digits`` and ``str_digits_check_threshold``. + .. data:: __interactivehook__ @@ -1092,6 +1117,14 @@ always available. .. availability:: Unix. +.. function:: set_int_max_str_digits(n) + + Set the :ref:`integer string conversion length limitation + ` used by this interpreter. See also + :func:`get_int_max_str_digits`. + + .. versionadded:: 3.7.14 + .. 
function:: setprofile(profilefunc) .. index:: diff --git a/Doc/library/test.rst b/Doc/library/test.rst index e93ef450f02277..d59cd405fa3c5d 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -1207,6 +1207,16 @@ The :mod:`test.support` module defines the following functions: .. versionadded:: 3.6 +.. function:: adjust_int_max_str_digits(max_digits) + + This function returns a context manager that will change the global + :func:`sys.set_int_max_str_digits` setting for the duration of the + context to allow execution of test code that needs a different limit + on the number of digits when converting between an integer and string. + + .. versionadded:: 3.7.14 + + The :mod:`test.support` module defines the following classes: .. class:: TransientResource(exc, **kwargs) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index be92642e2fa31d..000fa6592e0e9d 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -432,6 +432,9 @@ Miscellaneous options * ``-X showalloccount`` to output the total count of allocated objects for each type when the program finishes. This only works when Python was built with ``COUNT_ALLOCS`` defined. + * ``-X int_max_str_digits`` configures the :ref:`integer string conversion + length limitation `. See also + :envvar:`PYTHONINTMAXSTRDIGITS`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded @@ -474,6 +477,9 @@ Miscellaneous options .. versionadded:: 3.7 The ``-X importtime``, ``-X dev`` and ``-X utf8`` options. + .. versionadded:: 3.7.14 + The ``-X int_max_str_digits`` option. + Options you shouldn't use ~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -618,6 +624,13 @@ conflict. .. versionadded:: 3.2.3 +.. 
envvar:: PYTHONINTMAXSTRDIGITS + + If this variable is set to an integer, it is used to configure the + interpreter's global :ref:`integer string conversion length limitation + <int_max_str_digits>`. + + .. versionadded:: 3.7.14 .. envvar:: PYTHONIOENCODING diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h new file mode 100644 index 00000000000000..f509fe2ee90386 --- /dev/null +++ b/Include/internal/pycore_long.h @@ -0,0 +1,49 @@ +#ifndef Py_INTERNAL_LONG_H +#define Py_INTERNAL_LONG_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* + * Default int base conversion size limitation: Denial of Service prevention. + * + * Chosen such that this isn't wildly slow on modern hardware and so that + * everyone's existing deployed numpy test suite passes before + * https://github.com/numpy/numpy/issues/22098 is widely available. + * + * $ python -m timeit -s 's = "1"*4300' 'int(s)' + * 2000 loops, best of 5: 125 usec per loop + * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' + * 1000 loops, best of 5: 311 usec per loop + * (zen2 cloud VM) + * + * 4300 decimal digits fits a ~14284 bit number. + */ +#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300 +/* + * Threshold for max digits check. For performance reasons int() and + * int.__str__() don't check values that are smaller than this + * threshold. Acts as a guaranteed minimum size limit for bignums that + * applications can expect from CPython. + * + * % python -m timeit -s 's = "1"*640; v = int(s)' 'str(int(s))' + * 20000 loops, best of 5: 12 usec per loop + * + * "640 digits should be enough for anyone." - gps + * fits a ~2126 bit decimal number. + */ +#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640 + +#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \ + (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) +# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold." 
+#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_LONG_H */ diff --git a/Include/internal/pystate.h b/Include/internal/pystate.h index 5891339b543482..f282708a86e603 100644 --- a/Include/internal/pystate.h +++ b/Include/internal/pystate.h @@ -105,6 +105,8 @@ typedef struct pyruntimestate { struct _gilstate_runtime_state gilstate; // XXX Consolidate globals found via the check-c-globals script. + + int int_max_str_digits; } _PyRuntimeState; #define _PyRuntimeState_INIT {.initialized = 0, .core_initialized = 0} @@ -120,6 +122,10 @@ PyAPI_FUNC(_PyInitError) _PyRuntime_Initialize(void); PyAPI_FUNC(void) _PyRuntime_Finalize(void); +/* Excluded from public struct _PyCoreConfig for backporting reasons. */ +/* Modules/main.c config_init_int_max_str_digits() configures it. */ +/* Storage declared in pylifecycle.c */ +extern int _Py_global_config_int_max_str_digits; #define _Py_CURRENTLY_FINALIZING(tstate) \ (_PyRuntime.finalizing == tstate) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index b78451b9e6c568..8de486fa876cd5 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2985,3 +2985,13 @@ def __gt__(self, other): return False SMALLEST = _SMALLEST() + +@contextlib.contextmanager +def adjust_int_max_str_digits(max_digits): + """Temporarily change the integer string conversion length limit.""" + current = sys.get_int_max_str_digits() + try: + sys.set_int_max_str_digits(max_digits) + yield + finally: + sys.set_int_max_str_digits(current) diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 830fb58a02b66f..754c5cab8bb289 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -632,6 +632,14 @@ def test_literal_eval(self): self.assertRaises(ValueError, ast.literal_eval, '+True') self.assertRaises(ValueError, ast.literal_eval, '2+3') + def test_literal_eval_str_int_limit(self): + with support.adjust_int_max_str_digits(4000): + ast.literal_eval('3'*4000) # no error + with 
self.assertRaises(SyntaxError) as err_ctx: + ast.literal_eval('3'*4001) + self.assertIn('Exceeds the limit ', str(err_ctx.exception)) + self.assertIn(' Consider hexidecimal ', str(err_ctx.exception)) + def test_literal_eval_complex(self): # Issue #4907 self.assertEqual(ast.literal_eval('6j'), 6j) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 95cdc8db7efbcc..0ca90fd4ab453d 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -711,6 +711,40 @@ def test_argv0_normalization(self): self.assertEqual(proc.returncode, 0, proc) self.assertEqual(proc.stdout.strip(), b'0') + def test_int_max_str_digits(self): + code = "import sys; print(sys.flags.int_max_str_digits, sys.get_int_max_str_digits())" + + assert_python_failure('-X', 'int_max_str_digits', '-c', code) + assert_python_failure('-X', 'int_max_str_digits=foo', '-c', code) + assert_python_failure('-X', 'int_max_str_digits=100', '-c', code) + + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo') + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100') + + def res2int(res): + out = res.out.strip().decode("utf-8") + return tuple(int(i) for i in out.split()) + + res = assert_python_ok('-c', code) + self.assertEqual(res2int(res), (-1, sys.get_int_max_str_digits())) + res = assert_python_ok('-X', 'int_max_str_digits=0', '-c', code) + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-X', 'int_max_str_digits=4000', '-c', code) + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok('-X', 'int_max_str_digits=100000', '-c', code) + self.assertEqual(res2int(res), (100000, 100000)) + + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0') + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000') + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok( + '-X', 'int_max_str_digits=6000', '-c', code, + PYTHONINTMAXSTRDIGITS='4000' + ) + 
self.assertEqual(res2int(res), (6000, 6000)) + + @unittest.skipIf(interpreter_requires_environment(), 'Cannot run -I tests when PYTHON env vars are required.') class IgnoreEnvironmentTest(unittest.TestCase): diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 5ac1c5f2713152..c546122c0dc718 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -189,6 +189,19 @@ def test_literals_with_leading_zeroes(self): self.assertEqual(eval("0o777"), 511) self.assertEqual(eval("-0o0000010"), -8) + def test_int_literals_too_long(self): + n = 3000 + source = f"a = 1\nb = 2\nc = {'3'*n}\nd = 4" + with support.adjust_int_max_str_digits(n): + compile(source, "", "exec") # no errors. + with support.adjust_int_max_str_digits(n-1): + with self.assertRaises(SyntaxError) as err_ctx: + compile(source, "", "exec") + exc = err_ctx.exception + self.assertEqual(exc.lineno, 3) + self.assertIn('Exceeds the limit ', str(exc)) + self.assertIn(' Consider hexidecimal ', str(exc)) + def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 if sys.maxsize == 2147483647: diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 1f37b5372a3e7e..cfa9e17051ee76 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -2446,6 +2446,15 @@ class CUsabilityTest(UsabilityTest): class PyUsabilityTest(UsabilityTest): decimal = P + def setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_int_limit) + super().tearDown() + class PythonAPItests(unittest.TestCase): def test_abc(self): @@ -4503,6 +4512,15 @@ class CCoverage(Coverage): class PyCoverage(Coverage): decimal = P + def setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_int_limit) + 
super().tearDown() + class PyFunctionality(unittest.TestCase): """Extra functionality in decimal.py""" diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index c048b712da83ce..493bc093de97f2 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -516,5 +516,119 @@ def test_issue31619(self): self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) +class IntStrDigitLimitsTests(unittest.TestCase): + + int_class = int # Override this in subclasses to reuse the suite. + + def setUp(self): + super().setUp() + self._previous_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(2048) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_limit) + super().tearDown() + + def test_disabled_limit(self): + self.assertGreater(sys.get_int_max_str_digits(), 0) + self.assertLess(sys.get_int_max_str_digits(), 20_000) + with support.adjust_int_max_str_digits(0): + self.assertEqual(sys.get_int_max_str_digits(), 0) + i = self.int_class('1' * 20_000) + str(i) + self.assertGreater(sys.get_int_max_str_digits(), 0) + + def test_max_str_digits_edge_cases(self): + """Ignore the +/- sign and space padding.""" + int_class = self.int_class + maxdigits = sys.get_int_max_str_digits() + + int_class('1' * maxdigits) + int_class(' ' + '1' * maxdigits) + int_class('1' * maxdigits + ' ') + int_class('+' + '1' * maxdigits) + int_class('-' + '1' * maxdigits) + self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) + + def check(self, i, base=None): + with self.assertRaises(ValueError): + if base is None: + self.int_class(i) + else: + self.int_class(i, base) + + def test_max_str_digits(self): + maxdigits = sys.get_int_max_str_digits() + + self.check('1' * (maxdigits + 1)) + self.check(' ' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1) + ' ') + self.check('+' + '1' * (maxdigits + 1)) + self.check('-' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1)) + + i = 10 ** maxdigits + with self.assertRaises(ValueError): + str(i) + + def 
test_power_of_two_bases_unlimited(self): + """The limit does not apply to power of 2 bases.""" + maxdigits = sys.get_int_max_str_digits() + + for base in (2, 4, 8, 16, 32): + with self.subTest(base=base): + self.int_class('1' * (maxdigits + 1), base) + assert maxdigits < 100_000 + self.int_class('1' * 100_000, base) + + def test_underscores_ignored(self): + maxdigits = sys.get_int_max_str_digits() + + triples = maxdigits // 3 + s = '111' * triples + s_ = '1_11' * triples + self.int_class(s) # succeeds + self.int_class(s_) # succeeds + self.check(f'{s}111') + self.check(f'{s_}_111') + + def test_sign_not_counted(self): + int_class = self.int_class + max_digits = sys.get_int_max_str_digits() + s = '5' * max_digits + i = int_class(s) + pos_i = int_class(f'+{s}') + assert i == pos_i + neg_i = int_class(f'-{s}') + assert -pos_i == neg_i + str(pos_i) + str(neg_i) + + def _other_base_helper(self, base): + int_class = self.int_class + max_digits = sys.get_int_max_str_digits() + s = '2' * max_digits + i = int_class(s, base) + if base > 10: + with self.assertRaises(ValueError): + str(i) + elif base < 10: + str(i) + with self.assertRaises(ValueError) as err: + int_class(f'{s}1', base) + + def test_int_from_other_bases(self): + base = 3 + with self.subTest(base=base): + self._other_base_helper(base) + base = 36 + with self.subTest(base=base): + self._other_base_helper(base) + + +class IntSubclassStrDigitLimitsTests(IntStrDigitLimitsTests): + int_class = IntSubclass + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index fdb9e62124ece1..0c3e98db722fb3 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -2,6 +2,7 @@ from io import StringIO from collections import OrderedDict from test.test_json import PyTest, CTest +from test import support class TestDecode: @@ -95,5 +96,12 @@ def test_negative_index(self): d = self.json.JSONDecoder() 
self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) + def test_limit_int(self): + maxdigits = 5000 + with support.adjust_int_max_str_digits(maxdigits): + self.loads('1' * maxdigits) + with self.assertRaises(ValueError): + self.loads('1' * (maxdigits + 1)) + class TestPyDecode(TestDecode, PyTest): pass class TestCDecode(TestDecode, CTest): pass diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 0478f20cd33b4d..90304520d36c5c 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -447,11 +447,17 @@ def test_attributes(self): self.assertIsInstance(sys.executable, str) self.assertEqual(len(sys.float_info), 11) self.assertEqual(sys.float_info.radix, 2) - self.assertEqual(len(sys.int_info), 2) + self.assertEqual(len(sys.int_info), 4) self.assertTrue(sys.int_info.bits_per_digit % 5 == 0) self.assertTrue(sys.int_info.sizeof_digit >= 1) + self.assertGreaterEqual(sys.int_info.default_max_str_digits, 500) + self.assertGreaterEqual(sys.int_info.str_digits_check_threshold, 100) + self.assertGreater(sys.int_info.default_max_str_digits, + sys.int_info.str_digits_check_threshold) self.assertEqual(type(sys.int_info.bits_per_digit), int) self.assertEqual(type(sys.int_info.sizeof_digit), int) + self.assertIsInstance(sys.int_info.default_max_str_digits, int) + self.assertIsInstance(sys.int_info.str_digits_check_threshold, int) self.assertIsInstance(sys.hexversion, int) self.assertEqual(len(sys.hash_info), 9) @@ -554,7 +560,7 @@ def test_sys_flags(self): "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", "hash_randomization", "isolated", - "dev_mode", "utf8_mode") + "dev_mode", "utf8_mode", "int_max_str_digits") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr == "dev_mode" else int diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index 0e002ec4ef9f8c..9cbbd2128e034f 100644 --- a/Lib/test/test_xmlrpc.py +++ 
b/Lib/test/test_xmlrpc.py @@ -283,6 +283,16 @@ def test_load_extension_types(self): check('9876543210.0123456789', decimal.Decimal('9876543210.0123456789')) + def test_limit_int(self): + check = self.check_loads + maxdigits = 5000 + with support.adjust_int_max_str_digits(maxdigits): + s = '1' * (maxdigits + 1) + with self.assertRaises(ValueError): + check(f'{s}', None) + with self.assertRaises(ValueError): + check(f'{s}', None) + def test_get_host_info(self): # see bug #3613, this raised a TypeError transp = xmlrpc.client.Transport() diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst new file mode 100644 index 00000000000000..a205fb31ad7b95 --- /dev/null +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -0,0 +1,14 @@ +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now +raises a :exc:`ValueError` if the number of digits in string form is above a +limit to avoid potential denial of service attacks due to the algorithmic +complexity. This is a mitigation for `CVE-2020-10735 +<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735>`_. + +This new limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length +limitation <int_max_str_digits>` documentation. The default limit is 4300 +digits in string form. + +Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback from +Victor Stinner, Thomas Wouters, and Steve Dower. 
diff --git a/Modules/main.c b/Modules/main.c index be0807b6a99b5e..b646fe52e6b35c 100644 --- a/Modules/main.c +++ b/Modules/main.c @@ -3,6 +3,7 @@ #include "Python.h" #include "osdefs.h" #include "internal/import.h" +#include "internal/pycore_long.h" #include "internal/pygetopt.h" #include "internal/pystate.h" @@ -142,6 +143,9 @@ static const char usage_3[] = "\ -X utf8: enable UTF-8 mode for operating system interfaces, overriding the default\n\ locale-aware mode. -X utf8=0 explicitly disables UTF-8 mode (even when it would\n\ otherwise activate automatically)\n\ + -X int_max_str_digits=number: limit the size of int<->str conversions.\n\ + This helps avoid denial of service attacks when parsing untrusted data.\n\ + The default is sys.int_info.default_max_str_digits. 0 disables.\n\ \n\ --check-hash-based-pycs always|default|never:\n\ control how Python invalidates hash-based .pyc files\n\ @@ -167,6 +171,10 @@ static const char usage_6[] = " to seed the hashes of str, bytes and datetime objects. It can also be\n" " set to an integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" +"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n" +" when converting from a string and when converting an int back to a str.\n" +" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n" +" 16, and 32 are never limited.\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators. 
Use PYTHONMALLOC=debug to install debug\n" " hooks.\n" @@ -1801,6 +1809,48 @@ config_init_tracemalloc(_PyCoreConfig *config) return _Py_INIT_OK(); } +static _PyInitError +config_init_int_max_str_digits(_PyCoreConfig *config) +{ + int maxdigits; + int valid = 0; + + const char *env = config_get_env_var("PYTHONINTMAXSTRDIGITS"); + if (env) { + if (!pymain_str_to_int(env, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + if (!valid) { +#define STRINGIFY(VAL) _STRINGIFY(VAL) +#define _STRINGIFY(VAL) #VAL + return _Py_INIT_USER_ERR( + "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); + } + _Py_global_config_int_max_str_digits = maxdigits; + } + + const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits"); + if (xoption) { + const wchar_t *sep = wcschr(xoption, L'='); + if (sep) { + if (!pymain_wstr_to_int(sep + 1, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + } + if (!valid) { + return _Py_INIT_USER_ERR( + "-X int_max_str_digits: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); +#undef _STRINGIFY +#undef STRINGIFY + } + _Py_global_config_int_max_str_digits = maxdigits; + } + return _Py_INIT_OK(); +} static void get_env_flag(int *flag, const char *name) @@ -2020,6 +2070,12 @@ config_read_complex_options(_PyCoreConfig *config) return err; } } + if (_Py_global_config_int_max_str_digits < 0) { + _PyInitError err = config_init_int_max_str_digits(config); + if (_Py_INIT_FAILED(err)) { + return err; + } + } return _Py_INIT_OK(); } diff --git a/Objects/longobject.c b/Objects/longobject.c index 202f652fc6df1e..a363692d823842 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3,6 +3,8 @@ /* XXX The functional organization of this file is terrible */ #include "Python.h" +#include "internal/pycore_long.h" +#include 
"internal/pystate.h" // _Py_global_config_int_max_str_digits #include "longintrepr.h" #include @@ -45,6 +47,8 @@ static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS]; Py_ssize_t quick_int_allocs, quick_neg_int_allocs; #endif +#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" + static PyObject * get_small_int(sdigit ival) { @@ -1661,6 +1665,16 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } + if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + int max_str_digits = _PyRuntime.int_max_str_digits; + Py_ssize_t strlen_nosign = strlen - negative; + if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { + Py_DECREF(scratch); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, strlen_nosign); + return -1; + } + } if (writer) { if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { Py_DECREF(scratch); @@ -2174,6 +2188,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { + /* binary bases are not limited by int_max_str_digits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2325,6 +2340,16 @@ digit beyond the first. goto onError; } + /* Limit the size to avoid excessive computation attacks. */ + if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + int max_str_digits = _PyRuntime.int_max_str_digits; + if ((max_str_digits > 0) && (digits > max_str_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + max_str_digits, digits); + return NULL; + } + } + /* Create an int object that can contain the largest possible * integer with this base and length. 
Note that there's no * need to initialize z->ob_digit -- no slot is read up before @@ -4807,6 +4832,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) } return PyLong_FromLong(0L); } + /* default base and limit, forward to standard implementation */ if (obase == NULL) return PyNumber_Long(x); @@ -5430,6 +5456,8 @@ internal representation of integers. The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {"default_max_str_digits", "maximum string conversion digits limitation"}, + {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"}, {NULL, NULL} }; @@ -5437,7 +5465,7 @@ static PyStructSequence_Desc int_info_desc = { "sys.int_info", /* name */ int_info__doc__, /* doc */ int_info_fields, /* fields */ - 2 /* number of fields */ + 4 /* number of fields */ }; PyObject * @@ -5452,6 +5480,17 @@ PyLong_GetInfo(void) PyLong_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + /* + * The following two fields were added after investigating uses of + * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was + * numba using sys.int_info.bits_per_digit as attribute access rather than + * sequence unpacking. Cython and sympy also refer to sys.int_info but only + * as info for debugging. No concern about adding these in a backport. 
+ */ + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -5503,6 +5542,10 @@ _PyLong_Init(void) if (PyStructSequence_InitType2(&Int_InfoType, &int_info_desc) < 0) return 0; } + _PyRuntime.int_max_str_digits = _Py_global_config_int_max_str_digits; + if (_PyRuntime.int_max_str_digits == -1) { + _PyRuntime.int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; + } return 1; } diff --git a/Python/ast.c b/Python/ast.c index 9d8a3544bdfc9f..eddd2914464dbb 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -9,6 +9,7 @@ #include "ast.h" #include "token.h" #include "pythonrun.h" +#include "internal/pystate.h" #include #include @@ -2138,8 +2139,32 @@ ast_for_atom(struct compiling *c, const node *n) } case NUMBER: { PyObject *pynum = parsenumber(c, STR(ch)); - if (!pynum) + if (!pynum) { + PyThreadState *tstate = PyThreadState_GET(); + // The only way a ValueError should happen in _this_ code is via + // PyLong_FromString hitting a length limit. + if (tstate->curexc_type == PyExc_ValueError && + tstate->curexc_value != NULL) { + PyObject *type, *value, *tb; + // This acts as PyErr_Clear() as we're replacing curexc. 
+ PyErr_Fetch(&type, &value, &tb); + Py_XDECREF(tb); + Py_DECREF(type); + PyObject *helpful_msg = PyUnicode_FromFormat( + "%S - Consider hexidecimal for huge integer literals " + "to avoid decimal conversion limits.", + value); + if (helpful_msg) { + const char* error_msg = PyUnicode_AsUTF8(helpful_msg); + if (error_msg) { + ast_error(c, ch, error_msg); + } + Py_DECREF(helpful_msg); + } + Py_DECREF(value); + } return NULL; + } if (PyArena_AddPyObject(c->c_arena, pynum) < 0) { Py_DECREF(pynum); diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 3e1480513f6c2a..bc6c99a5d65964 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -63,4 +63,52 @@ sys_get_coroutine_origin_tracking_depth(PyObject *module, PyObject *Py_UNUSED(ig exit: return return_value; } -/*[clinic end generated code: output=4a3ac42b97d710ff input=a9049054013a1b77]*/ + +PyDoc_STRVAR(sys_get_int_max_str_digits__doc__, +"get_int_max_str_digits($module, /)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \ + {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__}, + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module); + +static PyObject * +sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys_get_int_max_str_digits_impl(module); +} + +PyDoc_STRVAR(sys_set_int_max_str_digits__doc__, +"set_int_max_str_digits($module, /, maxdigits)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \ + {"set_int_max_str_digits", (PyCFunction)sys_set_int_max_str_digits, METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__}, + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits); + +static PyObject * +sys_set_int_max_str_digits(PyObject 
*module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"maxdigits", NULL}; + static _PyArg_Parser _parser = {"i:set_int_max_str_digits", _keywords, 0}; + int maxdigits; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + &maxdigits)) { + goto exit; + } + return_value = sys_set_int_max_str_digits_impl(module, maxdigits); + +exit: + return return_value; +} +/*[clinic end generated code: output=c566fcdbb8f6ae2c input=a9049054013a1b77]*/ diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 55d1ba5734434a..ff0087e2153647 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -6,6 +6,7 @@ #undef Yield /* undefine macro conflicting with winbase.h */ #include "internal/context.h" #include "internal/hamt.h" +#include "internal/pycore_long.h" #include "internal/pystate.h" #include "grammar.h" #include "node.h" @@ -130,6 +131,9 @@ int Py_LegacyWindowsFSEncodingFlag = 0; /* Uses mbcs instead of utf-8 */ int Py_LegacyWindowsStdioFlag = 0; /* Uses FileIO instead of WindowsConsoleIO */ #endif +/* Unusual name compared to the above for backporting from 3.12 reasons. 
*/ +int _Py_global_config_int_max_str_digits = -1; /* -X int_max_str_digits or PYTHONINTMAXSTRDIGITS */ + /* Hack to force loading of object files */ int (*_PyOS_mystrnicmp_hack)(const char *, const char *, Py_ssize_t) = \ PyOS_mystrnicmp; /* Python/pystrcmp.o */ diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b953a000977522..82e029fd38018e 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -15,6 +15,7 @@ Data members: */ #include "Python.h" +#include "internal/pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "internal/pystate.h" #include "code.h" #include "frameobject.h" @@ -1218,6 +1219,43 @@ sys_mdebug(PyObject *self, PyObject *args) } #endif /* USE_MALLOPT */ + +/*[clinic input] +sys.get_int_max_str_digits + +Set the maximum string digits limit for non-binary int<->str conversions. +[clinic start generated code]*/ + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module) +/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/ +{ + return PyLong_FromSsize_t(_PyRuntime.int_max_str_digits); +} + +/*[clinic input] +sys.set_int_max_str_digits + + maxdigits: int + +Set the maximum string digits limit for non-binary int<->str conversions. 
+[clinic start generated code]*/ + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits) +/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/ +{ + if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { + _PyRuntime.int_max_str_digits = maxdigits; + Py_RETURN_NONE; + } else { + PyErr_Format( + PyExc_ValueError, "maxdigits must be 0 or larger than %d", + _PY_LONG_MAX_STR_DIGITS_THRESHOLD); + return NULL; + } +} + size_t _PySys_GetSizeOf(PyObject *o) { @@ -1605,6 +1643,8 @@ static PyMethodDef sys_methods[] = { {"getandroidapilevel", (PyCFunction)sys_getandroidapilevel, METH_NOARGS, getandroidapilevel_doc}, #endif + SYS_GET_INT_MAX_STR_DIGITS_METHODDEF + SYS_SET_INT_MAX_STR_DIGITS_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2051,6 +2091,7 @@ static PyStructSequence_Field flags_fields[] = { {"isolated", "-I"}, {"dev_mode", "-X dev"}, {"utf8_mode", "-X utf8"}, + {"int_max_str_digits", "-X int_max_str_digits"}, {0} }; @@ -2058,7 +2099,7 @@ static PyStructSequence_Desc flags_desc = { "sys.flags", /* name */ flags__doc__, /* doc */ flags_fields, /* fields */ - 15 + 16 }; static PyObject* @@ -2092,6 +2133,7 @@ make_flags(void) SetFlag(Py_IsolatedFlag); PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(core_config->dev_mode)); SetFlag(Py_UTF8Mode); + SetFlag(_Py_global_config_int_max_str_digits); #undef SetFlag if (PyErr_Occurred()) { From f145128d86dd63fab31aafe22b5c9b00e77adbc0 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Tue, 30 Aug 2022 13:26:58 -0700 Subject: [PATCH 02/21] Add What's New entry. --- Doc/whatsnew/3.7.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 2cc380bf5aa6db..ddc7291a62f961 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -539,6 +539,17 @@ Other Language Changes the timing of each module import. (Contributed by Victor Stinner in :issue:`31415`.) 
+* New security feature in 3.7.14: + Converting between :class:`int` and :class:`str` in bases other than 2 + (binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) + now raises a :exc:`ValueError` if the number of digits in string form is + above a limit to avoid potential denial of service attacks due to the + algorithmic complexity. This is a mitigation for `CVE-2020-10735 + `_. + This limit can be configured or disabled by environment variable, command + line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion + length limitation ` documentation. The default limit + is 4300 digits in string form. New Modules =========== From 00a5114cf6e1326acbe490abc5e3ee6dfdddf686 Mon Sep 17 00:00:00 2001 From: Christian Heimes Date: Thu, 1 Sep 2022 10:40:29 +0200 Subject: [PATCH 03/21] Hack: Force CI run --- .github/workflows/build.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index be3bbcf90bf201..013916c06bc703 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,6 +23,7 @@ jobs: - name: Check for source changes id: check run: | + echo '::set-output name=run_tests::true' if [ -z "$GITHUB_BASE_REF" ]; then echo '::set-output name=run_tests::true' else From 7eaad2052f00b87a035f52c142f49136030d22e8 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google LLC]" Date: Thu, 1 Sep 2022 12:36:25 -0700 Subject: [PATCH 04/21] revert 1dae140b610a465b4d3e6fb2109ec13da6093e6d CI hack --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 013916c06bc703..be3bbcf90bf201 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -23,7 +23,6 @@ jobs: - name: Check for source changes id: check run: | - echo '::set-output name=run_tests::true' if [ -z "$GITHUB_BASE_REF" ]; then echo '::set-output name=run_tests::true' else From 635e292f360c7ab179ca6e27a25a2b165238ec55 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 1 Sep 2022 13:56:50 -0700 Subject: [PATCH 05/21] Backport ctypes test_macholib fix from b29d0a5a7811418c0a1082ca188fd4850185e290. This is required for the 3.7 tree to pass on modern macOS. --- Lib/ctypes/test/test_macholib.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Lib/ctypes/test/test_macholib.py b/Lib/ctypes/test/test_macholib.py index 6b3526951acfab..a1bac26a7df058 100644 --- a/Lib/ctypes/test/test_macholib.py +++ b/Lib/ctypes/test/test_macholib.py @@ -45,19 +45,22 @@ def find_lib(name): class MachOTest(unittest.TestCase): @unittest.skipUnless(sys.platform == "darwin", 'OSX-specific test') def test_find(self): - - self.assertEqual(find_lib('pthread'), - '/usr/lib/libSystem.B.dylib') + # On Mac OS 11, system dylibs are only present in the shared cache, + # so symlinks like libpthread.dylib -> libSystem.B.dylib will not + # be resolved by dyld_find + self.assertIn(find_lib('pthread'), + ('/usr/lib/libSystem.B.dylib', '/usr/lib/libpthread.dylib')) result = find_lib('z') # Issue #21093: dyld default search path includes $HOME/lib and # /usr/local/lib before /usr/lib, which caused test failures if # a local copy of libz exists in one of them. Now ignore the head # of the path. 
- self.assertRegex(result, r".*/lib/libz\..*.*\.dylib") + self.assertRegex(result, r".*/lib/libz.*\.dylib") - self.assertEqual(find_lib('IOKit'), - '/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit') + self.assertIn(find_lib('IOKit'), + ('/System/Library/Frameworks/IOKit.framework/Versions/A/IOKit', + '/System/Library/Frameworks/IOKit.framework/IOKit')) if __name__ == "__main__": unittest.main() From a2956f3dd4fbe68dfa5997b9eb73ae4ff9be833f Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 1 Sep 2022 14:22:35 -0700 Subject: [PATCH 06/21] annotate test_bad_password @requires_zlib. I don't know why, but macOS in 3.7 CI is failing to build the zlib module these days so it's exposing this test that didn't have the proper `@requires_zlib` annotation. Getting it to build with zlib and other things that are now wrongly "missing" in the 3.7 CI setup would be nice, but probably involves invasive backporting of parts of https://github.com/python/cpython/commit/b29d0a5a7811418c0a1082ca188fd4850185e290 by a macOS domain expert. Not worth it. --- Lib/test/test_zipfile.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 7e8e8d2c89f080..6a6df859856cdd 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2077,6 +2077,7 @@ def test_no_password(self): self.assertRaises(RuntimeError, self.zip.read, "test.txt") self.assertRaises(RuntimeError, self.zip2.read, "zero") + @requires_zlib def test_bad_password(self): self.zip.setpassword(b"perl") self.assertRaises(RuntimeError, self.zip.read, "test.txt") From 95645b6928f4c9142f9ab0aef64f673d3db92e22 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 1 Sep 2022 15:07:43 -0700 Subject: [PATCH 07/21] disable MachOTest.test_find unless macOS 11+ support is backported. This test also appears to require changes to Lib/ctypes/macholib/dyld.py to work in the existing macOS CI config. 
I'm just skipping it, backporting that would be a feature. Not going to happen in 3.7. There may be a way to configure macOS CI to use an older macOS and toolchain instead as an alternate option. Someone else can figure that out if so. This branch only lives for another 9 months per https://peps.python.org/pep-0537/ --- Lib/ctypes/test/test_macholib.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/ctypes/test/test_macholib.py b/Lib/ctypes/test/test_macholib.py index a1bac26a7df058..07379104a89477 100644 --- a/Lib/ctypes/test/test_macholib.py +++ b/Lib/ctypes/test/test_macholib.py @@ -32,6 +32,10 @@ # -bob from ctypes.macholib.dyld import dyld_find +try: + from _ctypes import _dyld_shared_cache_contains_path +else: + _dyld_shared_cache_contains_path = None def find_lib(name): possible = ['lib'+name+'.dylib', name+'.dylib', name+'.framework/'+name] @@ -44,6 +48,7 @@ def find_lib(name): class MachOTest(unittest.TestCase): @unittest.skipUnless(sys.platform == "darwin", 'OSX-specific test') + @unittest.skipUnless(_dyld_shared_cache_contains_path, 'macOS 11+ _ctypes support not present.') def test_find(self): # On Mac OS 11, system dylibs are only present in the shared cache, # so symlinks like libpthread.dylib -> libSystem.B.dylib will not From 2cc321ea4bc0ea471011836eb56c46bdd5c2181d Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 15:30:26 -0700 Subject: [PATCH 08/21] Move the whatsnew 3.7.14 text per review. Thanks Ned! --- Doc/whatsnew/3.7.rst | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index ddc7291a62f961..52590cc3460314 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -539,18 +539,6 @@ Other Language Changes the timing of each module import. (Contributed by Victor Stinner in :issue:`31415`.) 
-* New security feature in 3.7.14: - Converting between :class:`int` and :class:`str` in bases other than 2 - (binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) - now raises a :exc:`ValueError` if the number of digits in string form is - above a limit to avoid potential denial of service attacks due to the - algorithmic complexity. This is a mitigation for `CVE-2020-10735 - `_. - This limit can be configured or disabled by environment variable, command - line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion - length limitation ` documentation. The default limit - is 4300 digits in string form. - New Modules =========== @@ -2614,4 +2602,16 @@ URL by the parser :func:`urllib.parse` preventing such attacks. The removal characters are controlled by a new module level variable ``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`) - +Notable security feature in 3.7.14 +================================== + +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) +now raises a :exc:`ValueError` if the number of digits in string form is +above a limit to avoid potential denial of service attacks due to the +algorithmic complexity. This is a mitigation for `CVE-2020-10735 +`_. +This limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion +length limitation ` documentation. The default limit +is 4300 digits in string form. From bc83515f02c45c333770a823ae0f3e1a64180956 Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith" Date: Thu, 1 Sep 2022 15:43:40 -0700 Subject: [PATCH 09/21] LOL at my typo --- Lib/ctypes/test/test_macholib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ctypes/test/test_macholib.py b/Lib/ctypes/test/test_macholib.py index 07379104a89477..8b13b3af2239c6 100644 --- a/Lib/ctypes/test/test_macholib.py +++ b/Lib/ctypes/test/test_macholib.py @@ -34,7 +34,7 @@ from ctypes.macholib.dyld import dyld_find try: from _ctypes import _dyld_shared_cache_contains_path -else: +except ImportError: _dyld_shared_cache_contains_path = None def find_lib(name): From 76c9c2b3c0400192ff4f4190cbdc6fb7a45c0024 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 16:57:08 -0700 Subject: [PATCH 10/21] Make the doctest actually run & fix it. --- Doc/library/stdtypes.rst | 4 +++- .../Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 482465825958db..9b520a6be004f7 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4774,6 +4774,8 @@ and the sign are not counted towards the limit. When an operation would exceed the limit, a :exc:`ValueError` is raised:: +.. doctest:: + >>> import sys >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. >>> _ = int('2' * 5432) @@ -4790,7 +4792,7 @@ When an operation would exceed the limit, a :exc:`ValueError` is raised:: ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. >>> len(hex(i_squared)) 7144 - >>> assert int(hex(i_squared), base=16) == i # Hexidecimal is unlimited. + >>> assert int(hex(i_squared), base=16) == i*i # Hexidecimal is unlimited. The default limit is 4300 digits as provided in :data:`sys.int_info.default_max_str_digits `. 
diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst index a205fb31ad7b95..f9386b2ac140da 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst @@ -10,5 +10,5 @@ line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion length limitation ` documentation. The default limit is 4300 digits in string form. -Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback from -Victor Stinner, Thomas Wouters, and Steve Dower. +Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback +from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily. From e7bc47e884a6ab67f487249a01428716d4fc8960 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 17:21:14 -0700 Subject: [PATCH 11/21] remove a line that prevents doctest error reporting. the 3.8 branch got rid of this line already. it blocks seeing the actual error while testing a doc build! --- Doc/tools/extensions/suspicious.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/tools/extensions/suspicious.py b/Doc/tools/extensions/suspicious.py index fd50f318170b15..9e814fb94d2b56 100644 --- a/Doc/tools/extensions/suspicious.py +++ b/Doc/tools/extensions/suspicious.py @@ -150,7 +150,6 @@ def is_ignored(self, line, lineno, issue): return False def report_issue(self, text, lineno, issue): - if not self.any_issue: self.logger.info() self.any_issue = True self.write_log_entry(lineno, issue, text) if py3: From 0ef7ec0c4f031ccc0fad0a034af61863c0b5b976 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 17:22:01 -0700 Subject: [PATCH 12/21] Fix the docs build. 
--- Doc/library/stdtypes.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 9b520a6be004f7..b7ee09c1ab3e4e 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4772,7 +4772,7 @@ The limit is applied to the number of digit characters in the input or output string when a non-linear conversion algorithm would be involved. Underscores and the sign are not counted towards the limit. -When an operation would exceed the limit, a :exc:`ValueError` is raised:: +When an operation would exceed the limit, a :exc:`ValueError` is raised: .. doctest:: @@ -4799,7 +4799,9 @@ The default limit is 4300 digits as provided in The lowest limit that can be configured is 640 digits as provided in :data:`sys.int_info.str_digits_check_threshold `. -Verification:: +Verification: + +.. doctest:: >>> import sys >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info From ad13c5070d76c66ebff8dbadfa2313acbf43b6e6 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Thu, 1 Sep 2022 18:16:59 -0700 Subject: [PATCH 13/21] Update the ABI dump with the new private symbols. --- Doc/data/python3.7m.abi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/data/python3.7m.abi b/Doc/data/python3.7m.abi index c42f67ca89a2e1..d5c17dc4373eac 100644 --- a/Doc/data/python3.7m.abi +++ b/Doc/data/python3.7m.abi @@ -1860,6 +1860,7 @@ + @@ -11958,6 +11959,9 @@ + + + From ca92fd2db0e031c24aefcc330d3b6edb5c8310d8 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Thu, 1 Sep 2022 21:47:15 -0700 Subject: [PATCH 14/21] Rename the news file to appease the Bedevere bot. 
--- ...010gps.rst => 2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Misc/NEWS.d/next/Security/{2022-08-07-16-53.gh-issue-95778.ch010gps.rst => 2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst} (100%) diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst similarity index 100% rename from Misc/NEWS.d/next/Security/2022-08-07-16-53.gh-issue-95778.ch010gps.rst rename to Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst From db48ddca2a1241a80af7ad76cbeb3d72036d76a9 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Fri, 2 Sep 2022 16:12:22 +0000 Subject: [PATCH 15/21] hexadecimal spelling =) --- Doc/library/stdtypes.rst | 6 +++--- Doc/whatsnew/3.7.rst | 2 +- Lib/test/test_ast.py | 2 +- Lib/test/test_compile.py | 2 +- .../2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst | 2 +- Python/ast.c | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index b7ee09c1ab3e4e..109629fdf975c0 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4755,7 +4755,7 @@ Integer string conversion length limitation CPython has a global limit for converting between :class:`int` and :class:`str` to mitigate denial of service attacks. This limit *only* applies to decimal or -other non-power-of-two number bases. Hexidecimal, octal, and binary conversions +other non-power-of-two number bases. Hexadecimal, octal, and binary conversions are unlimited. The limit can be configured. The :class:`int` type in CPython is an abitrary length number stored in binary @@ -4792,7 +4792,7 @@ When an operation would exceed the limit, a :exc:`ValueError` is raised: ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. 
>>> len(hex(i_squared)) 7144 - >>> assert int(hex(i_squared), base=16) == i*i # Hexidecimal is unlimited. + >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. The default limit is 4300 digits as provided in :data:`sys.int_info.default_max_str_digits `. @@ -4877,7 +4877,7 @@ Information about the default and minimum can be found in :attr:`sys.int_info`: encounter an error during parsing, usually at startup time or import time or even at installation time - anytime an up to date ``.pyc`` does not already exist for the code. A workaround for source that contains such large - constants is to convert them to ``0x`` hexidecimal form as it has no limit. + constants is to convert them to ``0x`` hexadecimal form as it has no limit. Test your application thoroughly if you use a low limit. Ensure your tests run with the limit set early via the environment or flag so that it applies diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst index 52590cc3460314..21f96228cf1cd6 100644 --- a/Doc/whatsnew/3.7.rst +++ b/Doc/whatsnew/3.7.rst @@ -2606,7 +2606,7 @@ Notable security feature in 3.7.14 ================================== Converting between :class:`int` and :class:`str` in bases other than 2 -(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now raises a :exc:`ValueError` if the number of digits in string form is above a limit to avoid potential denial of service attacks due to the algorithmic complexity. 
This is a mitigation for `CVE-2020-10735 diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index 754c5cab8bb289..63c4207d89bb4d 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -638,7 +638,7 @@ def test_literal_eval_str_int_limit(self): with self.assertRaises(SyntaxError) as err_ctx: ast.literal_eval('3'*4001) self.assertIn('Exceeds the limit ', str(err_ctx.exception)) - self.assertIn(' Consider hexidecimal ', str(err_ctx.exception)) + self.assertIn(' Consider hexadecimal ', str(err_ctx.exception)) def test_literal_eval_complex(self): # Issue #4907 diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index c546122c0dc718..b8f04d93ba1bfa 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -200,7 +200,7 @@ def test_int_literals_too_long(self): exc = err_ctx.exception self.assertEqual(exc.lineno, 3) self.assertIn('Exceeds the limit ', str(exc)) - self.assertIn(' Consider hexidecimal ', str(exc)) + self.assertIn(' Consider hexadecimal ', str(exc)) def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst index f9386b2ac140da..ea3b85d632e083 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst @@ -1,5 +1,5 @@ Converting between :class:`int` and :class:`str` in bases other than 2 -(binary), 4, 8 (octal), 16 (hexidecimal), or 32 such as base 10 (decimal) now +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) now raises a :exc:`ValueError` if the number of digits in string form is above a limit to avoid potential denial of service attacks due to the algorithmic complexity. 
This is a mitigation for `CVE-2020-10735 diff --git a/Python/ast.c b/Python/ast.c index eddd2914464dbb..83a99e562216b2 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -2151,7 +2151,7 @@ ast_for_atom(struct compiling *c, const node *n) Py_XDECREF(tb); Py_DECREF(type); PyObject *helpful_msg = PyUnicode_FromFormat( - "%S - Consider hexidecimal for huge integer literals " + "%S - Consider hexadecimal for huge integer literals " "to avoid decimal conversion limits.", value); if (helpful_msg) { From 38ec6a9632db77af9867e41d6663f55cf0c44109 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google]" Date: Fri, 2 Sep 2022 17:05:35 +0000 Subject: [PATCH 16/21] Work around Windows Yield macro vs Python-ast.h --- Python/ast.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Python/ast.c b/Python/ast.c index 83a99e562216b2..d67e2b207af234 100644 --- a/Python/ast.c +++ b/Python/ast.c @@ -9,7 +9,11 @@ #include "ast.h" #include "token.h" #include "pythonrun.h" +/* A Windows header defines its own Yield macro, so we don't use the one + * from Python-ast.h and instead call _Py_Yield() directly. [ugh] */ +#undef Yield #include "internal/pystate.h" +#undef Yield #include #include @@ -2703,7 +2707,7 @@ ast_for_expr(struct compiling *c, const node *n) } if (is_from) return YieldFrom(exp, LINENO(n), n->n_col_offset, c->c_arena); - return Yield(exp, LINENO(n), n->n_col_offset, c->c_arena); + return _Py_Yield(exp, LINENO(n), n->n_col_offset, c->c_arena); } case factor: if (NCH(n) == 1) { From feaded83b5bc15efde244bc70fd0b5775d50e9aa Mon Sep 17 00:00:00 2001 From: "Gregory P. 
Smith [Google LLC]" Date: Sat, 3 Sep 2022 23:17:22 -0700 Subject: [PATCH 17/21] doc typo: limitation https://github.com/python/cpython/pull/96542 --- Doc/library/stdtypes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 109629fdf975c0..ffe4c94f0cb0d3 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -4816,7 +4816,7 @@ Verification: Affected APIs ------------- -The limition only applies to potentially slow conversions between :class:`int` +The limitation only applies to potentially slow conversions between :class:`int` and :class:`str` or :class:`bytes`: * ``int(string)`` with default base 10. From c9f2c575d264692200bd7cb2a3c501058c008242 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sat, 3 Sep 2022 23:00:58 -0700 Subject: [PATCH 18/21] Misc: Fix a typo in the header comment. --- Include/internal/pycore_long.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index f509fe2ee90386..ae04332a7a84ce 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -15,9 +15,9 @@ extern "C" { * everyone's existing deployed numpy test suite passes before * https://github.com/numpy/numpy/issues/22098 is widely available. * - * $ python -m timeit -s 's = * "1"*4300' 'int(s)' + * $ python -m timeit -s 's = "1"*4300' 'int(s)' * 2000 loops, best of 5: 125 usec per loop - * $ python -m timeit -s 's = * "1"*4300; v = int(s)' 'str(v)' + * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' * 1000 loops, best of 5: 311 usec per loop * (zen2 cloud VM) * From f69b587c4248cc0e2798134f2b5083e60f7f6e0e Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sat, 3 Sep 2022 23:35:01 -0700 Subject: [PATCH 19/21] remove unneeded doc note on float.as_integer_ratio Per mdickinson@'s comment on the main branch PR. 
--- Doc/library/stdtypes.rst | 7 ------- 1 file changed, 7 deletions(-) diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index ffe4c94f0cb0d3..36e0bed90d4475 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -556,13 +556,6 @@ class`. float also has the following additional methods. :exc:`OverflowError` on infinities and a :exc:`ValueError` on NaNs. - .. note:: - - The values returned by ``as_integer_ratio()`` can be huge. Attempts - to render such integers into decimal strings may bump into the - :ref:`integer string conversion length limitation - `. - .. method:: float.is_integer() Return ``True`` if the float instance is finite with integral From 39837b68cf67c3ba7f3c790672117dad7fc52e71 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 4 Sep 2022 17:21:18 +0100 Subject: [PATCH 20/21] gh-95778: Correctly pre-check for int-to-str conversion (#96537) Converting a large enough `int` to a decimal string raises `ValueError` as expected. However, the raise comes _after_ the quadratic-time base-conversion algorithm has run to completion. For effective DOS prevention, we need some kind of check before entering the quadratic-time loop. Oops! =) The quick fix: essentially we catch _most_ values that exceed the threshold up front. Those that slip through will still be on the small side (read: sufficiently fast), and will get caught by the existing check so that the limit remains exact. The justification for the current check. 
The C code check is: ```c max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10 ``` In GitHub markdown math-speak, writing $M$ for `max_str_digits`, $L$ for `PyLong_SHIFT` and $s$ for `size_a`, that check is: $$\left\lfloor\frac{M}{3L}\right\rfloor \le \left\lfloor\frac{s - 11}{10}\right\rfloor$$ From this it follows that $$\frac{M}{3L} < \frac{s-1}{10}$$ hence that $$\frac{L(s-1)}{M} > \frac{10}{3} > \log_2(10).$$ So $$2^{L(s-1)} > 10^M.$$ But our input integer $a$ satisfies $|a| \ge 2^{L(s-1)}$, so $|a|$ is larger than $10^M$. This shows that we don't accidentally capture anything _below_ the intended limit in the check. * Issue: gh-95778 Co-authored-by: Gregory P. Smith [Google LLC] --- Lib/test/test_int.py | 82 +++++++++++++++++++ ...08-07-16-53-38.gh-issue-95778.ch010gps.rst | 2 +- Objects/longobject.c | 26 +++++- 3 files changed, 105 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 493bc093de97f2..98ba847e7d003f 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,4 +1,5 @@ import sys +import time import unittest from test import support @@ -571,6 +572,87 @@ def test_max_str_digits(self): with self.assertRaises(ValueError): str(i) + def test_denial_of_service_prevented_int_to_str(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 50_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + huge_int = int(f'0x{"c"*65_000}', base=16) # 78268 decimal digits. + digits = 78_268 + with support.adjust_int_max_str_digits(digits): + start = get_time() + huge_decimal = str(huge_int) + seconds_to_convert = get_time() - start + self.assertEqual(len(huge_decimal), digits) + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. 
+ if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + # We test with the limit almost at the size needed to check performance. + # The performant limit check is slightly fuzzy, give it some room. + with support.adjust_int_max_str_digits(int(.995 * digits)): + with self.assertRaises(ValueError) as err: + start = get_time() + str(huge_int) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge_int = int(f'0x{"c"*500_000}', base=16) # 602060 digits. + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds in the Zen based cloud VM. + str(extra_huge_int) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + + def test_denial_of_service_prevented_str_to_int(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 100_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + digits = 133700 + huge = '8'*digits + with support.adjust_int_max_str_digits(digits): + start = get_time() + int(huge) + seconds_to_convert = get_time() - start + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. 
+ if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + with support.adjust_int_max_str_digits(digits - 1): + with self.assertRaises(ValueError) as err: + start = get_time() + int(huge) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge = '7'*1_200_000 + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds in the Zen based cloud VM. + int(extra_huge) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + def test_power_of_two_bases_unlimited(self): """The limit does not apply to power of 2 bases.""" maxdigits = sys.get_int_max_str_digits() diff --git a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst index ea3b85d632e083..8eb8a34884dced 100644 --- a/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst +++ b/Misc/NEWS.d/next/Security/2022-08-07-16-53-38.gh-issue-95778.ch010gps.rst @@ -11,4 +11,4 @@ limitation ` documentation. The default limit is 4300 digits in string form. Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with feedback -from Victor Stinner, Thomas Wouters, Steve Dower, and Ned Deily. +from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and Mark Dickinson. 
diff --git a/Objects/longobject.c b/Objects/longobject.c index a363692d823842..250a8207d81ac3 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -47,7 +47,8 @@ static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS]; Py_ssize_t quick_int_allocs, quick_neg_int_allocs; #endif -#define _MAX_STR_DIGITS_ERROR_FMT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion" static PyObject * get_small_int(sdigit ival) @@ -1606,6 +1607,23 @@ long_to_decimal_string_internal(PyObject *aa, size_a = Py_ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; + /* quick and dirty pre-check for overflowing the decimal digit limit, + based on the inequality 10/3 >= log2(10) + + explanation in https://github.com/python/cpython/pull/96537 + */ + if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD + / (3 * PyLong_SHIFT) + 2) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && + (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } + /* quick and dirty upper bound for the number of digits required to express a in base _PyLong_DECIMAL_BASE: @@ -1670,8 +1688,8 @@ long_to_decimal_string_internal(PyObject *aa, Py_ssize_t strlen_nosign = strlen - negative; if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { Py_DECREF(scratch); - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, - max_str_digits, strlen_nosign); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); return -1; } } @@ -2344,7 +2362,7 @@ digit beyond the first. 
if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { int max_str_digits = _PyRuntime.int_max_str_digits; if ((max_str_digits > 0) && (digits > max_str_digits)) { - PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT, + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT, max_str_digits, digits); return NULL; } From 7f911c1058acb6571998b9098598f87cd5a05dac Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith [Google LLC]" Date: Sun, 4 Sep 2022 09:53:54 -0700 Subject: [PATCH 21/21] backport cherry pick fix: lookup max from the right place. --- Objects/longobject.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Objects/longobject.c b/Objects/longobject.c index 250a8207d81ac3..a481a16eb01ca1 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -1614,8 +1614,7 @@ long_to_decimal_string_internal(PyObject *aa, */ if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD / (3 * PyLong_SHIFT) + 2) { - PyInterpreterState *interp = _PyInterpreterState_GET(); - int max_str_digits = interp->int_max_str_digits; + int max_str_digits = _PyRuntime.int_max_str_digits; if ((max_str_digits > 0) && (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR,