From 8dbe08eb7c807f484fe9870f5b7f5ae2881fd966 Mon Sep 17 00:00:00 2001 From: colorfulappl Date: Thu, 24 Nov 2022 22:01:26 +0800 Subject: [PATCH] gh-99240: Fix double-free bug in Argument Clinic str_converter generated code (GH-99241) Fix double-free bug mentioned at https://github.com/python/cpython/issues/99240, by moving memory clean up out of "exit" label. Automerge-Triggered-By: GH:erlend-aasland --- Lib/test/clinic.test | 33 +++----- Lib/test/test_clinic.py | 15 ++++ ...2-11-08-15-54-43.gh-issue-99240.MhYwcz.rst | 2 + Modules/_testclinic.c | 79 +++++++++++++++++++ Modules/clinic/_testclinic.c.h | 72 ++++++++++++++++- Tools/clinic/clinic.py | 25 +++++- 6 files changed, 201 insertions(+), 25 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index 7b804e8576aedc..f4842cc962f142 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -1740,29 +1740,18 @@ test_str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t goto exit; } return_value = test_str_converter_encoding_impl(module, a, b, c, d, d_length, e, e_length); + /* Post parse cleanup for a */ + PyMem_FREE(a); + /* Post parse cleanup for b */ + PyMem_FREE(b); + /* Post parse cleanup for c */ + PyMem_FREE(c); + /* Post parse cleanup for d */ + PyMem_FREE(d); + /* Post parse cleanup for e */ + PyMem_FREE(e); exit: - /* Cleanup for a */ - if (a) { - PyMem_FREE(a); - } - /* Cleanup for b */ - if (b) { - PyMem_FREE(b); - } - /* Cleanup for c */ - if (c) { - PyMem_FREE(c); - } - /* Cleanup for d */ - if (d) { - PyMem_FREE(d); - } - /* Cleanup for e */ - if (e) { - PyMem_FREE(e); - } - return return_value; } @@ -1770,7 +1759,7 @@ static PyObject * test_str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c, char *d, Py_ssize_t d_length, char *e, Py_ssize_t e_length) -/*[clinic end generated code: output=8acb886a3843f3bc input=eb4c38e1f898f402]*/ +/*[clinic end generated code: output=999c1deecfa15b0a input=eb4c38e1f898f402]*/ /*[clinic input] diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py index a590fa50aab04f..890beeb9efe29f 100644 --- a/Lib/test/test_clinic.py +++ b/Lib/test/test_clinic.py @@ -1045,6 +1045,17 @@ def test_str_converter(self): self.assertEqual(ac_tester.str_converter('a', b'b', b'c'), ('a', 'b', 'c')) self.assertEqual(ac_tester.str_converter('a', b'b', 'c\0c'), ('a', 'b', 'c\0c')) + def test_str_converter_encoding(self): + with self.assertRaises(TypeError): + ac_tester.str_converter_encoding(1) + self.assertEqual(ac_tester.str_converter_encoding('a', 'b', 'c'), ('a', 'b', 'c')) + with self.assertRaises(TypeError): + ac_tester.str_converter_encoding('a', b'b\0b', 'c') + self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c')])), ('a', 'b', 'c')) + self.assertEqual(ac_tester.str_converter_encoding('a', b'b', bytearray([ord('c'), 0, ord('c')])), + ('a', 'b', 'c\x00c')) + self.assertEqual(ac_tester.str_converter_encoding('a', b'b', b'c\x00c'), ('a', 'b', 'c\x00c')) + def test_py_buffer_converter(self): with self.assertRaises(TypeError): ac_tester.py_buffer_converter('a', 'b') @@ -1225,6 +1236,10 @@ def test_gh_99233_refcount(self): arg_refcount_after = sys.getrefcount(arg) self.assertEqual(arg_refcount_origin, arg_refcount_after) + def test_gh_99240_double_free(self): + expected_error = r'gh_99240_double_free\(\) argument 2 must be encoded string without null bytes, not str' + with self.assertRaisesRegex(TypeError, expected_error): + ac_tester.gh_99240_double_free('a', '\0b') if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst new file mode 100644 index 00000000000000..0a4db052755f87 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-11-08-15-54-43.gh-issue-99240.MhYwcz.rst @@ -0,0 +1,2 @@ +Fix double-free bug in Argument Clinic ``str_converter`` by +extracting memory clean up to a new ``post_parsing`` section. diff --git a/Modules/_testclinic.c b/Modules/_testclinic.c index a23ece2ae0355b..56eddfd6fdbf2d 100644 --- a/Modules/_testclinic.c +++ b/Modules/_testclinic.c @@ -551,6 +551,64 @@ str_converter_impl(PyObject *module, const char *a, const char *b, } +/*[clinic input] +str_converter_encoding + + a: str(encoding="idna") + b: str(encoding="idna", accept={bytes, bytearray, str}) + c: str(encoding="idna", accept={bytes, bytearray, str}, zeroes=True) + / + +[clinic start generated code]*/ + +static PyObject * +str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c, + Py_ssize_t c_length) +/*[clinic end generated code: output=af68766049248a1c input=0c5cf5159d0e870d]*/ +{ + assert(!PyErr_Occurred()); + PyObject *out[3] = {NULL,}; + int i = 0; + PyObject *arg; + + arg = PyUnicode_FromString(a); + assert(arg || PyErr_Occurred()); + if (!arg) { + goto error; + } + out[i++] = arg; + + arg = PyUnicode_FromString(b); + assert(arg || PyErr_Occurred()); + if (!arg) { + goto error; + } + out[i++] = arg; + + arg = PyUnicode_FromStringAndSize(c, c_length); + assert(arg || PyErr_Occurred()); + if (!arg) { + goto error; + } + out[i++] = arg; + + PyObject *tuple = PyTuple_New(3); + if (!tuple) { + goto error; + } + for (int j = 0; j < 3; j++) { + PyTuple_SET_ITEM(tuple, j, out[j]); + } + return tuple; + +error: + for (int j = 0; j < i; j++) { + Py_DECREF(out[j]); + } + return NULL; +} + + static PyObject * bytes_from_buffer(Py_buffer *buf) { @@ -927,6 +985,25 @@ gh_99233_refcount_impl(PyObject *module, PyObject *args) } +/*[clinic input] +gh_99240_double_free + + a: str(encoding="idna") + b: str(encoding="idna") + / + +Proof-of-concept of GH-99240 double-free bug. + +[clinic start generated code]*/ + +static PyObject * +gh_99240_double_free_impl(PyObject *module, char *a, char *b) +/*[clinic end generated code: output=586dc714992fe2ed input=23db44aa91870fc7]*/ +{ + Py_RETURN_NONE; +} + + static PyMethodDef tester_methods[] = { TEST_EMPTY_FUNCTION_METHODDEF OBJECTS_CONVERTER_METHODDEF @@ -951,6 +1028,7 @@ static PyMethodDef tester_methods[] = { DOUBLE_CONVERTER_METHODDEF PY_COMPLEX_CONVERTER_METHODDEF STR_CONVERTER_METHODDEF + STR_CONVERTER_ENCODING_METHODDEF PY_BUFFER_CONVERTER_METHODDEF KEYWORDS_METHODDEF KEYWORDS_KWONLY_METHODDEF @@ -970,6 +1048,7 @@ static PyMethodDef tester_methods[] = { KEYWORD_ONLY_PARAMETER_METHODDEF VARARG_AND_POSONLY_METHODDEF GH_99233_REFCOUNT_METHODDEF + GH_99240_DOUBLE_FREE_METHODDEF {NULL, NULL} }; diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h index eb425821e9cb3f..9aad44566bcde0 100644 --- a/Modules/clinic/_testclinic.c.h +++ b/Modules/clinic/_testclinic.c.h @@ -1165,6 +1165,43 @@ str_converter(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(str_converter_encoding__doc__, +"str_converter_encoding($module, a, b, c, /)\n" +"--\n" +"\n"); + +#define STR_CONVERTER_ENCODING_METHODDEF \ + {"str_converter_encoding", _PyCFunction_CAST(str_converter_encoding), METH_FASTCALL, str_converter_encoding__doc__}, + +static PyObject * +str_converter_encoding_impl(PyObject *module, char *a, char *b, char *c, + Py_ssize_t c_length); + +static PyObject * +str_converter_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + char *a = NULL; + char *b = NULL; + char *c = NULL; + Py_ssize_t c_length; + + if (!_PyArg_ParseStack(args, nargs, "esetet#:str_converter_encoding", + "idna", &a, "idna", &b, "idna", &c, &c_length)) { + goto exit; + } + return_value = str_converter_encoding_impl(module, a, b, c, c_length); + /* Post parse cleanup for a */ + PyMem_FREE(a); + /* Post parse cleanup for b */ + PyMem_FREE(b); + /* Post parse cleanup for c */ + PyMem_FREE(c); + +exit: + return return_value; +} + PyDoc_STRVAR(py_buffer_converter__doc__, "py_buffer_converter($module, a, b, /)\n" "--\n" @@ -2353,4 +2390,37 @@ gh_99233_refcount(PyObject *module, PyObject *const *args, Py_ssize_t nargs) Py_XDECREF(__clinic_args); return return_value; } -/*[clinic end generated code: output=a5c9f181f3a32d85 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(gh_99240_double_free__doc__, +"gh_99240_double_free($module, a, b, /)\n" +"--\n" +"\n" +"Proof-of-concept of GH-99240 double-free bug."); + +#define GH_99240_DOUBLE_FREE_METHODDEF \ + {"gh_99240_double_free", _PyCFunction_CAST(gh_99240_double_free), METH_FASTCALL, gh_99240_double_free__doc__}, + +static PyObject * +gh_99240_double_free_impl(PyObject *module, char *a, char *b); + +static PyObject * +gh_99240_double_free(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + char *a = NULL; + char *b = NULL; + + if (!_PyArg_ParseStack(args, nargs, "eses:gh_99240_double_free", + "idna", &a, "idna", &b)) { + goto exit; + } + return_value = gh_99240_double_free_impl(module, a, b); + /* Post parse cleanup for a */ + PyMem_FREE(a); + /* Post parse cleanup for b */ + PyMem_FREE(b); + +exit: + return return_value; +} +/*[clinic end generated code: output=49dced2c99bcd0fb input=a9049054013a1b77]*/ diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 94e17ee9c7dfda..0117a50725da58 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -348,6 +348,12 @@ def __init__(self): # "goto exit" if there are any. self.return_conversion = [] + # The C statements required to do some operations + # after the end of parsing but before cleaning up. + # These operations may be, for example, memory deallocations which + # can only be done without any error happening during argument parsing. + self.post_parsing = [] + # The C statements required to clean up after the impl call. self.cleanup = [] @@ -820,6 +826,7 @@ def parser_body(prototype, *fields, declarations=''): {modifications} {return_value} = {c_basename}_impl({impl_arguments}); {return_conversion} + {post_parsing} {exit_label} {cleanup} @@ -1460,6 +1467,7 @@ def render_function(self, clinic, f): template_dict['impl_parameters'] = ", ".join(data.impl_parameters) template_dict['impl_arguments'] = ", ".join(data.impl_arguments) template_dict['return_conversion'] = format_escape("".join(data.return_conversion).rstrip()) + template_dict['post_parsing'] = format_escape("".join(data.post_parsing).rstrip()) template_dict['cleanup'] = format_escape("".join(data.cleanup)) template_dict['return_value'] = data.return_value @@ -1484,6 +1492,7 @@ def render_function(self, clinic, f): return_conversion=template_dict['return_conversion'], initializers=template_dict['initializers'], modifications=template_dict['modifications'], + post_parsing=template_dict['post_parsing'], cleanup=template_dict['cleanup'], ) @@ -2725,6 +2734,10 @@ def _render_non_self(self, parameter, data): # parse_arguments self.parse_argument(data.parse_arguments) + # post_parsing + if post_parsing := self.post_parsing(): + data.post_parsing.append('/* Post parse cleanup for ' + name + ' */\n' + post_parsing.rstrip() + '\n') + # cleanup cleanup = self.cleanup() if cleanup: @@ -2820,6 +2833,14 @@ def modify(self): """ return "" + def post_parsing(self): + """ + The C statements required to do some operations after the end of parsing but before cleaning up. + Return a string containing this code indented at column 0. + If no operation is necessary, return an empty string. + """ + return "" + def cleanup(self): """ The C statements required to clean up after this variable. @@ -3416,10 +3437,10 @@ def converter_init(self, *, accept={str}, encoding=None, zeroes=False): if NoneType in accept and self.c_default == "Py_None": self.c_default = "NULL" - def cleanup(self): + def post_parsing(self): if self.encoding: name = self.name - return "".join(["if (", name, ") {\n PyMem_FREE(", name, ");\n}\n"]) + return f"PyMem_FREE({name});\n" def parse_arg(self, argname, displayname): if self.format_unit == 's':