From e55f80a2582b43195765f60d1f259047c7b9430f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Sun, 23 Jul 2023 17:37:48 +0200 Subject: [PATCH] gh-107137: Add _PyTupleBuilder API to the internal C API Add _PyTupleBuilder structure and functions: * _PyTupleBuilder_Init() * _PyTupleBuilder_Alloc() * _PyTupleBuilder_Append() * _PyTupleBuilder_AppendUnsafe() * _PyTupleBuilder_Finish() * _PyTupleBuilder_Dealloc() The builder tracks the size of the tuple and resize it in _PyTupleBuilder_Finish() if needed. Don't allocate empty tuple. Allocate an array of 16 objects on the stack to avoid allocating small tuple. _PyTupleBuilder_Append() overallocates the tuple by 25% to reduce the number of _PyTuple_Resize() calls. Do no track the temporary internal tuple by the GC before _PyTupleBuilder_Finish() creates the final complete and consistent tuple object. Use _PyTupleBuilder API in itertools batched_traverse(), PySequence_Tuple() and initialize_structseq_dict(). Add also helper functions: * _PyTuple_ResizeNoTrack() * _PyTuple_NewNoTrack() --- Include/internal/pycore_tuple.h | 138 ++++++++++++++++++++++++++++++++ Modules/itertoolsmodule.c | 57 ++++++------- Objects/abstract.c | 70 ++++++---------- Objects/structseq.c | 23 +++--- Objects/tupleobject.c | 50 +++++++++--- 5 files changed, 241 insertions(+), 97 deletions(-) diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h index 4fa7a12206bcb2..2fed7f37c5774f 100644 --- a/Include/internal/pycore_tuple.h +++ b/Include/internal/pycore_tuple.h @@ -10,6 +10,8 @@ extern "C" { extern void _PyTuple_MaybeUntrack(PyObject *); extern void _PyTuple_DebugMallocStats(FILE *out); +extern PyObject* _PyTuple_NewNoTrack(Py_ssize_t size); +extern int _PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize); /* runtime lifecycle */ @@ -73,6 +75,142 @@ typedef struct { PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */ } _PyTupleIterObject; + +// --- _PyTupleBuilder API --------------------------------------------------- + +typedef struct _PyTupleBuilder { + PyObject* small_tuple[16]; + PyObject *tuple; + PyObject **items; + size_t size; + size_t allocated; +} _PyTupleBuilder; + +static inline int +_PyTupleBuilder_Alloc(_PyTupleBuilder *builder, size_t size) +{ + if (size > (size_t)PY_SSIZE_T_MAX) { + /* Check for overflow */ + PyErr_NoMemory(); + return -1; + } + if (size <= builder->allocated) { + return 0; + } + + if (size <= Py_ARRAY_LENGTH(builder->small_tuple)) { + assert(builder->tuple == NULL); + builder->items = builder->small_tuple; + builder->allocated = Py_ARRAY_LENGTH(builder->small_tuple); + return 0; + } + + assert(size >= 1); + if (builder->tuple != NULL) { + if (_PyTuple_ResizeNoTrack(&builder->tuple, (Py_ssize_t)size) < 0) { + return -1; + } + } + else { + builder->tuple = _PyTuple_NewNoTrack((Py_ssize_t)size); + if (builder->tuple == NULL) { + return -1; + } + + if (builder->size > 0) { + memcpy(_PyTuple_ITEMS(builder->tuple), + builder->small_tuple, + builder->size * sizeof(builder->small_tuple[0])); + } + } + builder->items = _PyTuple_ITEMS(builder->tuple); + builder->allocated = size; + return 0; +} + +static inline int +_PyTupleBuilder_Init(_PyTupleBuilder *builder, Py_ssize_t size) +{ + memset(builder, 0, sizeof(*builder)); + + int res; + if (size > 0) { + res = _PyTupleBuilder_Alloc(builder, (size_t)size); + } + else { + res = 0; + } + return res; +} + +// The tuple builder must have already enough allocated items to store item. +static inline void +_PyTupleBuilder_AppendUnsafe(_PyTupleBuilder *builder, PyObject *item) +{ + assert(builder->items != NULL); + assert(builder->size < builder->allocated); + builder->items[builder->size] = item; + builder->size++; +} + +static inline int +_PyTupleBuilder_Append(_PyTupleBuilder *builder, PyObject *item) +{ + if (builder->size >= (size_t)PY_SSIZE_T_MAX) { + // prevent integer overflow + PyErr_NoMemory(); + return -1; + } + if (builder->size >= builder->allocated) { + size_t allocated = builder->size; + allocated += (allocated >> 2); // Over-allocate by 25% + if (_PyTupleBuilder_Alloc(builder, allocated) < 0) { + return -1; + } + } + _PyTupleBuilder_AppendUnsafe(builder, item); + return 0; +} + +static inline void +_PyTupleBuilder_Dealloc(_PyTupleBuilder *builder) +{ + Py_CLEAR(builder->tuple); + builder->items = NULL; + builder->size = 0; + builder->allocated = 0; +} + +static inline PyObject* +_PyTupleBuilder_Finish(_PyTupleBuilder *builder) +{ + if (builder->size == 0) { + _PyTupleBuilder_Dealloc(builder); + // return the empty tuple singleton + return PyTuple_New(0); + } + + if (builder->tuple != NULL) { + if (_PyTuple_ResizeNoTrack(&builder->tuple, (Py_ssize_t)builder->size) < 0) { + _PyTupleBuilder_Dealloc(builder); + return NULL; + } + + PyObject *result = builder->tuple; + builder->tuple = NULL; + // Avoid _PyObject_GC_TRACK() to avoid including pycore_object.h + PyObject_GC_Track(result); + return result; + } + else { + PyObject *tuple = _PyTuple_FromArraySteal(builder->items, + (Py_ssize_t)builder->size); + builder->size = 0; + return tuple; + } +} + + #ifdef __cplusplus } #endif diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index f5f7bf33bf8f4b..cd0c7a5a3a4567 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -4,7 +4,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_typeobject.h" // _PyType_GetModuleState() #include "pycore_object.h" // _PyObject_GC_TRACK() -#include "pycore_tuple.h" // _PyTuple_ITEMS() +#include "pycore_tuple.h" // _PyTupleBuilder #include "structmember.h" // PyMemberDef #include // offsetof() @@ -193,47 +193,42 @@ batched_traverse(batchedobject *bo, visitproc visit, void *arg) static PyObject * batched_next(batchedobject *bo) { - Py_ssize_t i; - Py_ssize_t n = bo->batch_size; PyObject *it = bo->it; - PyObject *item; - PyObject *result; - if (it == NULL) { return NULL; } - result = PyTuple_New(n); - if (result == NULL) { + + _PyTupleBuilder builder; + Py_ssize_t n = bo->batch_size; + if (_PyTupleBuilder_Init(&builder, n) < 0) { return NULL; } + iternextfunc iternext = *Py_TYPE(it)->tp_iternext; - PyObject **items = _PyTuple_ITEMS(result); - for (i=0 ; i < n ; i++) { - item = iternext(it); + for (Py_ssize_t i=0 ; i < n; i++) { + PyObject *item = iternext(it); if (item == NULL) { - goto null_item; + if (PyErr_Occurred()) { + if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { + /* Input raised an exception other than StopIteration */ + goto error; + } + PyErr_Clear(); + // StopIteration was raised + } + if (i == 0) { + goto error; + } + break; } - items[i] = item; + _PyTupleBuilder_AppendUnsafe(&builder, item); } - return result; + return _PyTupleBuilder_Finish(&builder); - null_item: - if (PyErr_Occurred()) { - if (!PyErr_ExceptionMatches(PyExc_StopIteration)) { - /* Input raised an exception other than StopIteration */ - Py_CLEAR(bo->it); - Py_DECREF(result); - return NULL; - } - PyErr_Clear(); - } - if (i == 0) { - Py_CLEAR(bo->it); - Py_DECREF(result); - return NULL; - } - _PyTuple_Resize(&result, i); - return result; +error: + _PyTupleBuilder_Dealloc(&builder); + Py_CLEAR(bo->it); + return NULL; } static PyType_Slot batched_slots[] = { diff --git a/Objects/abstract.c b/Objects/abstract.c index b4edcec6007710..c70fdf470183a7 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -2074,11 +2074,6 @@ PySequence_DelSlice(PyObject *s, Py_ssize_t i1, Py_ssize_t i2) PyObject * PySequence_Tuple(PyObject *v) { - PyObject *it; /* iter(v) */ - Py_ssize_t n; /* guess for result tuple size */ - PyObject *result = NULL; - Py_ssize_t j; - if (v == NULL) { return null_error(); } @@ -2091,66 +2086,53 @@ PySequence_Tuple(PyObject *v) a copy, so there's no need for exactness below. */ return Py_NewRef(v); } - if (PyList_CheckExact(v)) + if (PyList_CheckExact(v)) { return PyList_AsTuple(v); + } - /* Get iterator. */ - it = PyObject_GetIter(v); - if (it == NULL) + _PyTupleBuilder builder; + if (_PyTupleBuilder_Init(&builder, 0) < 0) { return NULL; + } + + /* Get iterator. */ + PyObject *it = PyObject_GetIter(v); // iter(v) + if (it == NULL) { + goto Fail; + } /* Guess result size and allocate space. */ - n = PyObject_LengthHint(v, 10); - if (n == -1) + Py_ssize_t n = PyObject_LengthHint(v, 10); // Guess for result tuple size + if (n == -1) { goto Fail; - result = PyTuple_New(n); - if (result == NULL) + } + if (_PyTupleBuilder_Alloc(&builder, n) < 0) { goto Fail; + } /* Fill the tuple. */ + Py_ssize_t j; for (j = 0; ; ++j) { PyObject *item = PyIter_Next(it); if (item == NULL) { - if (PyErr_Occurred()) + if (PyErr_Occurred()) { goto Fail; + } break; } - if (j >= n) { - size_t newn = (size_t)n; - /* The over-allocation strategy can grow a bit faster - than for lists because unlike lists the - over-allocation isn't permanent -- we reclaim - the excess before the end of this routine. - So, grow by ten and then add 25%. - */ - newn += 10u; - newn += newn >> 2; - if (newn > PY_SSIZE_T_MAX) { - /* Check for overflow */ - PyErr_NoMemory(); - Py_DECREF(item); - goto Fail; - } - n = (Py_ssize_t)newn; - if (_PyTuple_Resize(&result, n) != 0) { - Py_DECREF(item); - goto Fail; - } + + if (_PyTupleBuilder_Append(&builder, item) < 0) { + Py_DECREF(item); + goto Fail; } - PyTuple_SET_ITEM(result, j, item); } - /* Cut tuple back if guess was too large. */ - if (j < n && - _PyTuple_Resize(&result, j) != 0) - goto Fail; - Py_DECREF(it); - return result; + return _PyTupleBuilder_Finish(&builder); Fail: - Py_XDECREF(result); - Py_DECREF(it); + _PyTupleBuilder_Dealloc(&builder); + Py_XDECREF(it); return NULL; } diff --git a/Objects/structseq.c b/Objects/structseq.c index 49011139b66534..695d14bf470aed 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -390,11 +390,9 @@ count_members(PyStructSequence_Desc *desc, Py_ssize_t *n_unnamed_members) { static int initialize_structseq_dict(PyStructSequence_Desc *desc, PyObject* dict, Py_ssize_t n_members, Py_ssize_t n_unnamed_members) { - PyObject *v; - #define SET_DICT_FROM_SIZE(key, value) \ do { \ - v = PyLong_FromSsize_t(value); \ + PyObject *v = PyLong_FromSsize_t(value); \ if (v == NULL) { \ return -1; \ } \ @@ -410,13 +408,12 @@ initialize_structseq_dict(PyStructSequence_Desc *desc, PyObject* dict, SET_DICT_FROM_SIZE(unnamed_fields_key, n_unnamed_members); // Prepare and set __match_args__ - Py_ssize_t i, k; - PyObject* keys = PyTuple_New(desc->n_in_sequence); - if (keys == NULL) { + _PyTupleBuilder keys_builder; + if (_PyTupleBuilder_Init(&keys_builder, desc->n_in_sequence) < 0) { return -1; } - for (i = k = 0; i < desc->n_in_sequence; ++i) { + for (Py_ssize_t i = 0; i < desc->n_in_sequence; ++i) { if (desc->fields[i].name == PyStructSequence_UnnamedField) { continue; } @@ -424,23 +421,23 @@ initialize_structseq_dict(PyStructSequence_Desc *desc, PyObject* dict, if (new_member == NULL) { goto error; } - PyTuple_SET_ITEM(keys, k, new_member); - k++; + _PyTupleBuilder_AppendUnsafe(&keys_builder, new_member); } - if (_PyTuple_Resize(&keys, k) == -1) { + PyObject *keys = _PyTupleBuilder_Finish(&keys_builder); + if (keys == NULL) { goto error; } - if (PyDict_SetItemString(dict, match_args_key, keys) < 0) { + Py_DECREF(keys); goto error; } - Py_DECREF(keys); + return 0; error: - Py_DECREF(keys); + _PyTupleBuilder_Dealloc(&keys_builder); return -1; } diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index c3ff40fdb14c60..85d58ab3013503 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -66,23 +66,35 @@ tuple_get_empty(void) } PyObject * -PyTuple_New(Py_ssize_t size) +_PyTuple_NewNoTrack(Py_ssize_t size) { - PyTupleObject *op; if (size == 0) { return tuple_get_empty(); } - op = tuple_alloc(size); + PyTupleObject *op = tuple_alloc(size); if (op == NULL) { return NULL; } for (Py_ssize_t i = 0; i < size; i++) { op->ob_item[i] = NULL; } - _PyObject_GC_TRACK(op); return (PyObject *) op; } +PyObject * +PyTuple_New(Py_ssize_t size) +{ + if (size == 0) { + return tuple_get_empty(); + } + PyObject *op = _PyTuple_NewNoTrack(size); + if (op == NULL) { + return NULL; + } + _PyObject_GC_TRACK(op); + return op; +} + Py_ssize_t PyTuple_Size(PyObject *op) { @@ -894,8 +906,8 @@ PyTypeObject PyTuple_Type = { efficiently. In any case, don't use this if the tuple may already be known to some other part of the code. */ -int -_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) +static int +tuple_resize(PyObject **pv, Py_ssize_t newsize, int track) { PyTupleObject *v; PyTupleObject *sv; @@ -927,7 +939,12 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) /* The empty tuple is statically allocated so we never resize it in-place. */ Py_DECREF(v); - *pv = PyTuple_New(newsize); + if (track) { + *pv = PyTuple_New(newsize); + } + else { + *pv = _PyTuple_NewNoTrack(newsize); + } return *pv == NULL ? -1 : 0; } @@ -952,14 +969,29 @@ _PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) } _Py_NewReferenceNoTotal((PyObject *) sv); /* Zero out items added by growing */ - if (newsize > oldsize) + if (newsize > oldsize) { memset(&sv->ob_item[oldsize], 0, sizeof(*sv->ob_item) * (newsize - oldsize)); + } *pv = (PyObject *) sv; - _PyObject_GC_TRACK(sv); + if (track) { + _PyObject_GC_TRACK(*pv); + } return 0; } +int +_PyTuple_ResizeNoTrack(PyObject **pv, Py_ssize_t newsize) +{ + return tuple_resize(pv, newsize, 0); +} + +int +_PyTuple_Resize(PyObject **pv, Py_ssize_t newsize) +{ + return tuple_resize(pv, newsize, 1); +} + static void maybe_freelist_clear(PyInterpreterState *, int);