From a2a0e6fd920e454df073d9efb66633425549347c Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 12 May 2020 18:00:29 +0200 Subject: [PATCH 01/12] Remove static state from the _csv module Uses code from: https://github.com/python/cpython/pull/16078 Co-authored-by: Marcel Plch Co-authored-by: Eddie Elizondo Co-authored-by: Hai Shi --- ...2020-10-20-23-28-55.bpo-1635741.Iyka3r.rst | 1 + Modules/_csv.c | 512 ++++++++++-------- 2 files changed, 294 insertions(+), 219 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2020-10-20-23-28-55.bpo-1635741.Iyka3r.rst diff --git a/Misc/NEWS.d/next/Library/2020-10-20-23-28-55.bpo-1635741.Iyka3r.rst b/Misc/NEWS.d/next/Library/2020-10-20-23-28-55.bpo-1635741.Iyka3r.rst new file mode 100644 index 00000000000000..e14e9a12c275dd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-10-20-23-28-55.bpo-1635741.Iyka3r.rst @@ -0,0 +1 @@ +Port the _csv module to the multi-phase initialization API (:pep:`489`). diff --git a/Modules/_csv.c b/Modules/_csv.c index 594f6c14727262..e1a6a22e1269f1 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -18,9 +18,14 @@ module instead. typedef struct { PyObject *error_obj; /* CSV exception */ PyObject *dialects; /* Dialect registry */ + PyTypeObject *dialect_type; + PyTypeObject *reader_type; + PyTypeObject *writer_type; long field_limit; /* max parsed field size */ } _csvstate; +static struct PyModuleDef _csvmodule; + static inline _csvstate* get_csv_state(PyObject *module) { @@ -32,16 +37,24 @@ get_csv_state(PyObject *module) static int _csv_clear(PyObject *m) { - Py_CLEAR(get_csv_state(m)->error_obj); - Py_CLEAR(get_csv_state(m)->dialects); + _csvstate *m_state = PyModule_GetState(m); + Py_CLEAR(m_state->error_obj); + Py_CLEAR(m_state->dialects); + Py_CLEAR(m_state->dialect_type); + Py_CLEAR(m_state->reader_type); + Py_CLEAR(m_state->writer_type); return 0; } static int _csv_traverse(PyObject *m, visitproc visit, void *arg) { - Py_VISIT(get_csv_state(m)->error_obj); - Py_VISIT(get_csv_state(m)->dialects); + _csvstate *m_state = PyModule_GetState(m); + Py_VISIT(m_state->error_obj); + Py_VISIT(m_state->dialects); + Py_VISIT(m_state->dialect_type); + Py_VISIT(m_state->reader_type); + Py_VISIT(m_state->writer_type); return 0; } @@ -51,10 +64,6 @@ _csv_free(void *m) _csv_clear((PyObject *)m); } -static struct PyModuleDef _csvmodule; - -#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule))) - typedef enum { START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, @@ -92,8 +101,6 @@ typedef struct { } DialectObj; -static PyTypeObject Dialect_Type; - typedef struct { PyObject_HEAD @@ -110,8 +117,6 @@ typedef struct { unsigned long line_num; /* Source-file line number */ } ReaderObj; -static PyTypeObject Reader_Type; - typedef struct { PyObject_HEAD @@ -123,26 +128,27 @@ typedef struct { Py_ssize_t rec_size; /* size of allocated record */ Py_ssize_t rec_len; /* length of record */ int num_fields; /* number of fields in record */ -} WriterObj; -static PyTypeObject Writer_Type; + PyObject *error_obj; /* cached error object */ +} WriterObj; /* * DIALECT class */ static PyObject * -get_dialect_from_registry(PyObject * name_obj) +get_dialect_from_registry(PyObject *name_obj, _csvstate *m_state) { PyObject *dialect_obj; - dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj); + dialect_obj = PyDict_GetItemWithError(m_state->dialects, name_obj); if (dialect_obj == NULL) { if (!PyErr_Occurred()) - PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); + PyErr_Format(m_state->error_obj, "unknown dialect"); } else Py_INCREF(dialect_obj); + return dialect_obj; } @@ -308,9 +314,17 @@ static PyGetSetDef Dialect_getsetlist[] = { static void Dialect_dealloc(DialectObj *self) +{ + PyTypeObject *tp = Py_TYPE(self); + Py_XDECREF(self->lineterminator); + tp->tp_free((PyObject *)self); + Py_DECREF(tp); +} + +static void +Dialect_finalize(DialectObj *self) { Py_XDECREF(self->lineterminator); - Py_TYPE(self)->tp_free((PyObject *)self); } static char *dialect_kws[] = { @@ -354,16 +368,27 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) &strict)) return NULL; + PyObject *m = _PyType_GetModuleByDef(type, &_csvmodule); + if (m == NULL) { + return NULL; + } + _csvstate *m_state = PyModule_GetState(m); + if (m_state == NULL) { + return PyErr_Format(PyExc_SystemError, + "dialect_new: No _csv module state found"); + } + + if (dialect != NULL) { if (PyUnicode_Check(dialect)) { - dialect = get_dialect_from_registry(dialect); + dialect = get_dialect_from_registry(dialect, m_state); if (dialect == NULL) return NULL; } else Py_INCREF(dialect); /* Can we reuse this instance? */ - if (PyObject_TypeCheck(dialect, &Dialect_Type) && + if (PyObject_TypeCheck(dialect, m_state->dialect_type) && delimiter == NULL && doublequote == NULL && escapechar == NULL && @@ -454,63 +479,62 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) return ret; } +/* Since dialect is now a heap type, it inherits pickling method for + * protocol 0 and 1 from object, therefore it needs to be overriden */ + +PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling"); + +static PyObject * +Dialect_reduce(PyObject *self, PyObject *args) { + PyErr_Format(PyExc_TypeError, + "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self))); + return NULL; +} + +static struct PyMethodDef dialect_methods[] = { + {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc}, + {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc}, + {NULL, NULL} +}; PyDoc_STRVAR(Dialect_Type_doc, "CSV dialect\n" "\n" "The Dialect type records CSV parsing and generation options.\n"); -static PyTypeObject Dialect_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_csv.Dialect", /* tp_name */ - sizeof(DialectObj), /* tp_basicsize */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)Dialect_dealloc, /* tp_dealloc */ - 0, /* tp_vectorcall_offset */ - (getattrfunc)0, /* tp_getattr */ - (setattrfunc)0, /* tp_setattr */ - 0, /* tp_as_async */ - (reprfunc)0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - (hashfunc)0, /* tp_hash */ - (ternaryfunc)0, /* tp_call */ - (reprfunc)0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ +static PyType_Slot Dialect_Type_slots[] = { + {Py_tp_doc, (char*)Dialect_Type_doc}, + {Py_tp_members, Dialect_memberlist}, + {Py_tp_getset, Dialect_getsetlist}, + {Py_tp_new, dialect_new}, + {Py_tp_methods, dialect_methods}, + {Py_tp_finalize, Dialect_finalize}, + {Py_tp_dealloc, Dialect_dealloc}, + {0, NULL} +}; + +PyType_Spec Dialect_Type_spec = { + "_csv.Dialect", + sizeof(DialectObj), + 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - Dialect_Type_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - Dialect_memberlist, /* tp_members */ - Dialect_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - dialect_new, /* tp_new */ - 0, /* tp_free */ + Dialect_Type_slots, }; + /* * Return an instance of the dialect type, given a Python instance or kwarg * description of the dialect */ static PyObject * -_call_dialect(PyObject *dialect_inst, PyObject *kwargs) +_call_dialect(PyObject *m, PyObject *dialect_inst, PyObject *kwargs) { - PyObject *type = (PyObject *)&Dialect_Type; + _csvstate *m_state = PyModule_GetState(m); + if (m_state == NULL) { + return NULL; + } + + PyObject *type = (PyObject *)m_state->dialect_type; if (dialect_inst) { return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs); } @@ -568,11 +592,11 @@ parse_grow_buff(ReaderObj *self) } static int -parse_add_char(ReaderObj *self, Py_UCS4 c) +parse_add_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) { - if (self->field_len >= _csvstate_global->field_limit) { - PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)", - _csvstate_global->field_limit); + if (self->field_len >= m_state->field_limit) { + PyErr_Format(m_state->error_obj, "field larger than field limit (%ld)", + m_state->field_limit); return -1; } if (self->field_len == self->field_size && !parse_grow_buff(self)) @@ -582,7 +606,7 @@ parse_add_char(ReaderObj *self, Py_UCS4 c) } static int -parse_process_char(ReaderObj *self, Py_UCS4 c) +parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) { DialectObj *dialect = self->dialect; @@ -628,7 +652,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) /* begin new unquoted field */ if (dialect->quoting == QUOTE_NONNUMERIC) self->numeric_field = 1; - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; self->state = IN_FIELD; } @@ -636,14 +660,14 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) case ESCAPED_CHAR: if (c == '\n' || c=='\r') { - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; self->state = AFTER_ESCAPED_CRNL; break; } if (c == '\0') c = '\n'; - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; self->state = IN_FIELD; break; @@ -673,7 +697,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) } else { /* normal character - save in field */ - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; } break; @@ -699,7 +723,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) } else { /* normal character - save in field */ - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; } break; @@ -707,7 +731,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) case ESCAPE_IN_QUOTED_FIELD: if (c == '\0') c = '\n'; - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; self->state = IN_QUOTED_FIELD; break; @@ -717,7 +741,7 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) if (dialect->quoting != QUOTE_NONE && c == dialect->quotechar) { /* save "" as " */ - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; self->state = IN_QUOTED_FIELD; } @@ -734,13 +758,13 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) self->state = (c == '\0' ? START_RECORD : EAT_CRNL); } else if (!dialect->strict) { - if (parse_add_char(self, c) < 0) + if (parse_add_char(self, m_state, c) < 0) return -1; self->state = IN_FIELD; } else { /* illegal */ - PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'", + PyErr_Format(m_state->error_obj, "'%c' expected after '%c'", dialect->delimiter, dialect->quotechar); return -1; @@ -753,7 +777,8 @@ parse_process_char(ReaderObj *self, Py_UCS4 c) else if (c == '\0') self->state = START_RECORD; else { - PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); + PyErr_Format(m_state->error_obj, + "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); return -1; } break; @@ -784,6 +809,16 @@ Reader_iternext(ReaderObj *self) const void *data; PyObject *lineobj; + PyObject *m = _PyType_GetModuleByDef(Py_TYPE(self), &_csvmodule); + if (m == NULL) { + return NULL; + } + _csvstate *m_state = PyModule_GetState(m); + if (m_state == NULL) { + return PyErr_Format(PyExc_SystemError, + "dialect_new: No _csv module state found"); + } + if (parse_reset(self) < 0) return NULL; do { @@ -793,7 +828,7 @@ Reader_iternext(ReaderObj *self) if (!PyErr_Occurred() && (self->field_len != 0 || self->state == IN_QUOTED_FIELD)) { if (self->dialect->strict) - PyErr_SetString(_csvstate_global->error_obj, + PyErr_SetString(m_state->error_obj, "unexpected end of data"); else if (parse_save_field(self) >= 0) break; @@ -801,7 +836,7 @@ Reader_iternext(ReaderObj *self) return NULL; } if (!PyUnicode_Check(lineobj)) { - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(m_state->error_obj, "iterator should return strings, " "not %.200s " "(the file should be opened in text mode)", @@ -823,18 +858,18 @@ Reader_iternext(ReaderObj *self) c = PyUnicode_READ(kind, data, pos); if (c == '\0') { Py_DECREF(lineobj); - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(m_state->error_obj, "line contains NUL"); goto err; } - if (parse_process_char(self, c) < 0) { + if (parse_process_char(self, m_state, c) < 0) { Py_DECREF(lineobj); goto err; } pos++; } Py_DECREF(lineobj); - if (parse_process_char(self, 0) < 0) + if (parse_process_char(self, m_state, 0) < 0) goto err; } while (self->state != START_RECORD); @@ -847,6 +882,7 @@ Reader_iternext(ReaderObj *self) static void Reader_dealloc(ReaderObj *self) { + PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); Py_XDECREF(self->dialect); Py_XDECREF(self->input_iter); @@ -854,6 +890,18 @@ Reader_dealloc(ReaderObj *self) if (self->field != NULL) PyMem_Free(self->field); PyObject_GC_Del(self); + Py_DECREF(tp); +} + +static void +Reader_finalize(ReaderObj *self) +{ + Py_XDECREF(self->dialect); + Py_XDECREF(self->input_iter); + Py_XDECREF(self->fields); + if (self->field != NULL) { + PyMem_Free(self->field); + } } static int @@ -893,47 +941,36 @@ static struct PyMemberDef Reader_memberlist[] = { }; -static PyTypeObject Reader_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_csv.reader", /*tp_name*/ - sizeof(ReaderObj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)Reader_dealloc, /*tp_dealloc*/ - 0, /*tp_vectorcall_offset*/ - (getattrfunc)0, /*tp_getattr*/ - (setattrfunc)0, /*tp_setattr*/ - 0, /*tp_as_async*/ - (reprfunc)0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - (hashfunc)0, /*tp_hash*/ - (ternaryfunc)0, /*tp_call*/ - (reprfunc)0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - Reader_Type_doc, /*tp_doc*/ - (traverseproc)Reader_traverse, /*tp_traverse*/ - (inquiry)Reader_clear, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - PyObject_SelfIter, /*tp_iter*/ - (getiterfunc)Reader_iternext, /*tp_iternext*/ - Reader_methods, /*tp_methods*/ - Reader_memberlist, /*tp_members*/ - 0, /*tp_getset*/ +static PyType_Slot Reader_Type_slots[] = { + {Py_tp_doc, (char*)Reader_Type_doc}, + {Py_tp_traverse, Reader_traverse}, + {Py_tp_clear, Reader_clear}, + {Py_tp_iter, PyObject_SelfIter}, + {Py_tp_iternext, Reader_iternext}, + {Py_tp_methods, Reader_methods}, + {Py_tp_members, Reader_memberlist}, + {Py_tp_finalize, Reader_finalize}, + {Py_tp_dealloc, Reader_dealloc}, + {0, NULL} +}; +PyType_Spec Reader_Type_spec = { + "_csv.reader", + sizeof(ReaderObj), + 0, + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | + Py_TPFLAGS_HAVE_GC, + Reader_Type_slots }; + static PyObject * csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) { PyObject * iterator, * dialect = NULL; - ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); + ReaderObj * self = PyObject_GC_New( + ReaderObj, + get_csv_state(module)->reader_type); if (!self) return NULL; @@ -959,7 +996,7 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) Py_DECREF(self); return NULL; } - self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); + self->dialect = (DialectObj *)_call_dialect(module, dialect, keyword_args); if (self->dialect == NULL) { Py_DECREF(self); return NULL; @@ -1048,7 +1085,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, const void *field_dat } if (want_escape) { if (!dialect->escapechar) { - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(self->error_obj, "need to escape, but no escapechar set"); return -1; } @@ -1166,7 +1203,7 @@ csv_writerow(WriterObj *self, PyObject *seq) iter = PyObject_GetIter(seq); if (iter == NULL) { if (PyErr_ExceptionMatches(PyExc_TypeError)) { - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(self->error_obj, "iterable expected, not %.200s", Py_TYPE(seq)->tp_name); } @@ -1223,7 +1260,7 @@ csv_writerow(WriterObj *self, PyObject *seq) if (self->num_fields > 0 && self->rec_len == 0) { if (dialect->quoting == QUOTE_NONE) { - PyErr_Format(_csvstate_global->error_obj, + PyErr_Format(self->error_obj, "single empty field record must be quoted"); return NULL; } @@ -1292,22 +1329,12 @@ static struct PyMemberDef Writer_memberlist[] = { { NULL } }; -static void -Writer_dealloc(WriterObj *self) -{ - PyObject_GC_UnTrack(self); - Py_XDECREF(self->dialect); - Py_XDECREF(self->write); - if (self->rec != NULL) - PyMem_Free(self->rec); - PyObject_GC_Del(self); -} - static int Writer_traverse(WriterObj *self, visitproc visit, void *arg) { Py_VISIT(self->dialect); Py_VISIT(self->write); + Py_VISIT(self->error_obj); return 0; } @@ -1316,9 +1343,19 @@ Writer_clear(WriterObj *self) { Py_CLEAR(self->dialect); Py_CLEAR(self->write); + Py_CLEAR(self->error_obj); return 0; } +static void +Writer_finalize(WriterObj *self) +{ + Writer_clear(self); + if (self->rec != NULL) { + PyMem_Free(self->rec); + } +} + PyDoc_STRVAR(Writer_Type_doc, "CSV writer\n" "\n" @@ -1326,46 +1363,33 @@ PyDoc_STRVAR(Writer_Type_doc, "in CSV format from sequence input.\n" ); -static PyTypeObject Writer_Type = { - PyVarObject_HEAD_INIT(NULL, 0) - "_csv.writer", /*tp_name*/ - sizeof(WriterObj), /*tp_basicsize*/ - 0, /*tp_itemsize*/ - /* methods */ - (destructor)Writer_dealloc, /*tp_dealloc*/ - 0, /*tp_vectorcall_offset*/ - (getattrfunc)0, /*tp_getattr*/ - (setattrfunc)0, /*tp_setattr*/ - 0, /*tp_as_async*/ - (reprfunc)0, /*tp_repr*/ - 0, /*tp_as_number*/ - 0, /*tp_as_sequence*/ - 0, /*tp_as_mapping*/ - (hashfunc)0, /*tp_hash*/ - (ternaryfunc)0, /*tp_call*/ - (reprfunc)0, /*tp_str*/ - 0, /*tp_getattro*/ - 0, /*tp_setattro*/ - 0, /*tp_as_buffer*/ +static PyType_Slot Writer_Type_slots[] = { + {Py_tp_finalize, Writer_finalize}, + {Py_tp_doc, (char*)Writer_Type_doc}, + {Py_tp_traverse, Writer_traverse}, + {Py_tp_clear, Writer_clear}, + {Py_tp_methods, Writer_methods}, + {Py_tp_members, Writer_memberlist}, + {0, NULL} +}; + +PyType_Spec Writer_Type_spec = { + "_csv.writer", + sizeof(WriterObj), + 0, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, /*tp_flags*/ - Writer_Type_doc, - (traverseproc)Writer_traverse, /*tp_traverse*/ - (inquiry)Writer_clear, /*tp_clear*/ - 0, /*tp_richcompare*/ - 0, /*tp_weaklistoffset*/ - (getiterfunc)0, /*tp_iter*/ - (getiterfunc)0, /*tp_iternext*/ - Writer_methods, /*tp_methods*/ - Writer_memberlist, /*tp_members*/ - 0, /*tp_getset*/ + Py_TPFLAGS_HAVE_GC, + Writer_Type_slots, }; + static PyObject * csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) { PyObject * output_file, * dialect = NULL; - WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); + WriterObj * self = PyObject_GC_New( + WriterObj, + get_csv_state(module)->writer_type); _Py_IDENTIFIER(write); if (!self) @@ -1379,6 +1403,9 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) self->rec_len = 0; self->num_fields = 0; + self->error_obj = get_csv_state(module)->error_obj; + Py_INCREF(self->error_obj); + if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { Py_DECREF(self); return NULL; @@ -1393,7 +1420,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) Py_DECREF(self); return NULL; } - self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); + self->dialect = (DialectObj *)_call_dialect(module, dialect, keyword_args); if (self->dialect == NULL) { Py_DECREF(self); return NULL; @@ -1408,7 +1435,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) static PyObject * csv_list_dialects(PyObject *module, PyObject *args) { - return PyDict_Keys(_csvstate_global->dialects); + return PyDict_Keys(get_csv_state(module)->dialects); } static PyObject * @@ -1426,10 +1453,10 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) } if (PyUnicode_READY(name_obj) == -1) return NULL; - dialect = _call_dialect(dialect_obj, kwargs); + dialect = _call_dialect(module, dialect_obj, kwargs); if (dialect == NULL) return NULL; - if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) { + if (PyDict_SetItem(get_csv_state(module)->dialects, name_obj, dialect) < 0) { Py_DECREF(dialect); return NULL; } @@ -1440,9 +1467,9 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) static PyObject * csv_unregister_dialect(PyObject *module, PyObject *name_obj) { - if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) { + if (PyDict_DelItem(get_csv_state(module)->dialects, name_obj) < 0) { if (PyErr_ExceptionMatches(PyExc_KeyError)) { - PyErr_Format(_csvstate_global->error_obj, "unknown dialect"); + PyErr_Format(get_csv_state(module)->error_obj, "unknown dialect"); } return NULL; } @@ -1452,14 +1479,14 @@ csv_unregister_dialect(PyObject *module, PyObject *name_obj) static PyObject * csv_get_dialect(PyObject *module, PyObject *name_obj) { - return get_dialect_from_registry(name_obj); + return get_dialect_from_registry(name_obj, get_csv_state(module)); } static PyObject * csv_field_size_limit(PyObject *module, PyObject *args) { PyObject *new_limit = NULL; - long old_limit = _csvstate_global->field_limit; + long old_limit = get_csv_state(module)->field_limit; if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) return NULL; @@ -1469,15 +1496,25 @@ csv_field_size_limit(PyObject *module, PyObject *args) "limit must be an integer"); return NULL; } - _csvstate_global->field_limit = PyLong_AsLong(new_limit); - if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) { - _csvstate_global->field_limit = old_limit; + get_csv_state(module)->field_limit = PyLong_AsLong(new_limit); + if (get_csv_state(module)->field_limit == -1 && PyErr_Occurred()) { + get_csv_state(module)->field_limit = old_limit; return NULL; } } return PyLong_FromLong(old_limit); } +static PyType_Slot error_slots[] = { + {0, NULL}, +}; + +PyType_Spec error_spec = { + .name = "_csv.Error", + .flags = Py_TPFLAGS_DEFAULT, + .slots = error_slots, +}; + /* * MODULE */ @@ -1610,68 +1647,105 @@ static struct PyMethodDef csv_methods[] = { { NULL, NULL } }; -static struct PyModuleDef _csvmodule = { - PyModuleDef_HEAD_INIT, - "_csv", - csv_module_doc, - sizeof(_csvstate), - csv_methods, - NULL, - _csv_traverse, - _csv_clear, - _csv_free -}; - -PyMODINIT_FUNC -PyInit__csv(void) -{ - PyObject *module; +static int +csv_exec(PyObject *module) { const StyleDesc *style; + PyObject *temp; + _csvstate *m_state = get_csv_state(module); - if (PyType_Ready(&Reader_Type) < 0) - return NULL; + temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL); + if (temp == NULL) { + return -1; + } + if (PyModule_AddObject(module, "Dialect", temp)) { + return -1; + } + Py_INCREF(temp); + m_state->dialect_type = (PyTypeObject *)temp; - if (PyType_Ready(&Writer_Type) < 0) - return NULL; + temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL); + if (temp == NULL) { + return -1; + } + if (PyModule_AddObject(module, "Reader", temp)) { + return -1; + } + Py_INCREF(temp); + m_state->reader_type = (PyTypeObject *)temp; - /* Create the module and add the functions */ - module = PyModule_Create(&_csvmodule); - if (module == NULL) - return NULL; + temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL); + if (temp == NULL) { + return -1; + } + if (PyModule_AddObject(module, "Writer", temp)) { + return -1; + } + Py_INCREF(temp); + m_state->writer_type = (PyTypeObject *)temp; /* Add version to the module. */ if (PyModule_AddStringConstant(module, "__version__", - MODULE_VERSION) == -1) - return NULL; + MODULE_VERSION) == -1) { + return -1; + } /* Set the field limit */ - get_csv_state(module)->field_limit = 128 * 1024; - /* Do I still need to add this var to the Module Dict? */ + m_state->field_limit = 128 * 1024; /* Add _dialects dictionary */ - get_csv_state(module)->dialects = PyDict_New(); - if (get_csv_state(module)->dialects == NULL) - return NULL; - Py_INCREF(get_csv_state(module)->dialects); - if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects)) - return NULL; + m_state->dialects = PyDict_New(); + if (m_state->dialects == NULL) { + return -1; + } + Py_INCREF(m_state->dialects); + if (PyModule_AddObject(module, "_dialects", m_state->dialects)) { + return -1; + } /* Add quote styles into dictionary */ for (style = quote_styles; style->name; style++) { if (PyModule_AddIntConstant(module, style->name, style->style) == -1) - return NULL; + return -1; } - if (PyModule_AddType(module, &Dialect_Type)) { - return NULL; + /* Add the CSV exception object to the module. */ + PyObject *bases = PyTuple_New(1); + if (bases == NULL) { + return -1; + } + PyTuple_SET_ITEM(bases, 0, PyExc_Exception); + m_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec, bases); + Py_DECREF(bases); + if (m_state->error_obj == NULL) { + return -1; + } + if (PyModule_AddType(module, (PyTypeObject *)m_state->error_obj) != 0) { + return -1; } - /* Add the CSV exception object to the module. */ - get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL); - if (get_csv_state(module)->error_obj == NULL) - return NULL; - Py_INCREF(get_csv_state(module)->error_obj); - PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj); - return module; + return 0; +} + +static PyModuleDef_Slot csv_slots[] = { + {Py_mod_exec, csv_exec}, + {0, NULL} +}; + +static struct PyModuleDef _csvmodule = { + PyModuleDef_HEAD_INIT, + "_csv", + csv_module_doc, + sizeof(_csvstate), + csv_methods, + csv_slots, + _csv_traverse, + _csv_clear, + _csv_free +}; + +PyMODINIT_FUNC +PyInit__csv(void) +{ + return PyModuleDef_Init(&_csvmodule); } From 09802b2eac1813af6b043e128e40724445809bbc Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 10 Nov 2020 22:03:18 +0100 Subject: [PATCH 02/12] Use PyModule_AddObjectRef and PyTuple_Pack --- Modules/_csv.c | 33 ++++++++------------------------- 1 file changed, 8 insertions(+), 25 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index e1a6a22e1269f1..e002c08cff76ee 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1654,34 +1654,22 @@ csv_exec(PyObject *module) { _csvstate *m_state = get_csv_state(module); temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL); - if (temp == NULL) { - return -1; - } - if (PyModule_AddObject(module, "Dialect", temp)) { + m_state->dialect_type = (PyTypeObject *)temp; + if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) { return -1; } - Py_INCREF(temp); - m_state->dialect_type = (PyTypeObject *)temp; temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL); - if (temp == NULL) { - return -1; - } - if (PyModule_AddObject(module, "Reader", temp)) { + m_state->reader_type = (PyTypeObject *)temp; + if (PyModule_AddObjectRef(module, "Reader", temp) < 0) { return -1; } - Py_INCREF(temp); - m_state->reader_type = (PyTypeObject *)temp; temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL); - if (temp == NULL) { - return -1; - } - if (PyModule_AddObject(module, "Writer", temp)) { + m_state->writer_type = (PyTypeObject *)temp; + if (PyModule_AddObjectRef(module, "Writer", temp) < 0) { return -1; } - Py_INCREF(temp); - m_state->writer_type = (PyTypeObject *)temp; /* Add version to the module. */ if (PyModule_AddStringConstant(module, "__version__", @@ -1694,11 +1682,7 @@ csv_exec(PyObject *module) { /* Add _dialects dictionary */ m_state->dialects = PyDict_New(); - if (m_state->dialects == NULL) { - return -1; - } - Py_INCREF(m_state->dialects); - if (PyModule_AddObject(module, "_dialects", m_state->dialects)) { + if (PyModule_AddObjectRef(module, "_dialects", m_state->dialects) < 0) { return -1; } @@ -1710,11 +1694,10 @@ csv_exec(PyObject *module) { } /* Add the CSV exception object to the module. */ - PyObject *bases = PyTuple_New(1); + PyObject *bases = PyTuple_Pack(1, PyExc_Exception); if (bases == NULL) { return -1; } - PyTuple_SET_ITEM(bases, 0, PyExc_Exception); m_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec, bases); Py_DECREF(bases); if (m_state->error_obj == NULL) { From 1665ac7f067d4a0d05cd57a6e0a5754adf4d255a Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Nov 2020 20:36:07 +0100 Subject: [PATCH 03/12] Rename m_state to module_state --- Modules/_csv.c | 110 +++++++++++++++++++++++++------------------------ 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index e002c08cff76ee..303866a7658c95 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -37,24 +37,24 @@ get_csv_state(PyObject *module) static int _csv_clear(PyObject *m) { - _csvstate *m_state = PyModule_GetState(m); - Py_CLEAR(m_state->error_obj); - Py_CLEAR(m_state->dialects); - Py_CLEAR(m_state->dialect_type); - Py_CLEAR(m_state->reader_type); - Py_CLEAR(m_state->writer_type); + _csvstate *module_state = PyModule_GetState(m); + Py_CLEAR(module_state->error_obj); + Py_CLEAR(module_state->dialects); + Py_CLEAR(module_state->dialect_type); + Py_CLEAR(module_state->reader_type); + Py_CLEAR(module_state->writer_type); return 0; } static int _csv_traverse(PyObject *m, visitproc visit, void *arg) { - _csvstate *m_state = PyModule_GetState(m); - Py_VISIT(m_state->error_obj); - Py_VISIT(m_state->dialects); - Py_VISIT(m_state->dialect_type); - Py_VISIT(m_state->reader_type); - Py_VISIT(m_state->writer_type); + _csvstate *module_state = PyModule_GetState(m); + Py_VISIT(module_state->error_obj); + Py_VISIT(module_state->dialects); + Py_VISIT(module_state->dialect_type); + Py_VISIT(module_state->reader_type); + Py_VISIT(module_state->writer_type); return 0; } @@ -137,14 +137,14 @@ typedef struct { */ static PyObject * -get_dialect_from_registry(PyObject *name_obj, _csvstate *m_state) +get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state) { PyObject *dialect_obj; - dialect_obj = PyDict_GetItemWithError(m_state->dialects, name_obj); + dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj); if (dialect_obj == NULL) { if (!PyErr_Occurred()) - PyErr_Format(m_state->error_obj, "unknown dialect"); + PyErr_Format(module_state->error_obj, "unknown dialect"); } else Py_INCREF(dialect_obj); @@ -372,8 +372,8 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) if (m == NULL) { return NULL; } - _csvstate *m_state = PyModule_GetState(m); - if (m_state == NULL) { + _csvstate *module_state = PyModule_GetState(m); + if (module_state == NULL) { return PyErr_Format(PyExc_SystemError, "dialect_new: No _csv module state found"); } @@ -381,14 +381,14 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) if (dialect != NULL) { if (PyUnicode_Check(dialect)) { - dialect = get_dialect_from_registry(dialect, m_state); + dialect = get_dialect_from_registry(dialect, module_state); if (dialect == NULL) return NULL; } else Py_INCREF(dialect); /* Can we reuse this instance? */ - if (PyObject_TypeCheck(dialect, m_state->dialect_type) && + if (PyObject_TypeCheck(dialect, module_state->dialect_type) && delimiter == NULL && doublequote == NULL && escapechar == NULL && @@ -529,12 +529,12 @@ PyType_Spec Dialect_Type_spec = { static PyObject * _call_dialect(PyObject *m, PyObject *dialect_inst, PyObject *kwargs) { - _csvstate *m_state = PyModule_GetState(m); - if (m_state == NULL) { + _csvstate *module_state = PyModule_GetState(m); + if (module_state == NULL) { return NULL; } - PyObject *type = (PyObject *)m_state->dialect_type; + PyObject *type = (PyObject *)module_state->dialect_type; if (dialect_inst) { return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs); } @@ -592,11 +592,12 @@ parse_grow_buff(ReaderObj *self) } static int -parse_add_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) +parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) { - if (self->field_len >= m_state->field_limit) { - PyErr_Format(m_state->error_obj, "field larger than field limit (%ld)", - m_state->field_limit); + if (self->field_len >= module_state->field_limit) { + PyErr_Format(module_state->error_obj, + "field larger than field limit (%ld)", + module_state->field_limit); return -1; } if (self->field_len == self->field_size && !parse_grow_buff(self)) @@ -606,7 +607,7 @@ parse_add_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) } static int -parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) +parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c) { DialectObj *dialect = self->dialect; @@ -652,7 +653,7 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) /* begin new unquoted field */ if (dialect->quoting == QUOTE_NONNUMERIC) self->numeric_field = 1; - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; self->state = IN_FIELD; } @@ -660,14 +661,14 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) case ESCAPED_CHAR: if (c == '\n' || c=='\r') { - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; self->state = AFTER_ESCAPED_CRNL; break; } if (c == '\0') c = '\n'; - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; self->state = IN_FIELD; break; @@ -697,7 +698,7 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) } else { /* normal character - save in field */ - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; } break; @@ -723,7 +724,7 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) } else { /* normal character - save in field */ - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; } break; @@ -731,7 +732,7 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) case ESCAPE_IN_QUOTED_FIELD: if (c == '\0') c = '\n'; - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; self->state = IN_QUOTED_FIELD; break; @@ -741,7 +742,7 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) if (dialect->quoting != QUOTE_NONE && c == dialect->quotechar) { /* save "" as " */ - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; self->state = IN_QUOTED_FIELD; } @@ -758,13 +759,13 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) self->state = (c == '\0' ? START_RECORD : EAT_CRNL); } else if (!dialect->strict) { - if (parse_add_char(self, m_state, c) < 0) + if (parse_add_char(self, module_state, c) < 0) return -1; self->state = IN_FIELD; } else { /* illegal */ - PyErr_Format(m_state->error_obj, "'%c' expected after '%c'", + PyErr_Format(module_state->error_obj, "'%c' expected after '%c'", dialect->delimiter, dialect->quotechar); return -1; @@ -777,7 +778,7 @@ parse_process_char(ReaderObj *self, _csvstate *m_state, Py_UCS4 c) else if (c == '\0') self->state = START_RECORD; else { - PyErr_Format(m_state->error_obj, + PyErr_Format(module_state->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); return -1; } @@ -813,8 +814,8 @@ Reader_iternext(ReaderObj *self) if (m == NULL) { return NULL; } - _csvstate *m_state = PyModule_GetState(m); - if (m_state == NULL) { + _csvstate *module_state = PyModule_GetState(m); + if (module_state == NULL) { return PyErr_Format(PyExc_SystemError, "dialect_new: No _csv module state found"); } @@ -828,7 +829,7 @@ Reader_iternext(ReaderObj *self) if (!PyErr_Occurred() && (self->field_len != 0 || self->state == IN_QUOTED_FIELD)) { if (self->dialect->strict) - PyErr_SetString(m_state->error_obj, + PyErr_SetString(module_state->error_obj, "unexpected end of data"); else if (parse_save_field(self) >= 0) break; @@ -836,7 +837,7 @@ Reader_iternext(ReaderObj *self) return NULL; } if (!PyUnicode_Check(lineobj)) { - PyErr_Format(m_state->error_obj, + PyErr_Format(module_state->error_obj, "iterator should return strings, " "not %.200s " "(the file should be opened in text mode)", @@ -858,18 +859,18 @@ Reader_iternext(ReaderObj *self) c = PyUnicode_READ(kind, data, pos); if (c == '\0') { Py_DECREF(lineobj); - PyErr_Format(m_state->error_obj, + PyErr_Format(module_state->error_obj, "line contains NUL"); goto err; } - if (parse_process_char(self, m_state, c) < 0) { + if (parse_process_char(self, module_state, c) < 0) { Py_DECREF(lineobj); goto err; } pos++; } Py_DECREF(lineobj); - if (parse_process_char(self, m_state, 0) < 0) + if (parse_process_char(self, module_state, 0) < 0) goto err; } while (self->state != START_RECORD); @@ -1651,22 +1652,22 @@ static int csv_exec(PyObject *module) { const StyleDesc *style; PyObject *temp; - _csvstate *m_state = get_csv_state(module); + _csvstate *module_state = get_csv_state(module); temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL); - m_state->dialect_type = (PyTypeObject *)temp; + module_state->dialect_type = (PyTypeObject *)temp; if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) { return -1; } temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL); - m_state->reader_type = (PyTypeObject *)temp; + module_state->reader_type = (PyTypeObject *)temp; if (PyModule_AddObjectRef(module, "Reader", temp) < 0) { return -1; } temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL); - m_state->writer_type = (PyTypeObject *)temp; + module_state->writer_type = (PyTypeObject *)temp; if (PyModule_AddObjectRef(module, "Writer", temp) < 0) { return -1; } @@ -1678,11 +1679,11 @@ csv_exec(PyObject *module) { } /* Set the field limit */ - m_state->field_limit = 128 * 1024; + module_state->field_limit = 128 * 1024; /* Add _dialects dictionary */ - m_state->dialects = PyDict_New(); - if (PyModule_AddObjectRef(module, "_dialects", m_state->dialects) < 0) { + module_state->dialects = PyDict_New(); + if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) { return -1; } @@ -1698,12 +1699,13 @@ csv_exec(PyObject *module) { if (bases == NULL) { return -1; } - m_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec, bases); + module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec, + bases); Py_DECREF(bases); - if (m_state->error_obj == NULL) { + if (module_state->error_obj == NULL) { return -1; } - if (PyModule_AddType(module, (PyTypeObject *)m_state->error_obj) != 0) { + if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) { return -1; } From 23f844ad71e4b3084b6d3510c220a740b52044ea Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Nov 2020 20:37:44 +0100 Subject: [PATCH 04/12] Use Py_CLEAR rather than Py_XDECREF --- Modules/_csv.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 303866a7658c95..6fb5b592d31075 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -316,7 +316,7 @@ static void Dialect_dealloc(DialectObj *self) { PyTypeObject *tp = Py_TYPE(self); - Py_XDECREF(self->lineterminator); + Py_CLEAR(self->lineterminator); tp->tp_free((PyObject *)self); Py_DECREF(tp); } @@ -324,7 +324,7 @@ Dialect_dealloc(DialectObj *self) static void Dialect_finalize(DialectObj *self) { - Py_XDECREF(self->lineterminator); + Py_CLEAR(self->lineterminator); } static char *dialect_kws[] = { @@ -402,7 +402,7 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) self = (DialectObj *)type->tp_alloc(type, 0); if (self == NULL) { - Py_XDECREF(dialect); + Py_CLEAR(dialect); return NULL; } self->lineterminator = NULL; @@ -466,16 +466,16 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) ret = (PyObject *)self; Py_INCREF(self); err: - Py_XDECREF(self); - Py_XDECREF(dialect); - Py_XDECREF(delimiter); - Py_XDECREF(doublequote); - Py_XDECREF(escapechar); - Py_XDECREF(lineterminator); - Py_XDECREF(quotechar); - Py_XDECREF(quoting); - Py_XDECREF(skipinitialspace); - Py_XDECREF(strict); + Py_CLEAR(self); + Py_CLEAR(dialect); + Py_CLEAR(delimiter); + Py_CLEAR(doublequote); + Py_CLEAR(escapechar); + Py_CLEAR(lineterminator); + Py_CLEAR(quotechar); + Py_CLEAR(quoting); + Py_CLEAR(skipinitialspace); + Py_CLEAR(strict); return ret; } @@ -885,9 +885,9 @@ Reader_dealloc(ReaderObj *self) { PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - Py_XDECREF(self->dialect); - Py_XDECREF(self->input_iter); - Py_XDECREF(self->fields); + Py_CLEAR(self->dialect); + Py_CLEAR(self->input_iter); + Py_CLEAR(self->fields); if (self->field != NULL) PyMem_Free(self->field); PyObject_GC_Del(self); @@ -897,9 +897,9 @@ Reader_dealloc(ReaderObj *self) static void Reader_finalize(ReaderObj *self) { - Py_XDECREF(self->dialect); - Py_XDECREF(self->input_iter); - Py_XDECREF(self->fields); + Py_CLEAR(self->dialect); + Py_CLEAR(self->input_iter); + Py_CLEAR(self->fields); if (self->field != NULL) { PyMem_Free(self->field); } From 8d1f95d2748099b771daa823d409990047da2029 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Nov 2020 20:39:51 +0100 Subject: [PATCH 05/12] Use `module` rather than `m` --- Modules/_csv.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 6fb5b592d31075..5c2d92155daffa 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -35,9 +35,9 @@ get_csv_state(PyObject *module) } static int -_csv_clear(PyObject *m) +_csv_clear(PyObject *module) { - _csvstate *module_state = PyModule_GetState(m); + _csvstate *module_state = PyModule_GetState(module); Py_CLEAR(module_state->error_obj); Py_CLEAR(module_state->dialects); Py_CLEAR(module_state->dialect_type); @@ -47,9 +47,9 @@ _csv_clear(PyObject *m) } static int -_csv_traverse(PyObject *m, visitproc visit, void *arg) +_csv_traverse(PyObject *module, visitproc visit, void *arg) { - _csvstate *module_state = PyModule_GetState(m); + _csvstate *module_state = PyModule_GetState(module); Py_VISIT(module_state->error_obj); Py_VISIT(module_state->dialects); Py_VISIT(module_state->dialect_type); @@ -59,9 +59,9 @@ _csv_traverse(PyObject *m, visitproc visit, void *arg) } static void -_csv_free(void *m) +_csv_free(void *module) { - _csv_clear((PyObject *)m); + _csv_clear((PyObject *)module); } typedef enum { @@ -368,11 +368,11 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) &strict)) return NULL; - PyObject *m = _PyType_GetModuleByDef(type, &_csvmodule); - if (m == NULL) { + PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule); + if (module == NULL) { return NULL; } - _csvstate *module_state = PyModule_GetState(m); + _csvstate *module_state = PyModule_GetState(module); if (module_state == NULL) { return PyErr_Format(PyExc_SystemError, "dialect_new: No _csv module state found"); @@ -527,9 +527,9 @@ PyType_Spec Dialect_Type_spec = { * description of the dialect */ static PyObject * -_call_dialect(PyObject *m, PyObject *dialect_inst, PyObject *kwargs) +_call_dialect(PyObject *module, PyObject *dialect_inst, PyObject *kwargs) { - _csvstate *module_state = PyModule_GetState(m); + _csvstate *module_state = PyModule_GetState(module); if (module_state == NULL) { return NULL; } @@ -810,11 +810,11 @@ Reader_iternext(ReaderObj *self) const void *data; PyObject *lineobj; - PyObject *m = _PyType_GetModuleByDef(Py_TYPE(self), &_csvmodule); - if (m == NULL) { + PyObject *module = _PyType_GetModuleByDef(Py_TYPE(self), &_csvmodule); + if (module == NULL) { return NULL; } - _csvstate *module_state = PyModule_GetState(m); + _csvstate *module_state = PyModule_GetState(module); if (module_state == NULL) { return PyErr_Format(PyExc_SystemError, "dialect_new: No _csv module state found"); From 2c60895481efc07f69b770ace33ebf13e63a2505 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Nov 2020 20:44:21 +0100 Subject: [PATCH 06/12] Factor out calling _PyType_GetModuleByDef & PyModule_GetState --- Modules/_csv.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 5c2d92155daffa..bdd0117d04483b 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -340,6 +340,22 @@ static char *dialect_kws[] = { NULL }; +static _csvstate * +_csv_state_from_type(PyTypeObject *type, const char *name) +{ + PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule); + if (module == NULL) { + return NULL; + } + _csvstate *module_state = PyModule_GetState(module); + if (module_state == NULL) { + PyErr_Format(PyExc_SystemError, + "%s: No _csv module state found", name); + return NULL; + } + return module_state; +} + static PyObject * dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) { @@ -368,16 +384,7 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) &strict)) return NULL; - PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule); - if (module == NULL) { - return NULL; - } - _csvstate *module_state = PyModule_GetState(module); - if (module_state == NULL) { - return PyErr_Format(PyExc_SystemError, - "dialect_new: No _csv module state found"); - } - + _csvstate *module_state = _csv_state_from_type(type, "dialect_new"); if (dialect != NULL) { if (PyUnicode_Check(dialect)) { @@ -810,14 +817,10 @@ Reader_iternext(ReaderObj *self) const void *data; PyObject *lineobj; - PyObject *module = _PyType_GetModuleByDef(Py_TYPE(self), &_csvmodule); - if (module == NULL) { - return NULL; - } - _csvstate *module_state = PyModule_GetState(module); + _csvstate *module_state = _csv_state_from_type(Py_TYPE(self), + "Reader.__next__"); if (module_state == NULL) { - return PyErr_Format(PyExc_SystemError, - "dialect_new: No _csv module state found"); + return NULL; } if (parse_reset(self) < 0) From e69d0a1c4269258c529caf720f24b2ed356fbd43 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Nov 2020 20:45:17 +0100 Subject: [PATCH 07/12] Set freed pointers to NULL --- Modules/_csv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index bdd0117d04483b..ee3154a5cdc579 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -891,8 +891,10 @@ Reader_dealloc(ReaderObj *self) Py_CLEAR(self->dialect); Py_CLEAR(self->input_iter); Py_CLEAR(self->fields); - if (self->field != NULL) + if (self->field != NULL) { PyMem_Free(self->field); + self->field = NULL; + } PyObject_GC_Del(self); Py_DECREF(tp); } @@ -905,6 +907,7 @@ Reader_finalize(ReaderObj *self) Py_CLEAR(self->fields); if (self->field != NULL) { PyMem_Free(self->field); + self->field = NULL; } } From fb93e7fdb5dcd0f7a7b9c6087fc5110357aba66f Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Nov 2020 20:50:38 +0100 Subject: [PATCH 08/12] Only call get_csv_state up to once per function --- Modules/_csv.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index ee3154a5cdc579..27de0ea14bbe15 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -975,9 +975,10 @@ static PyObject * csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) { PyObject * iterator, * dialect = NULL; + _csvstate *module_state = get_csv_state(module); ReaderObj * self = PyObject_GC_New( ReaderObj, - get_csv_state(module)->reader_type); + module_state->reader_type); if (!self) return NULL; @@ -1394,9 +1395,8 @@ static PyObject * csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) { PyObject * output_file, * dialect = NULL; - WriterObj * self = PyObject_GC_New( - WriterObj, - get_csv_state(module)->writer_type); + _csvstate *module_state = get_csv_state(module); + WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type); _Py_IDENTIFIER(write); if (!self) @@ -1410,7 +1410,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) self->rec_len = 0; self->num_fields = 0; - self->error_obj = get_csv_state(module)->error_obj; + self->error_obj = module_state->error_obj; Py_INCREF(self->error_obj); if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { @@ -1474,9 +1474,10 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) static PyObject * csv_unregister_dialect(PyObject *module, PyObject *name_obj) { - if (PyDict_DelItem(get_csv_state(module)->dialects, name_obj) < 0) { + _csvstate *module_state = get_csv_state(module); + if (PyDict_DelItem(module_state->dialects, name_obj) < 0) { if (PyErr_ExceptionMatches(PyExc_KeyError)) { - PyErr_Format(get_csv_state(module)->error_obj, "unknown dialect"); + PyErr_Format(module_state->error_obj, "unknown dialect"); } return NULL; } @@ -1493,7 +1494,8 @@ static PyObject * csv_field_size_limit(PyObject *module, PyObject *args) { PyObject *new_limit = NULL; - long old_limit = get_csv_state(module)->field_limit; + _csvstate *module_state = get_csv_state(module); + long old_limit = module_state->field_limit; if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) return NULL; @@ -1503,9 +1505,9 @@ csv_field_size_limit(PyObject *module, PyObject *args) "limit must be an integer"); return NULL; } - get_csv_state(module)->field_limit = PyLong_AsLong(new_limit); - if (get_csv_state(module)->field_limit == -1 && PyErr_Occurred()) { - get_csv_state(module)->field_limit = old_limit; + module_state->field_limit = PyLong_AsLong(new_limit); + if (module_state->field_limit == -1 && PyErr_Occurred()) { + module_state->field_limit = old_limit; return NULL; } } From b62987bd34d4f21f930c0d8f3be72b9a19c87eb9 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 13 Nov 2020 20:52:17 +0100 Subject: [PATCH 09/12] Pass module_state directly to _call_dialect --- Modules/_csv.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 27de0ea14bbe15..6004c0ffdd1bfc 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -534,13 +534,8 @@ PyType_Spec Dialect_Type_spec = { * description of the dialect */ static PyObject * -_call_dialect(PyObject *module, PyObject *dialect_inst, PyObject *kwargs) +_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs) { - _csvstate *module_state = PyModule_GetState(module); - if (module_state == NULL) { - return NULL; - } - PyObject *type = (PyObject *)module_state->dialect_type; if (dialect_inst) { return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs); @@ -1004,7 +999,8 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) Py_DECREF(self); return NULL; } - self->dialect = (DialectObj *)_call_dialect(module, dialect, keyword_args); + self->dialect = (DialectObj *)_call_dialect(module_state, dialect, + keyword_args); if (self->dialect == NULL) { Py_DECREF(self); return NULL; @@ -1427,7 +1423,8 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) Py_DECREF(self); return NULL; } - self->dialect = (DialectObj *)_call_dialect(module, dialect, keyword_args); + self->dialect = (DialectObj *)_call_dialect(module_state, dialect, + keyword_args); if (self->dialect == NULL) { Py_DECREF(self); return NULL; @@ -1449,6 +1446,7 @@ static PyObject * csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) { PyObject *name_obj, *dialect_obj = NULL; + _csvstate *module_state = get_csv_state(module); PyObject *dialect; if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) @@ -1460,10 +1458,10 @@ csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) } if (PyUnicode_READY(name_obj) == -1) return NULL; - dialect = _call_dialect(module, dialect_obj, kwargs); + dialect = _call_dialect(module_state, dialect_obj, kwargs); if (dialect == NULL) return NULL; - if (PyDict_SetItem(get_csv_state(module)->dialects, name_obj, dialect) < 0) { + if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) { Py_DECREF(dialect); return NULL; } From f12a7685e0e3adf700db2b6d829f791d046181ee Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 8 Dec 2020 14:58:57 +0100 Subject: [PATCH 10/12] Add NULL check for _csv_state_from_type --- Modules/_csv.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/_csv.c b/Modules/_csv.c index 6004c0ffdd1bfc..ada628722f2d2d 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -385,6 +385,9 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) return NULL; _csvstate *module_state = _csv_state_from_type(type, "dialect_new"); + if (module_state == NULL) { + return NULL; + } if (dialect != NULL) { if (PyUnicode_Check(dialect)) { From f302029dfb40eda74f32b67580378bbf7cedb9f7 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 8 Dec 2020 15:00:57 +0100 Subject: [PATCH 11/12] Use Py_NewRef where appropriate --- Modules/_csv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index ada628722f2d2d..701d766f02d326 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -1409,8 +1409,7 @@ csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) self->rec_len = 0; self->num_fields = 0; - self->error_obj = module_state->error_obj; - Py_INCREF(self->error_obj); + self->error_obj = Py_NewRef(module_state->error_obj); if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { Py_DECREF(self); From b8c37bf4d44cf30eb4f4cd0fdf95765888d01677 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 8 Dec 2020 16:53:34 +0100 Subject: [PATCH 12/12] Use designated initializers for spec structs --- Modules/_csv.c | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/Modules/_csv.c b/Modules/_csv.c index 701d766f02d326..cade1ca9d47f06 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -524,11 +524,10 @@ static PyType_Slot Dialect_Type_slots[] = { }; PyType_Spec Dialect_Type_spec = { - "_csv.Dialect", - sizeof(DialectObj), - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ - Dialect_Type_slots, + .name = "_csv.Dialect", + .basicsize = sizeof(DialectObj), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, + .slots = Dialect_Type_slots, }; @@ -960,12 +959,10 @@ static PyType_Slot Reader_Type_slots[] = { }; PyType_Spec Reader_Type_spec = { - "_csv.reader", - sizeof(ReaderObj), - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, - Reader_Type_slots + .name = "_csv.reader", + .basicsize = sizeof(ReaderObj), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .slots = Reader_Type_slots }; @@ -1381,12 +1378,10 @@ static PyType_Slot Writer_Type_slots[] = { }; PyType_Spec Writer_Type_spec = { - "_csv.writer", - sizeof(WriterObj), - 0, - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, - Writer_Type_slots, + .name = "_csv.writer", + .basicsize = sizeof(WriterObj), + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, + .slots = Writer_Type_slots, };