Skip to content

Commit

Permalink
Accelerate object construction using type vector calls
Browse files Browse the repository at this point in the history
  • Loading branch information
wjakob committed Aug 27, 2024
1 parent 62dabf9 commit 7225e07
Show file tree
Hide file tree
Showing 8 changed files with 143 additions and 17 deletions.
1 change: 1 addition & 0 deletions cmake/darwin-ld-cpython.sym
Original file line number Diff line number Diff line change
Expand Up @@ -899,3 +899,4 @@
-U __Py_SwappedOp
-U __Py_TrueStruct
-U __Py_VaBuildValue_SizeT
-U _Py_Version
16 changes: 15 additions & 1 deletion docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,26 @@ case, both modules must use the same nanobind ABI version, or they will be
isolated from each other. Releases that don't explicitly mention an ABI version
below inherit that of the preceding release.

Version 2.1.1 (TBA)
Version 2.2.0 (TBA)
-------------------

- nanobind has always used `PEP 590 vector calls
<https://www.python.org/dev/peps/pep-0590>`__ to efficiently dispatch calls
to function and method bindings, but it lacked the ability to do so for
constructors (e.g., ``MyType(arg1, arg2, ...)``).

Version 2.2.0 adds this missing part, which accelerates object construction
by up to a factor of 2×. The difference is especially pronounced when passing
keyword arguments to constructors. Note that this feature is only supported
on Python 3.9+ and when building non-stable ABI extensions. If `CPython PR
#123332 <https://github.com/python/cpython/pull/123332>`__ is accepted, this
fast path might also become available in the stable ABI on Python 3.14+.

* Added the :cpp:class:`bytearray` wrapper type. (PR `#654
<https://github.com/wjakob/nanobind/pull/654>`__)

* ABI version 15.


Version 2.1.0 (Aug 11, 2024)
----------------------------
Expand Down
2 changes: 1 addition & 1 deletion docs/meson.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ pyproject.toml file:
requires = ['meson-python']
build-backend = 'mesonpy'
In your project root, you will also want to create the subprojects folder
that Meson can install into. Then you will need to install the wrap packages
for both nanobind and robin-map:
Expand Down
14 changes: 9 additions & 5 deletions include/nanobind/nb_class.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,12 @@ enum class type_flags : uint32_t {
is_arithmetic = (1 << 16),

/// Is the number type underlying the enumeration signed?
is_signed = (1 << 17)
is_signed = (1 << 17),

// One more flag bits available (18) without needing
// a larger reorganization
/// Does the type implement a custom __new__ operator?
has_new = (1 << 18)

// No more bits available without needing a larger reorganization
};

/// Flags about a type that are only relevant when it is being created.
Expand Down Expand Up @@ -103,6 +105,7 @@ struct type_data {
const std::type_info *type;
PyTypeObject *type_py;
nb_alias_chain *alias_chain;
void *init; // Constructor nb_func
void (*destruct)(void *);
void (*copy)(void *, const void *);
void (*move)(void *, void *) noexcept;
Expand All @@ -122,8 +125,8 @@ struct type_data {
void (*set_self_py)(void *, PyObject *) noexcept;
bool (*keep_shared_from_this_alive)(PyObject *) noexcept;
#if defined(Py_LIMITED_API)
size_t dictoffset;
size_t weaklistoffset;
uint32_t dictoffset;
uint32_t weaklistoffset;
#endif
};

Expand Down Expand Up @@ -420,6 +423,7 @@ struct new_<Func, Return(Args...)> {
auto wrapper = [func = (detail::forward_t<Func>) func](handle, Args... args) {
return func((detail::forward_t<Args>) args...);
};

if constexpr ((std::is_base_of_v<arg, Extra> || ...)) {
// If any argument annotations are specified, add another for the
// extra class argument that we don't forward to Func, so visible
Expand Down
35 changes: 30 additions & 5 deletions src/nb_func.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,10 @@ PyObject *nb_func_new(const void *in_) noexcept {
is_implicit = f->flags & (uint32_t) func_flags::is_implicit,
is_method = f->flags & (uint32_t) func_flags::is_method,
return_ref = f->flags & (uint32_t) func_flags::return_ref,
is_constructor = false;
is_constructor = false,
is_init = false,
is_new = false,
is_setstate = false;

PyObject *name = nullptr;
PyObject *func_prev = nullptr;
Expand Down Expand Up @@ -247,10 +250,12 @@ PyObject *nb_func_new(const void *in_) noexcept {
PyErr_Clear();
}

is_init = strcmp(name_cstr, "__init__") == 0;
is_new = strcmp(name_cstr, "__new__") == 0;
is_setstate = strcmp(name_cstr, "__setstate__") == 0;

// Is this method a constructor that takes a class binding as first parameter?
is_constructor = is_method &&
(strcmp(name_cstr, "__init__") == 0 ||
strcmp(name_cstr, "__setstate__") == 0) &&
is_constructor = is_method && (is_init || is_setstate) &&
strncmp(f->descr, "({%}", 4) == 0;

// Don't use implicit conversions in copy constructors (causes infinite recursion)
Expand Down Expand Up @@ -382,6 +387,20 @@ PyObject *nb_func_new(const void *in_) noexcept {
}
}

// Fast path for vector call object construction
if (((is_init && is_method) || (is_new && !is_method)) &&
nb_type_check(f->scope)) {
type_data *td = nb_type_data((PyTypeObject *) f->scope);
bool has_new = td->flags & (uint32_t) type_flags::has_new;

if (is_init && !has_new) {
td->init = func;
} else if (is_new) {
td->init = func;
td->flags |= (uint32_t) type_flags::has_new;
}
}

if (has_scope && name) {
int rv = PyObject_SetAttr(f->scope, name, (PyObject *) func);
check(rv == 0, "nb::detail::nb_func_new(\"%s\"): setattr. failed.",
Expand All @@ -402,7 +421,7 @@ PyObject *nb_func_new(const void *in_) noexcept {
static NB_NOINLINE PyObject *
nb_func_error_overload(PyObject *self, PyObject *const *args_in,
size_t nargs_in, PyObject *kwargs_in) noexcept {
const uint32_t count = (uint32_t) Py_SIZE(self);
uint32_t count = (uint32_t) Py_SIZE(self);
func_data *f = nb_func_data(self);

if (f->flags & (uint32_t) func_flags::is_operator)
Expand All @@ -413,6 +432,12 @@ nb_func_error_overload(PyObject *self, PyObject *const *args_in,
buf.put("(): incompatible function arguments. The following argument types "
"are supported:\n");

// Mask default __new__ overload created by nb::new_()
if (strcmp(f->name, "__new__") == 0 && count > 1 && f->nargs == 1) {
count -= 1;
f += 1;
}

for (uint32_t i = 0; i < count; ++i) {
buf.put(" ");
buf.put_uint32(i + 1);
Expand Down
2 changes: 1 addition & 1 deletion src/nb_internals.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

/// Tracks the ABI of nanobind
#ifndef NB_INTERNALS_VERSION
# define NB_INTERNALS_VERSION 14
# define NB_INTERNALS_VERSION 15
#endif

/// On MSVC, debug and release builds are not ABI-compatible!
Expand Down
86 changes: 82 additions & 4 deletions src/nb_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# pragma warning(disable: 4706) // assignment within conditional expression
#endif

// Pending gh-100554
// #define Py_tp_vectorcall 82

NAMESPACE_BEGIN(NB_NAMESPACE)
NAMESPACE_BEGIN(detail)

Expand Down Expand Up @@ -837,6 +840,67 @@ static PyMethodDef class_getitem_method[] = {
};
#endif

// Implements the vector call protocol directly on a type object to construct
// instances more efficiently.
//
// 'self' is the type object being called. 'args_in', 'nargsf' and 'kwargs_in'
// follow the PEP 590 vector call convention: 'nargsf' holds the positional
// argument count plus an optional NB_VECTORCALL_ARGUMENTS_OFFSET flag, and
// 'kwargs_in' is either null or a tuple of keyword names whose values trail
// the positional arguments in 'args_in'.
//
// The call is forwarded to the constructor function stored in
// 'type_data::init' with one extra leading argument:
//   - __init__ path (type lacks 'has_new'): a freshly allocated instance,
//     which is also the return value on success.
//   - __new__ path (type has 'has_new'): the type object itself; whatever the
//     constructor function returns is passed through unchanged.
//
// Returns a new reference, or nullptr with a Python error set.
static PyObject *nb_type_vectorcall(PyObject *self, PyObject *const *args_in,
                                    size_t nargsf,
                                    PyObject *kwargs_in) noexcept {
    PyTypeObject *tp = (PyTypeObject *) self;
    type_data *td = nb_type_data(tp);
    nb_func *func = (nb_func *) td->init;
    bool is_init = (td->flags & (uint32_t) type_flags::has_new) == 0;
    Py_ssize_t nargs = NB_VECTORCALL_NARGS(nargsf);

    if (NB_UNLIKELY(!func)) {
        PyErr_Format(PyExc_TypeError, "%s: no constructor defined!", td->name);
        return nullptr;
    }

    if (NB_LIKELY(is_init)) {
        // Allocate the instance that the __init__ overloads will initialize
        self = inst_new_int(tp, nullptr, nullptr);
        if (!self)
            return nullptr;
    } else if (nargs == 0 && !kwargs_in) {
        // Argument-less call on a type with a custom __new__: if the first
        // overload takes arguments, invoke the function with an empty
        // argument list (i.e., without prepending the type).
        // NOTE(review): per the original "fail" remark, this call is
        // presumably expected to fail and report the overload error.
        if (nb_func_data(func)->nargs != 0) // fail
            return func->vectorcall((PyObject *) func, nullptr, 0, nullptr);
    }

    // Small on-stack scratch array; larger argument lists go to the heap so
    // that a caller-controlled argument count cannot overflow the C stack.
    constexpr size_t stack_slots = 8;
    PyObject *stack_buf[stack_slots];
    PyObject **args = nullptr, *temp = nullptr;
    bool on_heap = false;

    if (NB_LIKELY(nargsf & NB_VECTORCALL_ARGUMENTS_OFFSET)) {
        // The caller permits temporary use of the slot preceding 'args_in'.
        // Remember its previous value so that it can be restored below.
        args = (PyObject **) (args_in - 1);
        temp = args[0];
    } else {
        // No spare slot: copy positional + keyword values into an array
        // that has room for one extra leading element.
        size_t size = (size_t) nargs;
        if (kwargs_in)
            size += (size_t) NB_TUPLE_GET_SIZE(kwargs_in);

        if (size + 1 <= stack_slots) {
            args = stack_buf;
        } else {
            args = (PyObject **) PyMem_Malloc((size + 1) * sizeof(PyObject *));
            if (NB_UNLIKELY(!args)) {
                // Release the instance allocated above (if any)
                if (is_init)
                    Py_DECREF(self);
                return PyErr_NoMemory();
            }
            on_heap = true;
        }

        if (size)
            memcpy(args + 1, args_in, sizeof(PyObject *) * size);
    }

    args[0] = self;

    PyObject *rv =
        func->vectorcall((PyObject *) func, args, nargs + 1, kwargs_in);

    // Restore the borrowed slot (a harmless write for the scratch arrays)
    args[0] = temp;

    if (NB_UNLIKELY(on_heap))
        PyMem_Free(args);

    if (NB_LIKELY(is_init)) {
        if (!rv) {
            Py_DECREF(self);
            return nullptr;
        }

        // __init__ constructor: 'rv' is None
        Py_DECREF(rv);
        return self;
    } else {
        // __new__ constructor
        return rv;
    }
}

/// Called when a C++ type is bound via nb::class_<>
PyObject *nb_type_new(const type_init_data *t) noexcept {
bool has_doc = t->flags & (uint32_t) type_init_flags::has_doc,
Expand Down Expand Up @@ -952,7 +1016,7 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {

char *name_copy = strdup_check(name.c_str());

constexpr size_t nb_type_max_slots = 10,
constexpr size_t nb_type_max_slots = 11,
nb_extra_slots = 80,
nb_total_slots = nb_type_max_slots +
nb_extra_slots + 1;
Expand Down Expand Up @@ -1050,6 +1114,13 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
*s++ = { Py_tp_methods, (void*) class_getitem_method };
#endif

#if defined(Py_LIMITED_API)
// Pending gh-100554
// if (Py_Version >= 0x030e0000)
// *s++ = { Py_tp_vectorcall, (void *) nb_type_vectorcall };
(void) nb_type_vectorcall;
#endif

if (has_traverse)
spec.flags |= Py_TPFLAGS_HAVE_GC;

Expand All @@ -1066,6 +1137,11 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
}

type_data *to = nb_type_data((PyTypeObject *) result);

#if !defined(Py_LIMITED_API)
((PyTypeObject *) result)->tp_vectorcall = nb_type_vectorcall;
#endif

*to = *t; // note: slices off _init parts
to->flags &= ~(uint32_t) type_init_flags::all_init_flags;

Expand All @@ -1083,18 +1159,19 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
to->name = name_copy;
to->type_py = (PyTypeObject *) result;
to->alias_chain = nullptr;
to->init = nullptr;

if (has_dynamic_attr) {
to->flags |= (uint32_t) type_flags::has_dynamic_attr;
#if defined(Py_LIMITED_API)
to->dictoffset = dictoffset;
to->dictoffset = (uint32_t) dictoffset;
#endif
}

if (is_weak_referenceable) {
to->flags |= (uint32_t) type_flags::is_weak_referenceable;
#if defined(Py_LIMITED_API)
to->weaklistoffset = weaklistoffset;
to->weaklistoffset = (uint32_t) weaklistoffset;
#endif
}

Expand Down Expand Up @@ -1135,7 +1212,8 @@ PyObject *call_one_arg(PyObject *fn, PyObject *arg) noexcept {
Py_DECREF(args);
#else
PyObject *args[2] = { nullptr, arg };
result = PyObject_Vectorcall(fn, args + 1, NB_VECTORCALL_ARGUMENTS_OFFSET + 1, nullptr);
result = PyObject_Vectorcall(fn, args + 1,
NB_VECTORCALL_ARGUMENTS_OFFSET + 1, nullptr);
#endif
return result;
}
Expand Down
4 changes: 4 additions & 0 deletions tests/test_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -887,3 +887,7 @@ def test46_custom_new():

with pytest.raises(RuntimeError):
t.UniqueInt.__new__(int)

def test47_inconstructible():
    """Calling ``t.Foo()`` must raise ``TypeError`` reporting a missing constructor."""
    with pytest.raises(TypeError) as excinfo:
        t.Foo()
    # Same regex search that ``pytest.raises(..., match=...)`` performs
    excinfo.match("no constructor defined")

0 comments on commit 7225e07

Please sign in to comment.