From bacc7ffa45050598216aa50a40342d57014b1bae Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Tue, 12 Apr 2022 16:02:08 -0400 Subject: [PATCH 1/5] fix: pypy 3.9 (#1412) * fix: include external pybind11 * fix: bump pybind11 version (PyPy 3.9 support) --- CMakeLists.txt | 10 +++++++++- pybind11 | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cce8cbda0a..2c64224fa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,8 @@ project( message(STATUS "CMake version ${CMAKE_VERSION}") message(STATUS "CMAKE_BUILD_TYPE = ${CMAKE_BUILD_TYPE}") +include(CMakeDependentOption) + # Defaults for properties in this directory (and below) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -93,10 +95,16 @@ if(BUILD_TESTING) endif() option(PYBUILD "Build Python modules") +cmake_dependent_option(AWKWARD_EXTERNAL_PYBIND11 "Build against an external pybind11" OFF + "PYBUILD" OFF) # Third tier: Python modules. if(PYBUILD) - add_subdirectory(pybind11) + if(AWKWARD_EXTERNAL_PYBIND11) + find_package(pybind11 CONFIG REQUIRED) + else() + add_subdirectory(pybind11) + endif() file(GLOB LAYOUT_SOURCES "src/python/*.cpp") pybind11_add_module(_ext ${LAYOUT_SOURCES}) diff --git a/pybind11 b/pybind11 index ffa346860b..914c06fb25 160000 --- a/pybind11 +++ b/pybind11 @@ -1 +1 @@ -Subproject commit ffa346860b306c9bbfb341aed9c14c067751feb8 +Subproject commit 914c06fb252b6cc3727d0eedab6736e88a3fcb01 From a2b9a1c49960cdb6c129f3be10068c65d8fc26c8 Mon Sep 17 00:00:00 2001 From: Ahmad-AlSubaie <32343365+Ahmad-AlSubaie@users.noreply.github.com> Date: Wed, 13 Apr 2022 15:03:46 -0400 Subject: [PATCH 2/5] replace llvmlite.ir instead of llvmlite.llvmpy.core (#1413) * llvmlite.ir instead of llvmlite.llvmpy.core * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fixed import typo Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/awkward/_connect/_numba/builder.py | 4 +--- src/awkward/_v2/_connect/numba/builder.py | 4 +--- src/awkward/_v2/behaviors/string.py | 6 ++---- src/awkward/behaviors/string.py | 6 ++---- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/awkward/_connect/_numba/builder.py b/src/awkward/_connect/_numba/builder.py index 69627d8873..4e2cdb03cb 100644 --- a/src/awkward/_connect/_numba/builder.py +++ b/src/awkward/_connect/_numba/builder.py @@ -20,9 +20,7 @@ def globalstring(context, builder, pyvalue): buf = dynamic_addrs[pyvalue] = numpy.array(pyvalue.encode("utf-8") + b"\x00") context.add_dynamic_addr(builder, buf.ctypes.data, info=f"str({pyvalue!r})") ptr = context.get_constant(numba.types.uintp, dynamic_addrs[pyvalue].ctypes.data) - return builder.inttoptr( - ptr, llvmlite.llvmpy.core.Type.pointer(llvmlite.llvmpy.core.Type.int(8)) - ) + return builder.inttoptr(ptr, llvmlite.ir.PointerType(llvmlite.ir.IntType(8))) class ArrayBuilderType(numba.types.Type): diff --git a/src/awkward/_v2/_connect/numba/builder.py b/src/awkward/_v2/_connect/numba/builder.py index 5b6c9c626a..efe2bbd9e3 100644 --- a/src/awkward/_v2/_connect/numba/builder.py +++ b/src/awkward/_v2/_connect/numba/builder.py @@ -18,9 +18,7 @@ def globalstring(context, builder, pyvalue): buf = dynamic_addrs[pyvalue] = numpy.array(pyvalue.encode("utf-8") + b"\x00") context.add_dynamic_addr(builder, buf.ctypes.data, info=f"str({repr(pyvalue)})") ptr = context.get_constant(numba.types.uintp, dynamic_addrs[pyvalue].ctypes.data) - return builder.inttoptr( - ptr, llvmlite.llvmpy.core.Type.pointer(llvmlite.llvmpy.core.Type.int(8)) - ) + return builder.inttoptr(ptr, llvmlite.ir.PointerType(llvmlite.ir.IntType(8))) class ArrayBuilderType(numba.types.Type): diff --git a/src/awkward/_v2/behaviors/string.py b/src/awkward/_v2/behaviors/string.py index d4519a58be..32c6cf72dc 100644 --- a/src/awkward/_v2/behaviors/string.py +++ b/src/awkward/_v2/behaviors/string.py @@ -167,7 +167,7 @@ def _string_numba_lower( context, builder, rettype, viewtype, viewval, viewproxy, attype, atval ): import numba - import llvmlite.llvmpy.core + import llvmlite.ir whichpos = ak._v2._connect.numba.layout.posat( context, builder, viewproxy.pos, viewtype.type.CONTENT @@ -213,9 +213,7 @@ def _string_numba_lower( ) rawptr_cast = builder.inttoptr( rawptr, - llvmlite.llvmpy.core.Type.pointer( - llvmlite.llvmpy.core.Type.int(numba.intp.bitwidth // 8) - ), + llvmlite.ir.PointerType(llvmlite.ir.IntType(numba.intp.bitwidth // 8)), ) strsize = builder.sub(stop, start) strsize_cast = ak._v2._connect.numba.layout.castint( diff --git a/src/awkward/behaviors/string.py b/src/awkward/behaviors/string.py index 4a31dc762d..f16baae4ca 100644 --- a/src/awkward/behaviors/string.py +++ b/src/awkward/behaviors/string.py @@ -182,7 +182,7 @@ def _string_numba_lower( context, builder, rettype, viewtype, viewval, viewproxy, attype, atval ): import numba - import llvmlite.llvmpy.core + import llvmlite.ir whichpos = ak._connect._numba.layout.posat( context, builder, viewproxy.pos, viewtype.type.CONTENT @@ -228,9 +228,7 @@ def _string_numba_lower( ) rawptr_cast = builder.inttoptr( rawptr, - llvmlite.llvmpy.core.Type.pointer( - llvmlite.llvmpy.core.Type.int(numba.intp.bitwidth // 8) - ), + llvmlite.ir.PointerType(llvmlite.ir.IntType(numba.intp.bitwidth // 8)), ) strsize = builder.sub(stop, start) strsize_cast = ak._connect._numba.castint( From 19d7e392b1d9dbf1b57f42caffc3518cfb3523a0 Mon Sep 17 00:00:00 2001 From: "allcontributors[bot]" <46447321+allcontributors[bot]@users.noreply.github.com> Date: Wed, 13 Apr 2022 19:38:17 +0000 Subject: [PATCH 3/5] docs: add Ahmad-AlSubaie as a contributor for code (#1416) * docs: update README.md [skip ci] * docs: update .all-contributorsrc [skip ci] Co-authored-by: allcontributors[bot] <46447321+allcontributors[bot]@users.noreply.github.com> --- .all-contributorsrc | 9 +++++++++ README.md | 1 + 2 files changed, 10 insertions(+) diff --git a/.all-contributorsrc b/.all-contributorsrc index c4f0cc7d19..53d706694c 100644 --- a/.all-contributorsrc +++ b/.all-contributorsrc @@ -293,6 +293,15 @@ "contributions": [ "doc" ] + }, + { + "login": "Ahmad-AlSubaie", + "name": "Ahmad-AlSubaie", + "avatar_url": "https://avatars.githubusercontent.com/u/32343365?v=4", + "profile": "https://github.com/Ahmad-AlSubaie", + "contributions": [ + "code" + ] } ], "contributorsPerLine": 7, diff --git a/README.md b/README.md index 04e3129268..e260bfb300 100644 --- a/README.md +++ b/README.md @@ -291,6 +291,7 @@ Thanks especially to the gracious help of Awkward Array contributors (including
Matthew Feickert

🚧
Santam Roy Choudhury

⚠️
Jeroen Van Goey

📖 +
Ahmad-AlSubaie

💻 From 00d603695c36ea2336bb2a6284f80fa33d41b177 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 13 Apr 2022 19:18:26 -0500 Subject: [PATCH 4/5] Fix performance issue in v2 tolist. (#1418) --- src/awkward/_v2/contents/bitmaskedarray.py | 9 ++--- src/awkward/_v2/contents/bytemaskedarray.py | 9 ++--- src/awkward/_v2/contents/indexedarray.py | 7 ++-- .../_v2/contents/indexedoptionarray.py | 16 ++++++--- src/awkward/_v2/contents/listoffsetarray.py | 33 ++++++++++++++----- src/awkward/_v2/highlevel.py | 14 ++++++++ 6 files changed, 61 insertions(+), 27 deletions(-) diff --git a/src/awkward/_v2/contents/bitmaskedarray.py b/src/awkward/_v2/contents/bitmaskedarray.py index 0bb62194a0..35bf48a8cb 100644 --- a/src/awkward/_v2/contents/bitmaskedarray.py +++ b/src/awkward/_v2/contents/bitmaskedarray.py @@ -634,11 +634,12 @@ def _to_list(self, behavior): return out mask = self.mask_as_bool(valid_when=True, nplike=self.nplike)[: self._length] - content = self._content._to_list(behavior) - out = [None] * self._length + out = self._content._getitem_range(slice(0, self._length))._to_list(behavior) + for i, isvalid in enumerate(mask): - if isvalid: - out[i] = content[i] + if not isvalid: + out[i] = None + return out def _to_nplike(self, nplike): diff --git a/src/awkward/_v2/contents/bytemaskedarray.py b/src/awkward/_v2/contents/bytemaskedarray.py index 964ec2b5e6..f5b3927eb0 100644 --- a/src/awkward/_v2/contents/bytemaskedarray.py +++ b/src/awkward/_v2/contents/bytemaskedarray.py @@ -1035,11 +1035,12 @@ def _to_list(self, behavior): return out mask = self.mask_as_bool(valid_when=True, nplike=self.nplike) - content = self._content._to_list(behavior) - out = [None] * self._mask.length + out = self._content._getitem_range(slice(0, len(mask)))._to_list(behavior) + for i, isvalid in enumerate(mask): - if isvalid: - out[i] = content[i] + if not isvalid: + out[i] = None + return out def _to_nplike(self, nplike): diff --git a/src/awkward/_v2/contents/indexedarray.py b/src/awkward/_v2/contents/indexedarray.py index c3e8840ca5..1efa3ac954 100644 --- a/src/awkward/_v2/contents/indexedarray.py +++ b/src/awkward/_v2/contents/indexedarray.py @@ -1221,11 +1221,8 @@ def _to_list(self, behavior): return out index = self._index.raw(numpy) - content = self._content._to_list(behavior) - out = [None] * index.length - for i, ind in enumerate(index): - out[i] = content[ind] - return out + nextcontent = self._content._carry(ak._v2.index.Index(index), False) + return nextcontent._to_list(behavior) def _to_nplike(self, nplike): index = self._index._to_nplike(nplike) diff --git a/src/awkward/_v2/contents/indexedoptionarray.py b/src/awkward/_v2/contents/indexedoptionarray.py index e0f37c11d2..842c4474c8 100644 --- a/src/awkward/_v2/contents/indexedoptionarray.py +++ b/src/awkward/_v2/contents/indexedoptionarray.py @@ -1653,11 +1653,17 @@ def _to_list(self, behavior): return out index = self._index.raw(numpy) - content = self._content._to_list(behavior) - out = [None] * len(index) - for i, ind in enumerate(index): - if ind >= 0: - out[i] = content[ind] + not_missing = index >= 0 + + nextcontent = self._content._carry( + ak._v2.index.Index(index[not_missing]), False + ) + out = nextcontent._to_list(behavior) + + for i, isvalid in enumerate(not_missing): + if not isvalid: + out.insert(i, None) + return out def _to_nplike(self, nplike): diff --git a/src/awkward/_v2/contents/listoffsetarray.py b/src/awkward/_v2/contents/listoffsetarray.py index 57724e2998..e020abc7f6 100644 --- a/src/awkward/_v2/contents/listoffsetarray.py +++ b/src/awkward/_v2/contents/listoffsetarray.py @@ -2062,20 +2062,36 @@ def packed(self): ) def _to_list(self, behavior): + starts, stops = self.starts, self.stops + starts_data = starts.raw(numpy) + stops_data = stops.raw(numpy)[: len(starts_data)] + + nonempty = starts_data != stops_data + if numpy.count_nonzero(nonempty) == 0: + mini, maxi = 0, 0 + else: + mini = starts_data.min() + maxi = stops_data.max() + + starts_data = starts_data - mini + stops_data = stops_data - mini + + nextcontent = self._content._getitem_range(slice(mini, maxi)) + if self.parameter("__array__") == "bytestring": - content = ak._v2._util.tobytes(self._content.data) - starts, stops = self.starts, self.stops + content = ak._v2._util.tobytes(nextcontent.data) out = [None] * starts.length for i in range(starts.length): - out[i] = content[starts[i] : stops[i]] + out[i] = content[starts_data[i] : stops_data[i]] return out elif self.parameter("__array__") == "string": - content = ak._v2._util.tobytes(self._content.data) - starts, stops = self.starts, self.stops + content = ak._v2._util.tobytes(nextcontent.data) out = [None] * starts.length for i in range(starts.length): - out[i] = content[starts[i] : stops[i]].decode(errors="surrogateescape") + out[i] = content[starts_data[i] : stops_data[i]].decode( + errors="surrogateescape" + ) return out else: @@ -2083,12 +2099,11 @@ def _to_list(self, behavior): if out is not None: return out - content = self._content._to_list(behavior) - starts, stops = self.starts, self.stops + content = nextcontent._to_list(behavior) out = [None] * starts.length for i in range(starts.length): - out[i] = content[starts[i] : stops[i]] + out[i] = content[starts_data[i] : stops_data[i]] return out def _to_nplike(self, nplike): diff --git a/src/awkward/_v2/highlevel.py b/src/awkward/_v2/highlevel.py index f11e0ac89b..252cc6723a 100644 --- a/src/awkward/_v2/highlevel.py +++ b/src/awkward/_v2/highlevel.py @@ -437,6 +437,13 @@ def fields(self): """ return self._layout.fields + @property + def is_tuple(self): + """ + If True, the top-most record structure has no named fields, i.e. it's a tuple. + """ + return self._layout.is_tuple + def _ipython_key_completions_(self): return self._layout.fields @@ -1580,6 +1587,13 @@ def fields(self): """ return self._layout.array.fields + @property + def is_tuple(self): + """ + If True, the top-most record structure has no named fields, i.e. it's a tuple. + """ + return self._layout.array.is_tuple + def _ipython_key_completions_(self): return self._layout.array.fields From 62709587da2caff7e895cf67749aa3174d12c67f Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Wed, 13 Apr 2022 20:42:46 -0500 Subject: [PATCH 5/5] Fix iteration over NumpyArray type. (#1419) --- src/awkward/_v2/highlevel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/awkward/_v2/highlevel.py b/src/awkward/_v2/highlevel.py index 252cc6723a..b1f5b06540 100644 --- a/src/awkward/_v2/highlevel.py +++ b/src/awkward/_v2/highlevel.py @@ -540,7 +540,7 @@ def __iter__(self): errors="surrogateescape" ) else: - yield x + yield ak._v2._util.wrap(x, self._behavior) elif isinstance(x, (ak._v2.contents.Content, ak._v2.record.Record)): yield ak._v2._util.wrap(x, self._behavior) else: