From 879d75e2904473098975b6ba32f59d48d703aea6 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Thu, 23 Feb 2023 15:06:03 -0400
Subject: [PATCH 01/52] maybe schema class

---
 python/.gitignore                    |   2 +-
 python/setup.py                      |   3 +-
 python/src/nanoarrow/__init__.py     |  18 +++++
 python/src/nanoarrow/_lib.pyx        |  85 +++++++++++++++++++++-
 python/src/nanoarrow/arrow_c.pxd     |  55 +++++++++++++++
 python/src/nanoarrow/nanoarrow_c.pxd | 101 ++++++++++++++++++---------
 python/tests/test_nanoarrow.py       |  22 ++++--
 7 files changed, 241 insertions(+), 45 deletions(-)
 create mode 100644 python/src/nanoarrow/arrow_c.pxd

diff --git a/python/.gitignore b/python/.gitignore
index fcf8363ba..a73fd3d06 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -18,7 +18,7 @@
 
 src/nanoarrow/nanoarrow.c
 src/nanoarrow/nanoarrow.h
-src/nanoarrow/*.cpp
+src/nanoarrow/*.c
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/python/setup.py b/python/setup.py
index f6f7efb1c..b89cf1903 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -24,7 +24,6 @@
 
 import numpy as np
 
-
 # setuptools gets confused by relative paths that extend above the project root
 target = Path(__file__).parent / "src" / "nanoarrow"
 shutil.copy(
@@ -39,7 +38,7 @@
         Extension(
             name="nanoarrow._lib",
             include_dirs=[np.get_include(), "src/nanoarrow"],
-            language="c++",
+            language="c",
             sources=[
                 "src/nanoarrow/_lib.pyx",
                 "src/nanoarrow/nanoarrow.c",
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 1586e60ab..9a148a4fc 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -17,4 +17,22 @@
 
 from ._lib import (  # noqa: F401
     as_numpy_array,
+    version,
+    CSchemaHolder,
+    CSchema,
 )
+
+class Schema(CSchema):
+    """CSchema that allocates its own CSchemaHolder when no parent is given."""
+    def __init__(self, parent=None, addr=None) -> None:
+        if parent is None:
+            parent = CSchemaHolder()  # fresh holder supplies the ArrowSchema storage
+        if addr is None:
+            addr = parent._addr()  # default to the address reported by parent
+        super().__init__(parent, addr)
+
+    @classmethod
+    def from_pyarrow(cls, obj):
+        schema = cls()  # alternate constructor: honor subclasses, not just Schema
+        obj._export_to_c(schema._addr())  # obj is expected to fill the struct there
+        return schema
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index a6b4da153..ba9cd21f9 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -19,8 +19,8 @@
 
 """Low-level nanoarrow Python bindings."""
 
-from libc.stdint cimport uint8_t, uintptr_t
-
+from libc.stdint cimport uint8_t, uintptr_t, int64_t
+from cpython.mem cimport PyMem_Malloc, PyMem_Free
 from nanoarrow_c cimport *
 
 import numpy as np
@@ -84,3 +84,84 @@ def as_numpy_array(arr):
     # TODO set base
 
     return result
+
+
+def version():
+    return ArrowNanoarrowVersion().decode("UTF-8")  # C string -> Python str
+
+cdef class CSchemaHolder:
+    cdef ArrowSchema c_schema  # one ArrowSchema struct embedded in this object
+
+    def __cinit__(self):  # always runs, even if a subclass skips __init__
+        self.c_schema.release = NULL  # NULL release == nothing to release yet
+
+    def __dealloc__(self):  # destructor hook Cython actually calls for cdef classes
+        if self.c_schema.release != NULL:
+            self.c_schema.release(&self.c_schema)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_schema  # address of the embedded struct as an int
+
+cdef class CSchemaChildren:
+    cdef CSchema _parent  # schema whose children this object exposes
+    cdef int64_t _length  # n_children, captured once at construction
+
+    def __init__(self, CSchema parent):
+        self._parent = parent
+        self._length = parent._ptr.n_children
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if k < 0 or k >= self._length:  # negative indices are rejected, not wrapped
+            raise IndexError(f"{k} out of range [0, {self._length})")
+        # The child gets the parent's Python type and the parent as its first argument.
+        return type(self._parent)(self._parent, self._child_addr(k))
+
+    cdef _child_addr(self, int64_t i):
+        cdef ArrowSchema** children = self._parent._ptr.children
+        cdef ArrowSchema* child = children[i]
+        return <uintptr_t>child  # pointer value of child i as an integer
+
+cdef class CSchema:
+    cdef object _base  # object handed in with addr; presumably keeps *_ptr's owner alive
+    cdef ArrowSchema* _ptr  # borrowed pointer: nothing in this class frees it
+
+    def __init__(self, object base, uintptr_t addr) -> None:
+        self._base = base  # the object itself; a trailing comma here would wrap it in a tuple
+        self._ptr = <ArrowSchema*>addr
+
+    def _addr(self):
+        return <uintptr_t>self._ptr
+
+    def __repr__(self) -> str:
+        # The first call passes no buffer; its result is used to size the allocation.
+        cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0, True)
+        cdef char* out = <char*>PyMem_Malloc(n_chars + 1)
+        if not out:
+            raise MemoryError()
+        try:
+            ArrowSchemaToString(self._ptr, out, n_chars + 1, True)
+            return out.decode("UTF-8")
+        finally:
+            PyMem_Free(out)  # released even when decoding raises
+
+    @property
+    def format(self):
+        if self._ptr.format != NULL:  # falls through to None when unset
+            return self._ptr.format.decode("UTF-8")
+
+    @property
+    def name(self):
+        if self._ptr.name != NULL:  # falls through to None when unset
+            return self._ptr.name.decode("UTF-8")
+
+    @property
+    def flags(self):
+        return self._ptr.flags
+
+    @property
+    def children(self):
+        return CSchemaChildren(self)  # sequence-like view over the child schemas
diff --git a/python/src/nanoarrow/arrow_c.pxd b/python/src/nanoarrow/arrow_c.pxd
new file mode 100644
index 000000000..a5f98c8af
--- /dev/null
+++ b/python/src/nanoarrow/arrow_c.pxd
@@ -0,0 +1,55 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+from libc.stdint cimport int64_t
+
+cdef extern from "nanoarrow.h":
+    cdef int ARROW_FLAG_DICTIONARY_ORDERED
+    cdef int ARROW_FLAG_NULLABLE
+    cdef int ARROW_FLAG_MAP_KEYS_SORTED
+
+    cdef struct ArrowSchema:
+        const char* format
+        const char* name
+        const char* metadata
+        int64_t flags
+        int64_t n_children
+        ArrowSchema** children
+        ArrowSchema* dictionary
+        void (*release)(ArrowSchema*)
+        void* private_data
+
+    cdef struct ArrowArray:
+        int64_t length
+        int64_t null_count
+        int64_t offset
+        int64_t n_buffers
+        int64_t n_children
+        const void** buffers
+        ArrowArray** children
+        ArrowArray* dictionary
+        void (*release)(ArrowArray*)
+        void* private_data
+
+    cdef struct ArrowArrayStream:
+        int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out)
+        int (*get_next)(ArrowArrayStream* stream, ArrowArray* out)
+        const char* (*get_last_error)(ArrowArrayStream*)
+        void (*release)(ArrowArrayStream* stream)
+        void* private_data
diff --git a/python/src/nanoarrow/nanoarrow_c.pxd b/python/src/nanoarrow/nanoarrow_c.pxd
index 440f449c1..2d76e0d8a 100644
--- a/python/src/nanoarrow/nanoarrow_c.pxd
+++ b/python/src/nanoarrow/nanoarrow_c.pxd
@@ -17,30 +17,20 @@
 
 # cython: language_level = 3
 
-from libc.stdint cimport int64_t, int8_t, uint8_t
+from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t
 
+from arrow_c cimport ArrowSchema, ArrowArray, ArrowArrayStream
 
 cdef extern from "nanoarrow.h":
-    struct ArrowSchema:
-        const char* format
-        int64_t n_children
-        void (*release)(ArrowSchema*)
-
-    struct ArrowArray:
-        int64_t length
-        int64_t null_count
-        int64_t offset
-        const void** buffers
-        void (*release)(ArrowArray*)
-
-    struct ArrowArrayStream:
-        int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out)
-
     ctypedef int ArrowErrorCode
+    cdef int NANOARROW_OK
+
+    cdef struct ArrowError:
+        pass
 
     enum ArrowType:
-        NANOARROW_TYPE_UNINITIALIZED = 0
-        NANOARROW_TYPE_NA = 1
+        NANOARROW_TYPE_UNINITIALIZED
+        NANOARROW_TYPE_NA
         NANOARROW_TYPE_BOOL
         NANOARROW_TYPE_UINT8
         NANOARROW_TYPE_INT8
@@ -87,34 +77,53 @@ cdef extern from "nanoarrow.h":
         NANOARROW_BUFFER_TYPE_DATA_OFFSET
         NANOARROW_BUFFER_TYPE_DATA
 
-    struct ArrowError:
-        pass
+    enum ArrowTimeUnit:
+        NANOARROW_TIME_UNIT_SECOND
+        NANOARROW_TIME_UNIT_MILLI
+        NANOARROW_TIME_UNIT_MICRO
+        NANOARROW_TIME_UNIT_NANO
 
-    const char* ArrowErrorMessage(ArrowError* error)
-
-    struct ArrowLayout:
-        ArrowBufferType buffer_type[3]
-        int64_t element_size_bits[3]
-        int64_t child_size_elements
+    cdef struct ArrowStringView:
+        const char* data
+        int64_t size_bytes
 
     cdef union buffer_data:
         const void* data
         const int8_t* as_int8
         const uint8_t* as_uint8
-
-    struct ArrowBufferView:
+        const int16_t* as_int16
+        const uint16_t* as_uint16
+        const int32_t* as_int32
+        const uint32_t* as_uint32
+        const int64_t* as_int64
+        const uint64_t* as_uint64
+        const double* as_double
+        const float* as_float
+        const char* as_char
+
+    cdef struct ArrowBufferView:
         buffer_data data
         int64_t size_bytes
 
-    struct ArrowBuffer:
+    cdef struct ArrowBufferAllocator:
+        pass
+
+    cdef struct ArrowBuffer:
         uint8_t* data
         int64_t size_bytes
+        int64_t capacity_bytes
+        ArrowBufferAllocator allocator
 
-    struct ArrowBitmap:
+    cdef struct ArrowBitmap:
         ArrowBuffer buffer
         int64_t size_bits
 
-    struct ArrowArrayView:
+    cdef struct ArrowLayout:
+        ArrowBufferType buffer_type[3]
+        int64_t element_size_bits[3]
+        int64_t child_size_elements
+
+    cdef struct ArrowArrayView:
         ArrowArray* array
         ArrowType storage_type
         ArrowLayout layout
@@ -122,6 +131,30 @@ cdef extern from "nanoarrow.h":
         int64_t n_children
         ArrowArrayView** children
 
-    ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, ArrowSchema* schema, ArrowError* error)
-    ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, ArrowArray* array, ArrowError* error)
-    int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to)
+    cdef const char* ArrowNanoarrowVersion()
+    cdef const char* ArrowErrorMessage(ArrowError* error)
+
+    cdef void ArrowSchemaMove(ArrowSchema* src, ArrowSchema* dst)
+    cdef void ArrowArrayMove(ArrowArray* src, ArrowArray* dst)
+    cdef void ArrowArrayStreamMove(ArrowArrayStream* src, ArrowArrayStream* dst)
+
+    cdef int64_t ArrowSchemaToString(ArrowSchema* schema, char* out, int64_t n,
+                                     char recursive)
+    cdef ArrowErrorCode ArrowSchemaDeepCopy(ArrowSchema* schema,
+                                            ArrowSchema* schema_out)
+    cdef ArrowErrorCode ArrowSchemaSetType(ArrowSchema* schema,ArrowType type_)
+    ArrowErrorCode ArrowSchemaSetTypeStruct(ArrowSchema* schema, int64_t n_children)
+
+    cdef struct ArrowMetadataReader:
+        pass
+
+    cdef ArrowErrorCode ArrowMetadataReaderInit(ArrowMetadataReader* reader,
+                                                const char* metadata)
+
+    cdef ArrowErrorCode ArrowMetadataReaderRead(ArrowMetadataReader* reader,
+                                                ArrowStringView* key_out,
+                                                ArrowStringView* value_out)
+
+    cdef ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, ArrowSchema* schema, ArrowError* error)
+    cdef ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, ArrowArray* array, ArrowError* error)
+    cdef int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to)
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index fd76534e1..2e3bbb709 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -1,27 +1,37 @@
 import numpy as np
 import pyarrow as pa
 
-import nanoarrow
+import nanoarrow as na
 
 import pytest
 
+def test_version():
+    assert(na.version() == "0.1.0-SNAPSHOT")
 
 def test_as_numpy_array():
-    
+
     arr = pa.array([1, 2, 3])
-    result = nanoarrow.as_numpy_array(arr)
+    result = na.as_numpy_array(arr)
     expected = arr.to_numpy()
     np.testing.assert_array_equal(result, expected)
 
     arr = pa.array([1, 2, 3], pa.uint8())
-    result = nanoarrow.as_numpy_array(arr)
+    result = na.as_numpy_array(arr)
     expected = arr.to_numpy()
     np.testing.assert_array_equal(result, expected)
 
     arr = pa.array([1, 2, None])
     with pytest.raises(ValueError, match="Cannot convert array with nulls"):
-        nanoarrow.as_numpy_array(arr)
+        na.as_numpy_array(arr)
 
     arr = pa.array([[1], [2, 3]])
     with pytest.raises(TypeError, match="Cannot convert a non-primitive array"):
-        nanoarrow.as_numpy_array(arr)
+        na.as_numpy_array(arr)
+
+def test_schema():
+    pa_schema = pa.schema([pa.field("some_name", pa.int32())])
+    na_schema = na.Schema.from_pyarrow(pa_schema)
+    assert na_schema.format == "+s"
+    assert na_schema.flags == 0
+    assert len(na_schema.children) == 1  # a real comparison, not an always-true tuple
+    assert na_schema.children[0].format == "i"

From cff939da42831b29c46a560e7d64ebc60945d67e Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 16:44:46 -0400
Subject: [PATCH 02/52] remove pxds

---
 python/src/nanoarrow/arrow_c.pxd     |  55 ---------
 python/src/nanoarrow/nanoarrow_c.pxd | 160 ---------------------------
 2 files changed, 215 deletions(-)
 delete mode 100644 python/src/nanoarrow/arrow_c.pxd
 delete mode 100644 python/src/nanoarrow/nanoarrow_c.pxd

diff --git a/python/src/nanoarrow/arrow_c.pxd b/python/src/nanoarrow/arrow_c.pxd
deleted file mode 100644
index a5f98c8af..000000000
--- a/python/src/nanoarrow/arrow_c.pxd
+++ /dev/null
@@ -1,55 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-from libc.stdint cimport int64_t
-
-cdef extern from "nanoarrow.h":
-    cdef int ARROW_FLAG_DICTIONARY_ORDERED
-    cdef int ARROW_FLAG_NULLABLE
-    cdef int ARROW_FLAG_MAP_KEYS_SORTED
-
-    cdef struct ArrowSchema:
-        const char* format
-        const char* name
-        const char* metadata
-        int64_t flags
-        int64_t n_children
-        ArrowSchema** children
-        ArrowSchema* dictionary
-        void (*release)(ArrowSchema*)
-        void* private_data
-
-    cdef struct ArrowArray:
-        int64_t length
-        int64_t null_count
-        int64_t offset
-        int64_t n_buffers
-        int64_t n_children
-        const void** buffers
-        ArrowArray** children
-        ArrowArray* dictionary
-        void (*release)(ArrowArray*)
-        void* private_data
-
-    cdef struct ArrowArrayStream:
-        int (*get_schema)(ArrowArrayStream* stream, ArrowSchema* out)
-        int (*get_next)(ArrowArrayStream* stream, ArrowArray* out)
-        const char* (*get_last_error)(ArrowArrayStream*)
-        void (*release)(ArrowArrayStream* stream)
-        void* private_data
diff --git a/python/src/nanoarrow/nanoarrow_c.pxd b/python/src/nanoarrow/nanoarrow_c.pxd
deleted file mode 100644
index 2d76e0d8a..000000000
--- a/python/src/nanoarrow/nanoarrow_c.pxd
+++ /dev/null
@@ -1,160 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# cython: language_level = 3
-
-from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t
-
-from arrow_c cimport ArrowSchema, ArrowArray, ArrowArrayStream
-
-cdef extern from "nanoarrow.h":
-    ctypedef int ArrowErrorCode
-    cdef int NANOARROW_OK
-
-    cdef struct ArrowError:
-        pass
-
-    enum ArrowType:
-        NANOARROW_TYPE_UNINITIALIZED
-        NANOARROW_TYPE_NA
-        NANOARROW_TYPE_BOOL
-        NANOARROW_TYPE_UINT8
-        NANOARROW_TYPE_INT8
-        NANOARROW_TYPE_UINT16
-        NANOARROW_TYPE_INT16
-        NANOARROW_TYPE_UINT32
-        NANOARROW_TYPE_INT32
-        NANOARROW_TYPE_UINT64
-        NANOARROW_TYPE_INT64
-        NANOARROW_TYPE_HALF_FLOAT
-        NANOARROW_TYPE_FLOAT
-        NANOARROW_TYPE_DOUBLE
-        NANOARROW_TYPE_STRING
-        NANOARROW_TYPE_BINARY
-        NANOARROW_TYPE_FIXED_SIZE_BINARY
-        NANOARROW_TYPE_DATE32
-        NANOARROW_TYPE_DATE64
-        NANOARROW_TYPE_TIMESTAMP
-        NANOARROW_TYPE_TIME32
-        NANOARROW_TYPE_TIME64
-        NANOARROW_TYPE_INTERVAL_MONTHS
-        NANOARROW_TYPE_INTERVAL_DAY_TIME
-        NANOARROW_TYPE_DECIMAL128
-        NANOARROW_TYPE_DECIMAL256
-        NANOARROW_TYPE_LIST
-        NANOARROW_TYPE_STRUCT
-        NANOARROW_TYPE_SPARSE_UNION
-        NANOARROW_TYPE_DENSE_UNION
-        NANOARROW_TYPE_DICTIONARY
-        NANOARROW_TYPE_MAP
-        NANOARROW_TYPE_EXTENSION
-        NANOARROW_TYPE_FIXED_SIZE_LIST
-        NANOARROW_TYPE_DURATION
-        NANOARROW_TYPE_LARGE_STRING
-        NANOARROW_TYPE_LARGE_BINARY
-        NANOARROW_TYPE_LARGE_LIST
-        NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
-
-    enum ArrowBufferType:
-        NANOARROW_BUFFER_TYPE_NONE
-        NANOARROW_BUFFER_TYPE_VALIDITY
-        NANOARROW_BUFFER_TYPE_TYPE_ID
-        NANOARROW_BUFFER_TYPE_UNION_OFFSET
-        NANOARROW_BUFFER_TYPE_DATA_OFFSET
-        NANOARROW_BUFFER_TYPE_DATA
-
-    enum ArrowTimeUnit:
-        NANOARROW_TIME_UNIT_SECOND
-        NANOARROW_TIME_UNIT_MILLI
-        NANOARROW_TIME_UNIT_MICRO
-        NANOARROW_TIME_UNIT_NANO
-
-    cdef struct ArrowStringView:
-        const char* data
-        int64_t size_bytes
-
-    cdef union buffer_data:
-        const void* data
-        const int8_t* as_int8
-        const uint8_t* as_uint8
-        const int16_t* as_int16
-        const uint16_t* as_uint16
-        const int32_t* as_int32
-        const uint32_t* as_uint32
-        const int64_t* as_int64
-        const uint64_t* as_uint64
-        const double* as_double
-        const float* as_float
-        const char* as_char
-
-    cdef struct ArrowBufferView:
-        buffer_data data
-        int64_t size_bytes
-
-    cdef struct ArrowBufferAllocator:
-        pass
-
-    cdef struct ArrowBuffer:
-        uint8_t* data
-        int64_t size_bytes
-        int64_t capacity_bytes
-        ArrowBufferAllocator allocator
-
-    cdef struct ArrowBitmap:
-        ArrowBuffer buffer
-        int64_t size_bits
-
-    cdef struct ArrowLayout:
-        ArrowBufferType buffer_type[3]
-        int64_t element_size_bits[3]
-        int64_t child_size_elements
-
-    cdef struct ArrowArrayView:
-        ArrowArray* array
-        ArrowType storage_type
-        ArrowLayout layout
-        ArrowBufferView buffer_views[3]
-        int64_t n_children
-        ArrowArrayView** children
-
-    cdef const char* ArrowNanoarrowVersion()
-    cdef const char* ArrowErrorMessage(ArrowError* error)
-
-    cdef void ArrowSchemaMove(ArrowSchema* src, ArrowSchema* dst)
-    cdef void ArrowArrayMove(ArrowArray* src, ArrowArray* dst)
-    cdef void ArrowArrayStreamMove(ArrowArrayStream* src, ArrowArrayStream* dst)
-
-    cdef int64_t ArrowSchemaToString(ArrowSchema* schema, char* out, int64_t n,
-                                     char recursive)
-    cdef ArrowErrorCode ArrowSchemaDeepCopy(ArrowSchema* schema,
-                                            ArrowSchema* schema_out)
-    cdef ArrowErrorCode ArrowSchemaSetType(ArrowSchema* schema,ArrowType type_)
-    ArrowErrorCode ArrowSchemaSetTypeStruct(ArrowSchema* schema, int64_t n_children)
-
-    cdef struct ArrowMetadataReader:
-        pass
-
-    cdef ArrowErrorCode ArrowMetadataReaderInit(ArrowMetadataReader* reader,
-                                                const char* metadata)
-
-    cdef ArrowErrorCode ArrowMetadataReaderRead(ArrowMetadataReader* reader,
-                                                ArrowStringView* key_out,
-                                                ArrowStringView* value_out)
-
-    cdef ArrowErrorCode ArrowArrayViewInitFromSchema(ArrowArrayView* array_view, ArrowSchema* schema, ArrowError* error)
-    cdef ArrowErrorCode ArrowArrayViewSetArray(ArrowArrayView* array_view, ArrowArray* array, ArrowError* error)
-    cdef int64_t ArrowBitCountSet(const uint8_t* bits, int64_t i_from, int64_t i_to)

From 73eb934db2f4bf3df08b8a977ab9427955fbb312 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 16:56:28 -0400
Subject: [PATCH 03/52] generate the nanoarrow pxd file

---
 python/.gitignore              |   1 +
 python/setup.py                | 114 +++++++++++++++++++++++++++++++++
 python/tests/test_nanoarrow.py |   2 +-
 3 files changed, 116 insertions(+), 1 deletion(-)

diff --git a/python/.gitignore b/python/.gitignore
index a73fd3d06..8abd5d0de 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -18,6 +18,7 @@
 
 src/nanoarrow/nanoarrow.c
 src/nanoarrow/nanoarrow.h
+src/nanoarrow/nanoarrow_c.pxd
 src/nanoarrow/*.c
 
 # Byte-compiled / optimized / DLL files
diff --git a/python/setup.py b/python/setup.py
index b89cf1903..e0b3fe52a 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -17,6 +17,9 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import re
+import os
+
 import shutil
 from pathlib import Path
 
@@ -24,6 +27,112 @@
 
 import numpy as np
 
+class NanoarrowPxdGenerator:
+
+    def __init__(self):
+        self._define_regexes()  # compile every pattern once, up front
+
+    def generate_nanoarrow_pxd(self, file_in, file_out):
+        """Write Cython declarations for the C header file_in to file_out."""
+        file_in_name = os.path.basename(file_in)
+
+        # Read the nanoarrow.h header
+        with open(file_in, 'r') as header_file:
+            content = header_file.read()
+
+        # Strip comments
+        content = self._strip_comments(content)
+
+        # Find types and function definitions
+        types = self._find_types(content)
+        func_defs = self._find_func_defs(content)
+
+        # Make corresponding cython definitions (same indent: both sit in 'cdef extern')
+        types_cython = [self._type_to_cython(t, '    ') for t in types]
+        func_defs_cython = [self._func_def_to_cython(d, '    ') for d in func_defs]
+
+        # Unindent the header
+        header = self.re_newline_plus_indent.sub('\n', self._pxd_header())
+
+        # Write nanoarrow_c.pxd
+        with open(file_out, 'wb') as output:
+            output.write(header.encode('UTF-8'))
+
+            output.write(f'\ncdef extern from "{file_in_name}":\n'.encode("UTF-8"))
+
+            for type_cython in types_cython:
+                output.write(type_cython.encode('UTF-8'))
+                output.write(b'\n\n')
+
+            for func_def in func_defs_cython:
+                output.write(func_def.encode('UTF-8'))
+                output.write(b'\n')
+
+            output.write(b'\n')
+
+    def _define_regexes(self):
+        self.re_comment = re.compile(r'\s*//[^\n]*')  # '//' to end of line, plus leading space
+        self.re_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}')
+        self.re_func_def = re.compile(r'\n(static inline )?(struct|enum )?(?P<return_type>[A-Za-z]+) (?P<name>Arrow[A-Za-z]+)\((?P<args>[^\)]*)\);')
+        self.re_tagged_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[A-Za-z]+)')
+        self.re_struct_delim = re.compile(r';\s*')  # splits struct/union bodies into members
+        self.re_enum_delim = re.compile(r',\s*')  # splits enum bodies into items
+        self.re_whitespace = re.compile(r'\s+')  # any run of whitespace
+        self.re_newline_plus_indent = re.compile(r'\n +')  # newline followed by spaces
+
+    def _strip_comments(self, content):
+        return self.re_comment.sub('', content)  # delete every re_comment match
+
+    def _find_types(self, content):
+        return [m.groupdict() for m in self.re_type.finditer(content)]  # type/name/body dicts
+
+    def _find_func_defs(self, content):
+        return [m.groupdict() for m in self.re_func_def.finditer(content)]  # named groups only
+
+    def _type_to_cython(self, t, indent=''):
+        kind, name = t['type'], t['name']
+        # Drop struct/union/enum tags inside the body, keeping only the type name
+        body = self.re_tagged_type.sub(r'\2', t['body'].strip())
+        # Enum items are comma-separated; struct/union members end with ';'
+        delim = self.re_enum_delim if kind == 'enum' else self.re_struct_delim
+        items = [item for item in delim.split(body) if item]
+
+        # The leading '' makes join() put a newline + indent before every item
+        cython_body = f'\n{indent}    '.join([''] + items)
+        return f'{indent}cdef {kind} {name}:{cython_body}'
+
+    def _func_def_to_cython(self, d, indent=''):
+        # Collapse whitespace runs (e.g. wrapped argument lists) to single spaces
+        args = self.re_whitespace.sub(' ', d['args'].strip())
+        # Replace 'struct X' / 'union X' / 'enum X' with the bare name X
+        args = self.re_tagged_type.sub(r'\2', args)
+        return f"{indent}cdef {d['return_type']} {d['name']}({args})"
+
+    def _pxd_header(self):
+        return """
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: language_level = 3
+
+from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t
+        """
+
+
 # setuptools gets confused by relative paths that extend above the project root
 target = Path(__file__).parent / "src" / "nanoarrow"
 shutil.copy(
@@ -33,6 +142,11 @@
     Path(__file__).parent / "../dist/nanoarrow.h", target / "nanoarrow.h"
 )
 
+NanoarrowPxdGenerator().generate_nanoarrow_pxd(
+    'src/nanoarrow/nanoarrow.h',
+    'src/nanoarrow/nanoarrow_c.pxd'
+)
+
 setup(
     ext_modules=[
         Extension(
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 2e3bbb709..305b6615c 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -6,7 +6,7 @@
 import pytest
 
 def test_version():
-    assert(na.version() == "0.1.0-SNAPSHOT")
+    assert(na.version() == "0.2.0-SNAPSHOT")
 
 def test_as_numpy_array():
 

From 6153916f7298b9a5493104daa35690620d956814 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 17:09:47 -0400
Subject: [PATCH 04/52] completely invalid but working towards ideal setup.py

---
 python/setup.py | 68 +++++++++++++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 27 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index e0b3fe52a..e547b70f3 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -110,37 +110,51 @@ def _func_def_to_cython(self, d, indent=''):
 
     def _pxd_header(self):
         return """
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
+        # Licensed to the Apache Software Foundation (ASF) under one
+        # or more contributor license agreements.  See the NOTICE file
+        # distributed with this work for additional information
+        # regarding copyright ownership.  The ASF licenses this file
+        # to you under the Apache License, Version 2.0 (the
+        # "License"); you may not use this file except in compliance
+        # with the License.  You may obtain a copy of the License at
+        #
+        #   http://www.apache.org/licenses/LICENSE-2.0
+        #
+        # Unless required by applicable law or agreed to in writing,
+        # software distributed under the License is distributed on an
+        # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+        # KIND, either express or implied.  See the License for the
+        # specific language governing permissions and limitations
+        # under the License.
+
+        # cython: language_level = 3
+
+        from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t,\
+            uint32_t, int64_t, uint64_t
+        """
 
-# cython: language_level = 3
+def copy_or_generate_nanoarrow_c():
+    this_dir = os.path.abspath(os.path.dirname(__file__))
 
-from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t
-        """
+    is_cmake_dir = 'CMakeLists.txt' in os.listdir('..')
+    is_in_nanoarrow_repo = 'nanoarrow.h' in os.listdir('../src/nanoarrow')
+    has_cmake = os.system('cmake --version') == 0
+    build_dir = os.path.join('.', '_cmake')
+    source_dir = os.path.abspath(os.path.join('..'))
 
+    if has_cmake and is_cmake_dir and is_in_nanoarrow_repo:
+        try:
+            os.system(f'cmake -B "{build_dir}" -S "{source_dir}" -DNANOARROW_BUNDLE=ON')
+            os.system(f'cmake --install -B "{build_dir}" -DNANOARROW_BUNDLE=ON')
+        finally:
+            os.unlink(build_dir)
 
-# setuptools gets confused by relative paths that extend above the project root
-target = Path(__file__).parent / "src" / "nanoarrow"
-shutil.copy(
-    Path(__file__).parent / "../dist/nanoarrow.c", target / "nanoarrow.c"
-)
-shutil.copy(
-    Path(__file__).parent / "../dist/nanoarrow.h", target / "nanoarrow.h"
-)
+    elif is_in_nanoarrow_repo:
+        shutil.copyfile()
+    else:
+        raise ValueError('Attempt to build source distribution outside the nanoarrow repo')
+
+copy_or_generate_nanoarrow_c()
 
 NanoarrowPxdGenerator().generate_nanoarrow_pxd(
     'src/nanoarrow/nanoarrow.h',

From 1637afeaf052d7117560c561671ab925a557cc7a Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 17:20:15 -0400
Subject: [PATCH 05/52] still invalid but better setup.py

---
 python/setup.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/python/setup.py b/python/setup.py
index e547b70f3..68f538e9d 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -19,9 +19,7 @@
 
 import re
 import os
-
 import shutil
-from pathlib import Path
 
 from setuptools import Extension, setup
 
@@ -134,20 +132,23 @@ def _pxd_header(self):
         """
 
 def copy_or_generate_nanoarrow_c():
+    this_wd = os.getcwd()
     this_dir = os.path.abspath(os.path.dirname(__file__))
+    source_dir = os.path.dirname(this_dir)
 
-    is_cmake_dir = 'CMakeLists.txt' in os.listdir('..')
-    is_in_nanoarrow_repo = 'nanoarrow.h' in os.listdir('../src/nanoarrow')
+    is_cmake_dir = 'CMakeLists.txt' in os.listdir(source_dir)
+    is_in_nanoarrow_repo = 'nanoarrow.h' in os.listdir(os.path.join(source_dir, 'src', 'nanoarrow'))
     has_cmake = os.system('cmake --version') == 0
-    build_dir = os.path.join('.', '_cmake')
-    source_dir = os.path.abspath(os.path.join('..'))
+    build_dir = os.path.join(this_dir, '_cmake')
 
     if has_cmake and is_cmake_dir and is_in_nanoarrow_repo:
         try:
-            os.system(f'cmake -B "{build_dir}" -S "{source_dir}" -DNANOARROW_BUNDLE=ON')
-            os.system(f'cmake --install -B "{build_dir}" -DNANOARROW_BUNDLE=ON')
+            os.mkdir(build_dir)
+            os.chdir(build_dir)
+            os.system(f'cmake .. -DNANOARROW_BUNDLE=ON')
+            os.system(f'cmake --install . --prefix=../src/nanoarrow')
         finally:
-            os.unlink(build_dir)
+            os.chdir(this_wd)
 
     elif is_in_nanoarrow_repo:
         shutil.copyfile()

From 1b38e9e8337147191c2c13ce4fa76ce4b5425d7e Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 22:21:23 -0400
Subject: [PATCH 06/52] actually working bootstrap setup

---
 python/bootstrap.py | 185 ++++++++++++++++++++++++++++++++++++++++++++
 python/setup.py     | 146 ++--------------------------------
 2 files changed, 191 insertions(+), 140 deletions(-)
 create mode 100644 python/bootstrap.py

diff --git a/python/bootstrap.py b/python/bootstrap.py
new file mode 100644
index 000000000..95a87de59
--- /dev/null
+++ b/python/bootstrap.py
@@ -0,0 +1,185 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+import os
+import shutil
+
+# Generate the nanoarrow_c.pxd file used by the Cython extension
+class NanoarrowPxdGenerator:
+
+    def __init__(self):
+       self._define_regexes()
+
+    def generate_nanoarrow_pxd(self, file_in, file_out):
+        file_in_name = os.path.basename(file_in)
+
+        # Read the nanoarrow.h header
+        content = None
+        with open(file_in, 'r') as input:
+            content = input.read()
+
+        # Strip comments
+        content = self.re_comment.sub('', content)
+
+        # Find types and function definitions
+        types = self._find_types(content)
+        func_defs = self._find_func_defs(content)
+
+        # Make corresponding cython definitions
+        types_cython = [self._type_to_cython(t, '    ') for t in types]
+        func_defs_cython = [self._func_def_to_cython(d, '     ') for d in func_defs]
+
+        # Unindent the header
+        header = self.re_newline_plus_indent.sub('\n', self._pxd_header())
+
+        # Write nanoarrow_c.pxd
+        with open(file_out, 'wb') as output:
+            output.write(header.encode('UTF-8'))
+
+            output.write(f'\ncdef extern from "{file_in_name}":\n'.encode("UTF-8"))
+
+            for type in types_cython:
+                output.write(type.encode('UTF-8'))
+                output.write(b'\n\n')
+
+            for func_def in func_defs_cython:
+                output.write(func_def.encode('UTF-8'))
+                output.write(b'\n')
+
+            output.write(b'\n')
+
+    def _define_regexes(self):
+        self.re_comment = re.compile(r'\s*//[^\n]*')
+        self.re_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}')
+        self.re_func_def = re.compile(r'\n(static inline )?(struct|enum )?(?P<return_type>[A-Za-z]+) (?P<name>Arrow[A-Za-z]+)\((?P<args>[^\)]*)\);')
+        self.re_tagged_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[A-Za-z]+)')
+        self.re_struct_delim = re.compile(r';\s*')
+        self.re_enum_delim = re.compile(r',\s*')
+        self.re_whitespace = re.compile(r'\s+')
+        self.re_newline_plus_indent = re.compile(r'\n +')
+
+    def _strip_comments(self, content):
+        return self.re_comment.sub('', content)
+
+    def _find_types(self, content):
+        return [m.groupdict() for m in self.re_type.finditer(content)]
+
+    def _find_func_defs(self, content):
+        return [m.groupdict() for m in self.re_func_def.finditer(content)]
+
+    def _type_to_cython(self, t, indent=''):
+        type = t['type']
+        name = t['name']
+        body = self.re_tagged_type.sub(r'\2', t['body'].strip())
+        if type == 'enum':
+            items = [item for item in self.re_enum_delim.split(body) if item]
+        else:
+            items = [item for item in self.re_struct_delim.split(body) if item]
+
+        cython_body = f'\n{indent}    '.join([''] + items)
+        return f'{indent}cdef {type} {name}:{cython_body}'
+
+    def _func_def_to_cython(self, d, indent=''):
+        return_type = d['return_type']
+        name = d['name']
+        args = re.sub(r'\s+', ' ', d['args'].strip())
+        args = self.re_tagged_type.sub(r'\2', args)
+        return f'{indent}cdef {return_type} {name}({args})'
+
+    def _pxd_header(self):
+        return """
+        # Licensed to the Apache Software Foundation (ASF) under one
+        # or more contributor license agreements.  See the NOTICE file
+        # distributed with this work for additional information
+        # regarding copyright ownership.  The ASF licenses this file
+        # to you under the Apache License, Version 2.0 (the
+        # "License"); you may not use this file except in compliance
+        # with the License.  You may obtain a copy of the License at
+        #
+        #   http://www.apache.org/licenses/LICENSE-2.0
+        #
+        # Unless required by applicable law or agreed to in writing,
+        # software distributed under the License is distributed on an
+        # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+        # KIND, either express or implied.  See the License for the
+        # specific language governing permissions and limitations
+        # under the License.
+
+        # cython: language_level = 3
+
+        from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t,\
+            uint32_t, int64_t, uint64_t
+        """
+
+# Runs cmake -DNANOARROW_BUNDLE=ON if cmake exists or copies nanoarrow.c/h
+# from ../dist if it does not. Running cmake is safer because it will sync
+# any changes from nanoarrow C library sources in the checkout but is not
+# strictly necessary for things like installing from GitHub.
+def copy_or_generate_nanoarrow_c():
+    this_wd = os.getcwd()
+    this_dir = os.path.abspath(os.path.dirname(__file__))
+    source_dir = os.path.dirname(this_dir)
+
+    maybe_nanoarrow_h = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.h')
+    maybe_nanoarrow_c = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.c')
+    for f in (maybe_nanoarrow_c, maybe_nanoarrow_h):
+        if os.path.exists(f):
+            os.unlink(f)
+
+    is_cmake_dir = 'CMakeLists.txt' in os.listdir(source_dir)
+    is_in_nanoarrow_repo = 'nanoarrow.h' in os.listdir(os.path.join(source_dir, 'src', 'nanoarrow'))
+    has_cmake = os.system('cmake --version') == 0
+    build_dir = os.path.join(this_dir, '_cmake')
+
+    if has_cmake and is_cmake_dir and is_in_nanoarrow_repo:
+        try:
+            os.mkdir(build_dir)
+            os.chdir(build_dir)
+            os.system(f'cmake ../.. -DNANOARROW_BUNDLE=ON -DNANOARROW_NAMESPACE=PythonPkg')
+            os.system(f'cmake --install . --prefix=../src/nanoarrow')
+        finally:
+            if os.path.exists(build_dir):
+                shutil.rmtree(build_dir)
+            os.chdir(this_wd)
+
+    elif is_in_nanoarrow_repo:
+        shutil.copyfile()
+    else:
+        raise ValueError('Attempt to build source distribution outside the nanoarrow repo')
+
+    if not os.path.exists(os.path.join(this_dir, 'src/nanoarrow/nanoarrow.h')):
+        raise ValueError('Attempt to vendor nanoarrow.c/h failed')
+
+    maybe_nanoarrow_hpp = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.hpp')
+    if os.path.exists(maybe_nanoarrow_hpp):
+        os.unlink(maybe_nanoarrow_hpp)
+
+# Runs the pxd generator with some information about the file name
+def generate_nanoarrow_pxd():
+     this_dir = os.path.abspath(os.path.dirname(__file__))
+     maybe_nanoarrow_h = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.h')
+     maybe_nanoarrow_pxd = os.path.join(this_dir, 'src/nanoarrow/nanoarrow_c.pxd')
+
+     NanoarrowPxdGenerator().generate_nanoarrow_pxd(
+        maybe_nanoarrow_h,
+        maybe_nanoarrow_pxd
+    )
+
+if __name__ == '__main__':
+    copy_or_generate_nanoarrow_c()
+    generate_nanoarrow_pxd()
diff --git a/python/setup.py b/python/setup.py
index 68f538e9d..fdf9eaba7 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -17,150 +17,16 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import re
 import os
-import shutil
-
+import sys
+import subprocess
 from setuptools import Extension, setup
-
 import numpy as np
 
-class NanoarrowPxdGenerator:
-
-    def __init__(self):
-       self._define_regexes()
-
-    def generate_nanoarrow_pxd(self, file_in, file_out):
-        file_in_name = os.path.basename(file_in)
-
-        # Read the nanoarrow.h header
-        content = None
-        with open(file_in, 'r') as input:
-            content = input.read()
-
-        # Strip comments
-        content = self.re_comment.sub('', content)
-
-        # Find types and function definitions
-        types = self._find_types(content)
-        func_defs = self._find_func_defs(content)
-
-        # Make corresponding cython definitions
-        types_cython = [self._type_to_cython(t, '    ') for t in types]
-        func_defs_cython = [self._func_def_to_cython(d, '     ') for d in func_defs]
-
-        # Unindent the header
-        header = self.re_newline_plus_indent.sub('\n', self._pxd_header())
-
-        # Write nanoarrow_c.pxd
-        with open(file_out, 'wb') as output:
-            output.write(header.encode('UTF-8'))
-
-            output.write(f'\ncdef extern from "{file_in_name}":\n'.encode("UTF-8"))
-
-            for type in types_cython:
-                output.write(type.encode('UTF-8'))
-                output.write(b'\n\n')
-
-            for func_def in func_defs_cython:
-                output.write(func_def.encode('UTF-8'))
-                output.write(b'\n')
-
-            output.write(b'\n')
-
-    def _define_regexes(self):
-        self.re_comment = re.compile(r'\s*//[^\n]*')
-        self.re_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}')
-        self.re_func_def = re.compile(r'\n(static inline )?(struct|enum )?(?P<return_type>[A-Za-z]+) (?P<name>Arrow[A-Za-z]+)\((?P<args>[^\)]*)\);')
-        self.re_tagged_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[A-Za-z]+)')
-        self.re_struct_delim = re.compile(r';\s*')
-        self.re_enum_delim = re.compile(r',\s*')
-        self.re_whitespace = re.compile(r'\s+')
-        self.re_newline_plus_indent = re.compile(r'\n +')
-
-    def _strip_comments(self, content):
-        return self.re_comment.sub('', content)
-
-    def _find_types(self, content):
-        return [m.groupdict() for m in self.re_type.finditer(content)]
-
-    def _find_func_defs(self, content):
-        return [m.groupdict() for m in self.re_func_def.finditer(content)]
-
-    def _type_to_cython(self, t, indent=''):
-        type = t['type']
-        name = t['name']
-        body = self.re_tagged_type.sub(r'\2', t['body'].strip())
-        if type == 'enum':
-            items = [item for item in self.re_enum_delim.split(body) if item]
-        else:
-            items = [item for item in self.re_struct_delim.split(body) if item]
-
-        cython_body = f'\n{indent}    '.join([''] + items)
-        return f'{indent}cdef {type} {name}:{cython_body}'
-
-    def _func_def_to_cython(self, d, indent=''):
-        return_type = d['return_type']
-        name = d['name']
-        args = re.sub(r'\s+', ' ', d['args'].strip())
-        args = self.re_tagged_type.sub(r'\2', args)
-        return f'{indent}cdef {return_type} {name}({args})'
-
-    def _pxd_header(self):
-        return """
-        # Licensed to the Apache Software Foundation (ASF) under one
-        # or more contributor license agreements.  See the NOTICE file
-        # distributed with this work for additional information
-        # regarding copyright ownership.  The ASF licenses this file
-        # to you under the Apache License, Version 2.0 (the
-        # "License"); you may not use this file except in compliance
-        # with the License.  You may obtain a copy of the License at
-        #
-        #   http://www.apache.org/licenses/LICENSE-2.0
-        #
-        # Unless required by applicable law or agreed to in writing,
-        # software distributed under the License is distributed on an
-        # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-        # KIND, either express or implied.  See the License for the
-        # specific language governing permissions and limitations
-        # under the License.
-
-        # cython: language_level = 3
-
-        from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t,\
-            uint32_t, int64_t, uint64_t
-        """
-
-def copy_or_generate_nanoarrow_c():
-    this_wd = os.getcwd()
-    this_dir = os.path.abspath(os.path.dirname(__file__))
-    source_dir = os.path.dirname(this_dir)
-
-    is_cmake_dir = 'CMakeLists.txt' in os.listdir(source_dir)
-    is_in_nanoarrow_repo = 'nanoarrow.h' in os.listdir(os.path.join(source_dir, 'src', 'nanoarrow'))
-    has_cmake = os.system('cmake --version') == 0
-    build_dir = os.path.join(this_dir, '_cmake')
-
-    if has_cmake and is_cmake_dir and is_in_nanoarrow_repo:
-        try:
-            os.mkdir(build_dir)
-            os.chdir(build_dir)
-            os.system(f'cmake .. -DNANOARROW_BUNDLE=ON')
-            os.system(f'cmake --install . --prefix=../src/nanoarrow')
-        finally:
-            os.chdir(this_wd)
-
-    elif is_in_nanoarrow_repo:
-        shutil.copyfile()
-    else:
-        raise ValueError('Attempt to build source distribution outside the nanoarrow repo')
-
-copy_or_generate_nanoarrow_c()
-
-NanoarrowPxdGenerator().generate_nanoarrow_pxd(
-    'src/nanoarrow/nanoarrow.h',
-    'src/nanoarrow/nanoarrow_c.pxd'
-)
+# Run bootstrap.py to run cmake generating a fresh bundle based on this
+# checkout or copy from ../dist if the caller doesn't have cmake available
+this_dir = os.path.dirname(__file__)
+subprocess.run([sys.executable, os.path.join(this_dir, 'bootstrap.py')])
 
 setup(
     ext_modules=[

From ca191e7db8a83427197615e7240d73b7719be274 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 22:37:56 -0400
Subject: [PATCH 07/52] fix indentation

---
 python/bootstrap.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index 95a87de59..bf6771bb6 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -42,7 +42,7 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
 
         # Make corresponding cython definitions
         types_cython = [self._type_to_cython(t, '    ') for t in types]
-        func_defs_cython = [self._func_def_to_cython(d, '     ') for d in func_defs]
+        func_defs_cython = [self._func_def_to_cython(d, '    ') for d in func_defs]
 
         # Unindent the header
         header = self.re_newline_plus_indent.sub('\n', self._pxd_header())
@@ -95,7 +95,7 @@ def _type_to_cython(self, t, indent=''):
         return f'{indent}cdef {type} {name}:{cython_body}'
 
     def _func_def_to_cython(self, d, indent=''):
-        return_type = d['return_type']
+        return_type = d['return_type'].strip()
         name = d['name']
         args = re.sub(r'\s+', ' ', d['args'].strip())
         args = self.re_tagged_type.sub(r'\2', args)

From 891afb124dab56f0a4901e974df1592a24967bd0 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 22:46:35 -0400
Subject: [PATCH 08/52] add some typedefs

---
 python/bootstrap.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index bf6771bb6..1510e5dfa 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -53,6 +53,12 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
 
             output.write(f'\ncdef extern from "{file_in_name}":\n'.encode("UTF-8"))
 
+            # A few things we add in manually
+            output.write(b'\n')
+            output.write(b'    ctypedef int ArrowErrorCode\n')
+            output.write(b'    cdef int NANOARROW_OK\n')
+            output.write(b'\n')
+
             for type in types_cython:
                 output.write(type.encode('UTF-8'))
                 output.write(b'\n\n')
@@ -122,8 +128,7 @@ def _pxd_header(self):
 
         # cython: language_level = 3
 
-        from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t,\
-            uint32_t, int64_t, uint64_t
+        from libc.stdint cimport int8_t, uint8_t, int16_t, uint16_t, int32_t, uint32_t, int64_t, uint64_t
         """
 
 # Runs cmake -DNANOARROW_BUNDLE=ON if cmake exists or copies nanoarrow.c/h

From 26581a541971e1f5eeef32fa8dd887692ac6e0ec Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 22:48:38 -0400
Subject: [PATCH 09/52] no void()

---
 python/bootstrap.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index 1510e5dfa..409bd949e 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -105,6 +105,11 @@ def _func_def_to_cython(self, d, indent=''):
         name = d['name']
         args = re.sub(r'\s+', ' ', d['args'].strip())
         args = self.re_tagged_type.sub(r'\2', args)
+
+        # Cython doesn't do (void)
+        if args == 'void':
+            args = ''
+
         return f'{indent}cdef {return_type} {name}({args})'
 
     def _pxd_header(self):

From 8531234d699784d9b6005aa24ec978267cebfbf8 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 22:50:18 -0400
Subject: [PATCH 10/52] try without namespace

---
 python/bootstrap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index 409bd949e..8da841f43 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -160,7 +160,7 @@ def copy_or_generate_nanoarrow_c():
         try:
             os.mkdir(build_dir)
             os.chdir(build_dir)
-            os.system(f'cmake ../.. -DNANOARROW_BUNDLE=ON -DNANOARROW_NAMESPACE=PythonPkg')
+            os.system(f'cmake ../.. -DNANOARROW_BUNDLE=ON')
             os.system(f'cmake --install . --prefix=../src/nanoarrow')
         finally:
             if os.path.exists(build_dir):

From 1bb020fc11dc75f10db28c4b9efc53dc98a3d3c3 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 8 Mar 2023 22:56:10 -0400
Subject: [PATCH 11/52] better function regex

---
 python/bootstrap.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index 8da841f43..978314419 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -72,7 +72,7 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
     def _define_regexes(self):
         self.re_comment = re.compile(r'\s*//[^\n]*')
         self.re_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}')
-        self.re_func_def = re.compile(r'\n(static inline )?(struct|enum )?(?P<return_type>[A-Za-z]+) (?P<name>Arrow[A-Za-z]+)\((?P<args>[^\)]*)\);')
+        self.re_func_def = re.compile(r'\n(static inline )?(?P<const>const )?(struct|enum )?(?P<return_type>[A-Za-z0-9_*]+) (?P<name>Arrow[A-Za-z]+)\((?P<args>[^\)]*)\);')
         self.re_tagged_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[A-Za-z]+)')
         self.re_struct_delim = re.compile(r';\s*')
         self.re_enum_delim = re.compile(r',\s*')
@@ -102,6 +102,8 @@ def _type_to_cython(self, t, indent=''):
 
     def _func_def_to_cython(self, d, indent=''):
         return_type = d['return_type'].strip()
+        if d['const']:
+            return_type = 'const ' + return_type
         name = d['name']
         args = re.sub(r'\s+', ' ', d['args'].strip())
         args = self.re_tagged_type.sub(r'\2', args)

From 368ecbede7b13982ce3f6bf0d08a3cf9415f9e44 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Thu, 9 Mar 2023 10:05:21 -0400
Subject: [PATCH 12/52] move some bits from the other repo

---
 python/src/nanoarrow/__init__.py | 15 -------
 python/src/nanoarrow/_lib.pyx    | 77 ++++++++++++++++++++++++++++++--
 python/tests/test_nanoarrow.py   | 56 ++++++++++++++++++++---
 src/nanoarrow/nanoarrow_types.h  |  6 +++
 4 files changed, 130 insertions(+), 24 deletions(-)

diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 9a148a4fc..e429fb6e0 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -21,18 +21,3 @@
     CSchemaHolder,
     CSchema,
 )
-
-class Schema(CSchema):
-
-    def __init__(self, parent=None, addr=None) -> None:
-        if parent is None:
-            parent = CSchemaHolder()
-        if addr is None:
-            addr = parent._addr()
-        super().__init__(parent, addr)
-
-    @staticmethod
-    def from_pyarrow(obj):
-        schema = Schema()
-        obj._export_to_c(schema._addr())
-        return schema
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index ba9cd21f9..f5a0a8923 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -102,6 +102,32 @@ cdef class CSchemaHolder:
     def _addr(self):
         return <uintptr_t>&self.c_schema
 
+cdef class CArrayHolder:
+    cdef ArrowArray c_array
+
+    def __init__(self):
+        self.c_array.release = NULL
+
+    def __del__(self):
+        if self.c_array.release != NULL:
+          self.c_array.release(&self.c_array)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_array
+
+cdef class CArrayViewHolder:
+    cdef ArrowArrayView c_array_view
+
+    def __init__(self):
+        ArrowArrayViewInitFromType(&self.c_array_view, NANOARROW_TYPE_UNINITIALIZED)
+
+    def __del__(self):
+        ArrowArrayViewReset(&self.c_array_view)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_array_view
+
+
 cdef class CSchemaChildren:
     cdef CSchema _parent
     cdef int64_t _length
@@ -118,7 +144,7 @@ cdef class CSchemaChildren:
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
 
-        return type(self._parent)(self._parent, self._child_addr(k))
+        return CSchema(self._parent, self._child_addr(k))
 
     cdef _child_addr(self, int64_t i):
         cdef ArrowSchema** children = self._parent._ptr.children
@@ -129,14 +155,26 @@ cdef class CSchema:
     cdef object _base
     cdef ArrowSchema* _ptr
 
-    def __init__(self, object base, uintptr_t addr) -> None:
+    @staticmethod
+    def Empty():
+        base = CSchemaHolder()
+        return CSchema(base, base._addr())
+
+    def __init__(self, object base, uintptr_t addr):
         self._base = base,
         self._ptr = <ArrowSchema*>addr
 
     def _addr(self):
         return <uintptr_t>self._ptr
 
-    def __repr__(self) -> str:
+    def is_valid(self):
+        return self._ptr.release != NULL
+
+    cdef void _assert_valid(self):
+        if self._ptr.release == NULL:
+            raise RuntimeError("schema is released")
+
+    def __repr__(self):
         cdef int64_t n_chars = ArrowSchemaToString(self._ptr, NULL, 0, True)
         cdef char* out = <char*>PyMem_Malloc(n_chars + 1)
         if not out:
@@ -150,13 +188,17 @@ cdef class CSchema:
 
     @property
     def format(self):
+        self._assert_valid()
         if self._ptr.format != NULL:
             return self._ptr.format.decode("UTF-8")
 
     @property
     def name(self):
+        self._assert_valid()
         if self._ptr.name != NULL:
             return self._ptr.name.decode("UTF-8")
+        else:
+            return None
 
     @property
     def flags(self):
@@ -164,4 +206,33 @@ cdef class CSchema:
 
     @property
     def children(self):
+        self._assert_valid()
         return CSchemaChildren(self)
+
+    def parse(self):
+        self._assert_valid()
+
+        cdef ArrowError error
+        cdef ArrowSchemaView schema_view
+
+        cdef int result = ArrowSchemaViewInit(&schema_view, self._ptr, &error)
+        if result != NANOARROW_OK:
+            raise ValueError(ArrowErrorMessage(&error))
+
+        out = {
+            'name': self._ptr.name.decode('UTF-8') if self._ptr.name else None,
+            'type': ArrowTypeString(schema_view.type).decode('UTF-8'),
+            'storage_type': ArrowTypeString(schema_view.storage_type).decode('UTF-8')
+        }
+
+        if schema_view.storage_type in (NANOARROW_TYPE_FIXED_SIZE_LIST,
+                                        NANOARROW_TYPE_FIXED_SIZE_BINARY):
+            out['fixed_size'] = schema_view.fixed_size
+
+        if schema_view.storage_type in (NANOARROW_TYPE_DECIMAL128,
+                                        NANOARROW_TYPE_DECIMAL256):
+            out['decimal_bitwidth'] = schema_view.decimal_bitwidth
+            out['decimal_precision'] = schema_view.decimal_precision
+            out['decimal_scale'] = schema_view.decimal_scale
+
+        return out
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 305b6615c..1698b0aad 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -28,10 +28,54 @@ def test_as_numpy_array():
     with pytest.raises(TypeError, match="Cannot convert a non-primitive array"):
         na.as_numpy_array(arr)
 
-def test_schema():
+def test_schema_basic():# Blank invalid schema
+    schema = na.CSchema.Empty()
+    assert(schema.is_valid() is False)
+    assert(repr(schema) == "[invalid: schema is released]")
+
     pa_schema = pa.schema([pa.field("some_name", pa.int32())])
-    na_schema = na.Schema.from_pyarrow(pa_schema)
-    assert(na_schema.format == "+s")
-    assert(na_schema.flags == 0)
-    assert(len(na_schema.children), 1)
-    assert(na_schema.children[0].format == "i")
+    pa_schema._export_to_c(schema._addr())
+
+    assert(schema.format == "+s")
+    assert(schema.flags == 0)
+    assert(len(schema.children), 1)
+    assert(schema.children[0].format == "i")
+    assert(schema.children[0].name == "some_name")
+    assert(repr(schema.children[0]) == "int32")
+
+    with pytest.raises(IndexError):
+        schema.children[1]
+
+def test_schema_parse():
+    schema = na.CSchema.Empty()
+    with pytest.raises(ValueError):
+        schema.parse()
+
+    pa.schema([pa.field("col1", pa.int32())])._export_to_c(schema._addr())
+
+    info = schema.parse()
+    assert(info['type'] == 'struct')
+    assert(info['storage_type'] == 'struct')
+    assert(info['name'] == '')
+
+    # Check on the child
+    child = schema.children[0]
+    child_info = child.parse()
+    assert(child_info['type'] == 'int32')
+    assert(child_info['storage_type'] == 'int32')
+    assert(child_info['name'] == 'col1')
+
+def test_schema_info_params():
+    schema = na.CSchema.Empty()
+    pa.binary(12)._export_to_c(schema._addr())
+    assert(schema.parse()['fixed_size'] == 12)
+
+    schema = na.CSchema.Empty()
+    pa.list_(pa.int32(), 12)._export_to_c(schema._addr())
+    assert(schema.parse()['fixed_size'] == 12)
+
+    schema = na.CSchema.Empty()
+    pa.decimal128(10, 3)._export_to_c(schema._addr())
+    assert(schema.parse()['decimal_bitwidth'] == 128)
+    assert(schema.parse()['decimal_precision'] == 10)
+    assert(schema.parse()['decimal_scale'] == 3)
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index 45ee3c636..bf85b19b2 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -298,6 +298,8 @@ enum ArrowType {
 /// \ingroup nanoarrow-utils
 ///
 /// Returns NULL for invalid values for type
+static inline const char* ArrowTypeString(enum ArrowType type);
+
 static inline const char* ArrowTypeString(enum ArrowType type) {
   switch (type) {
     case NANOARROW_TYPE_NA:
@@ -416,6 +418,8 @@ enum ArrowValidationLevel {
 /// \ingroup nanoarrow-utils
 ///
 /// Returns NULL for invalid values for time_unit
+static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit);
+
 static inline const char* ArrowTimeUnitString(enum ArrowTimeUnit time_unit) {
   switch (time_unit) {
     case NANOARROW_TIME_UNIT_SECOND:
@@ -458,6 +462,8 @@ struct ArrowStringView {
 
 /// \brief Return a view of a const C string
 /// \ingroup nanoarrow-utils
+static inline struct ArrowStringView ArrowCharView(const char* value);
+
 static inline struct ArrowStringView ArrowCharView(const char* value) {
   struct ArrowStringView out;
 

From 5d0a50005604a818a3a785e25242dcdcccd0dd8a Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Thu, 9 Mar 2023 10:23:55 -0400
Subject: [PATCH 13/52] with recursiveness

---
 python/src/nanoarrow/__init__.py |   2 +
 python/src/nanoarrow/_lib.pyx    | 172 ++++++++++++++++++++++++++-----
 python/tests/test_nanoarrow.py   |  67 ++++++++++++
 3 files changed, 217 insertions(+), 24 deletions(-)

diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index e429fb6e0..e9c74a974 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -20,4 +20,6 @@
     version,
     CSchemaHolder,
     CSchema,
+    CArray,
+    CArrayView
 )
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index f5a0a8923..098ecb2f2 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -127,30 +127,6 @@ cdef class CArrayViewHolder:
     def _addr(self):
         return <uintptr_t>&self.c_array_view
 
-
-cdef class CSchemaChildren:
-    cdef CSchema _parent
-    cdef int64_t _length
-
-    def __init__(self, CSchema parent):
-        self._parent = parent
-        self._length = parent._ptr.n_children
-
-    def __len__(self):
-        return self._length
-
-    def __getitem__(self, k):
-        k = int(k)
-        if k < 0 or k >= self._length:
-            raise IndexError(f"{k} out of range [0, {self._length})")
-
-        return CSchema(self._parent, self._child_addr(k))
-
-    cdef _child_addr(self, int64_t i):
-        cdef ArrowSchema** children = self._parent._ptr.children
-        cdef ArrowSchema* child = children[i]
-        return <uintptr_t>child
-
 cdef class CSchema:
     cdef object _base
     cdef ArrowSchema* _ptr
@@ -236,3 +212,151 @@ cdef class CSchema:
             out['decimal_scale'] = schema_view.decimal_scale
 
         return out
+
+cdef class CArray:
+    cdef object _base
+    cdef ArrowArray* _ptr
+    cdef CSchema _schema
+
+    @staticmethod
+    def Empty(CSchema schema):
+        base = CArrayHolder()
+        return CArray(base, base._addr(), schema)
+
+    def __init__(self, object base, uintptr_t addr, CSchema schema):
+        self._base = base,
+        self._ptr = <ArrowArray*>addr
+        self._schema = schema
+
+    def _addr(self):
+        return <uintptr_t>self._ptr
+
+    def is_valid(self):
+        return self._ptr.release != NULL
+
+    cdef void _assert_valid(self):
+        if self._ptr.release == NULL:
+            raise RuntimeError("Array is released")
+
+    @property
+    def schema(self):
+        return self._schema
+
+    @property
+    def children(self):
+        return CArrayChildren(self)
+
+    def validate(self):
+        cdef CArrayViewHolder holder = CArrayViewHolder()
+
+        cdef ArrowError error
+        cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
+                                                       self._schema._ptr, &error)
+        if result != NANOARROW_OK:
+            raise ValueError(ArrowErrorMessage(&error))
+
+        result = ArrowArrayViewSetArray(&holder.c_array_view, self._ptr, &error)
+        if result != NANOARROW_OK:
+            raise ValueError(ArrowErrorMessage(&error))
+
+        return CArrayView(holder, holder._addr(), self)
+
+
+cdef class CArrayView:
+    cdef object _base
+    cdef ArrowArrayView* _ptr
+    cdef CArray _array
+
+    def __init__(self, object base, uintptr_t addr, CArray array):
+        self._base = base,
+        self._ptr = <ArrowArrayView*>addr
+        self._array = array
+
+    @property
+    def children(self):
+        return CArrayViewChildren(self)
+
+    @property
+    def array(self):
+        return self._array
+
+    @property
+    def schema(self):
+        return self._array._schema
+
+    def __len__(self):
+        return self._ptr.array.length
+
+    def value_int(self, int64_t i):
+        if i < 0 or i >= self._ptr.array.length:
+            raise IndexError()
+        return ArrowArrayViewGetIntUnsafe(self._ptr, i)
+
+cdef class CSchemaChildren:
+    cdef CSchema _parent
+    cdef int64_t _length
+
+    def __init__(self, CSchema parent):
+        self._parent = parent
+        self._length = parent._ptr.n_children
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if k < 0 or k >= self._length:
+            raise IndexError(f"{k} out of range [0, {self._length})")
+
+        return CSchema(self._parent, self._child_addr(k))
+
+    cdef _child_addr(self, int64_t i):
+        cdef ArrowSchema** children = self._parent._ptr.children
+        cdef ArrowSchema* child = children[i]
+        return <uintptr_t>child
+
+cdef class CArrayChildren:
+    cdef CArray _parent
+    cdef int64_t _length
+
+    def __init__(self, CArray parent):
+        self._parent = parent
+        self._length = parent._ptr.n_children
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if k < 0 or k >= self._length:
+            raise IndexError(f"{k} out of range [0, {self._length})")
+
+        return CArray(self._parent, self._child_addr(k))
+
+    cdef _child_addr(self, int64_t i):
+        cdef ArrowArray** children = self._parent._ptr.children
+        cdef ArrowArray* child = children[i]
+        return <uintptr_t>child
+
+cdef class CArrayViewChildren:
+    cdef CArrayView _parent
+    cdef int64_t _length
+
+    def __init__(self, CArrayView parent):
+        self._parent = parent
+        self._length = parent._ptr.n_children
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if k < 0 or k >= self._length:
+            raise IndexError(f"{k} out of range [0, {self._length})")
+
+        return CArrayView(self._parent, self._child_addr(k), self._parent._array)
+
+    cdef _child_addr(self, int64_t i):
+        cdef ArrowArrayView** children = self._parent._ptr.children
+        cdef ArrowArrayView* child = children[i]
+        return <uintptr_t>child
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 1698b0aad..2275c3ab7 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 import numpy as np
 import pyarrow as pa
 
@@ -79,3 +96,53 @@ def test_schema_info_params():
     assert(schema.parse()['decimal_bitwidth'] == 128)
     assert(schema.parse()['decimal_precision'] == 10)
     assert(schema.parse()['decimal_scale'] == 3)
+
+def test_array():
+    schema = na.CSchema.Empty()
+    pa.int32()._export_to_c(schema._addr())
+
+    array = na.CArray.Empty(schema)
+    assert(array.is_valid() is False)
+
+    pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr())
+    assert(array.is_valid() is True)
+
+    view = array.validate()
+
+    assert(view.array is array)
+    assert(view.schema is schema)
+    assert(len(view) == 3)
+
+    assert(view.value_int(0) == 1)
+    assert(view.value_int(1) == 2)
+    assert(view.value_int(2) == 3)
+
+def test_array_recursive():
+    pa_array = pa.array([1, 2, 3], pa.int32())
+    pa_batch = pa.record_batch([pa_array], names=["some_column"])
+
+    schema = na.CSchema.Empty()
+    pa_batch.schema._export_to_c(schema._addr())
+    assert(len(schema.children) == 1)
+    with pytest.raises(IndexError):
+        schema.children[1]
+
+    array = na.CArray.Empty(schema)
+    assert(array.is_valid() is False)
+
+    pa_batch._export_to_c(array._addr())
+    assert(array.is_valid() is True)
+    assert(len(array.children) == 1)
+    with pytest.raises(IndexError):
+        array.children[1]
+
+    view = array.validate()
+    assert(len(view.children) == 1)
+    with pytest.raises(IndexError):
+       view.children[1]
+
+    child = view.children[0]
+    assert(len(child) == 3)
+    assert(child.value_int(0) == 1)
+    assert(child.value_int(1) == 2)
+    assert(child.value_int(2) == 3)

From a83e2e9bb83e78a991bd121dd758fbdd55d52e81 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Thu, 9 Mar 2023 11:21:17 -0400
Subject: [PATCH 14/52] passing buffer protocol test

---
 python/src/nanoarrow/_lib.pyx  | 51 ++++++++++++++++++++++++++++++++++
 python/tests/test_nanoarrow.py |  6 ++++
 2 files changed, 57 insertions(+)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 098ecb2f2..03474a086 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -21,6 +21,7 @@
 
 from libc.stdint cimport uint8_t, uintptr_t, int64_t
 from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython cimport Py_buffer
 from nanoarrow_c cimport *
 
 import numpy as np
@@ -262,6 +263,52 @@ cdef class CArray:
         return CArrayView(holder, holder._addr(), self)
 
 
+cdef class CBufferView:
+    cdef object _base
+    cdef ArrowBufferView* _ptr
+    cdef Py_ssize_t _shape
+    cdef Py_ssize_t _strides
+
+    def __init__(self, object base, uintptr_t addr):
+        self._base = base
+        self._ptr = <ArrowBufferView*>addr
+        self._shape = self._ptr.size_bytes
+        self._strides = 1
+
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        buffer.buf = self._ptr.data.data
+        buffer.format = NULL
+        buffer.internal = NULL
+        buffer.itemsize = 1
+        buffer.len = self._ptr.size_bytes
+        buffer.ndim = 1
+        buffer.obj = self
+        buffer.readonly = 1
+        buffer.shape = &self._shape
+        buffer.strides = &self._strides
+        buffer.suboffsets = NULL
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
+cdef class CArrayViewBuffers:
+    cdef CArrayView _array_view
+    cdef int64_t _length
+
+    def __init__(self, CArrayView array_view):
+        self._array_view = array_view
+        self._length = array_view._array._ptr.n_buffers
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if k < 0 or k >= self._length:
+            raise IndexError(f"{k} out of range [0, {self._length})")
+        cdef ArrowBufferView* buffer_view = &(self._array_view._ptr.buffer_views[k])
+        return CBufferView(self._array_view, <uintptr_t>buffer_view)
+
 cdef class CArrayView:
     cdef object _base
     cdef ArrowArrayView* _ptr
@@ -276,6 +323,10 @@ cdef class CArrayView:
     def children(self):
         return CArrayViewChildren(self)
 
+    @property
+    def buffers(self):
+        return CArrayViewBuffers(self)
+
     @property
     def array(self):
         return self._array
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 2275c3ab7..437b78120 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -117,6 +117,12 @@ def test_array():
     assert(view.value_int(1) == 2)
     assert(view.value_int(2) == 3)
 
+    data_buffer = memoryview(view.buffers[1])
+    assert(len(data_buffer) == 12)
+    data_buffer_copy = bytes(data_buffer)
+    # (needs updating if testing on big endian)
+    assert(data_buffer_copy == b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00')
+
 def test_array_recursive():
     pa_array = pa.array([1, 2, 3], pa.int32())
     pa_batch = pa.record_batch([pa_array], names=["some_column"])

From 18f1a20273959092a46c6bece1144a9dc20472a6 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Thu, 9 Mar 2023 11:28:14 -0400
Subject: [PATCH 15/52] make _assert_valid a def method and fix test expectations

---
 python/src/nanoarrow/_lib.pyx  | 4 ++--
 python/tests/test_nanoarrow.py | 7 ++++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 03474a086..0dd022e95 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -147,7 +147,7 @@ cdef class CSchema:
     def is_valid(self):
         return self._ptr.release != NULL
 
-    cdef void _assert_valid(self):
+    def _assert_valid(self):
         if self._ptr.release == NULL:
             raise RuntimeError("schema is released")
 
@@ -235,7 +235,7 @@ cdef class CArray:
     def is_valid(self):
         return self._ptr.release != NULL
 
-    cdef void _assert_valid(self):
+    def _assert_valid(self):
         if self._ptr.release == NULL:
             raise RuntimeError("Array is released")
 
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 437b78120..7dafa5387 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -45,7 +45,8 @@ def test_as_numpy_array():
     with pytest.raises(TypeError, match="Cannot convert a non-primitive array"):
         na.as_numpy_array(arr)
 
-def test_schema_basic():# Blank invalid schema
+def test_schema_basic():
+    # Blank invalid schema
     schema = na.CSchema.Empty()
     assert(schema.is_valid() is False)
     assert(repr(schema) == "[invalid: schema is released]")
@@ -55,7 +56,7 @@ def test_schema_basic():# Blank invalid schema
 
     assert(schema.format == "+s")
     assert(schema.flags == 0)
-    assert(len(schema.children), 1)
+    assert(len(schema.children) == 1)
     assert(schema.children[0].format == "i")
     assert(schema.children[0].name == "some_name")
     assert(repr(schema.children[0]) == "int32")
@@ -65,7 +66,7 @@ def test_schema_basic():# Blank invalid schema
 
 def test_schema_parse():
     schema = na.CSchema.Empty()
-    with pytest.raises(ValueError):
+    with pytest.raises(RuntimeError):
         schema.parse()
 
     pa.schema([pa.field("col1", pa.int32())])._export_to_c(schema._addr())

From 0cc14b8d8343a1127de292e47ff0e2aa1aa088bf Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 09:54:31 -0400
Subject: [PATCH 16/52] don't run bootstrap when installing from sdist

---
 python/MANIFEST.in | 18 ++++++++++++++++++
 python/setup.py    |  7 +++++--
 2 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 python/MANIFEST.in

diff --git a/python/MANIFEST.in b/python/MANIFEST.in
new file mode 100644
index 000000000..61380d9a2
--- /dev/null
+++ b/python/MANIFEST.in
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+exclude bootstrap.py
diff --git a/python/setup.py b/python/setup.py
index fdf9eaba7..0acdb0e5c 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -24,9 +24,12 @@
 import numpy as np
 
 # Run bootstrap.py to run cmake generating a fresh bundle based on this
-# checkout or copy from ../dist if the caller doesn't have cmake available
+# checkout or copy from ../dist if the caller doesn't have cmake available.
+# Note that bootstrap.py won't exist if building from sdist.
 this_dir = os.path.dirname(__file__)
-subprocess.run([sys.executable, os.path.join(this_dir, 'bootstrap.py')])
+bootstrap_py = os.path.join(this_dir, 'bootstrap.py')
+if os.path.exists(bootstrap_py):
+    subprocess.run([sys.executable, bootstrap_py])
 
 setup(
     ext_modules=[

From 9e72b962044c40339086888bda258576b24a5950 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 09:57:55 -0400
Subject: [PATCH 17/52] make sure we can install from sdist

---
 python/MANIFEST.in | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 61380d9a2..9fc293725 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -16,3 +16,6 @@
 # under the License.
 
 exclude bootstrap.py
+include src/nanoarrow/nanoarrow.c
+include src/nanoarrow/nanoarrow.h
+include src/nanoarrow/nanoarrow_c.pxd

From 2cd5599f629c9a0188fdd89976bd51c7c7374c80 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 10:00:46 -0400
Subject: [PATCH 18/52] remove redundant cdefs

---
 python/bootstrap.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index 978314419..3ed7b10e2 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -67,8 +67,6 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
                 output.write(func_def.encode('UTF-8'))
                 output.write(b'\n')
 
-            output.write(b'\n')
-
     def _define_regexes(self):
         self.re_comment = re.compile(r'\s*//[^\n]*')
         self.re_type = re.compile(r'(?P<type>struct|union|enum) (?P<name>Arrow[^ ]+) {(?P<body>[^}]*)}')
@@ -98,7 +96,7 @@ def _type_to_cython(self, t, indent=''):
             items = [item for item in self.re_struct_delim.split(body) if item]
 
         cython_body = f'\n{indent}    '.join([''] + items)
-        return f'{indent}cdef {type} {name}:{cython_body}'
+        return f'{indent}{type} {name}:{cython_body}'
 
     def _func_def_to_cython(self, d, indent=''):
         return_type = d['return_type'].strip()
@@ -112,7 +110,7 @@ def _func_def_to_cython(self, d, indent=''):
         if args == 'void':
             args = ''
 
-        return f'{indent}cdef {return_type} {name}({args})'
+        return f'{indent}{return_type} {name}({args})'
 
     def _pxd_header(self):
         return """

From 0144afe74977050e4580702f548c46a30efe081e Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 10:23:00 -0400
Subject: [PATCH 19/52] fix asserts

---
 python/tests/test_nanoarrow.py | 86 ++++++++++++++++++----------------
 1 file changed, 45 insertions(+), 41 deletions(-)

diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 7dafa5387..0714b8e8b 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -15,15 +15,15 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import sys
 import numpy as np
 import pyarrow as pa
+import pytest
 
 import nanoarrow as na
 
-import pytest
-
 def test_version():
-    assert(na.version() == "0.2.0-SNAPSHOT")
+    assert na.version() == "0.2.0-SNAPSHOT"
 
 def test_as_numpy_array():
 
@@ -48,18 +48,18 @@ def test_as_numpy_array():
 def test_schema_basic():
     # Blank invalid schema
     schema = na.CSchema.Empty()
-    assert(schema.is_valid() is False)
-    assert(repr(schema) == "[invalid: schema is released]")
+    assert schema.is_valid() is False
+    assert repr(schema) == "[invalid: schema is released]"
 
     pa_schema = pa.schema([pa.field("some_name", pa.int32())])
     pa_schema._export_to_c(schema._addr())
 
-    assert(schema.format == "+s")
-    assert(schema.flags == 0)
-    assert(len(schema.children) == 1)
-    assert(schema.children[0].format == "i")
-    assert(schema.children[0].name == "some_name")
-    assert(repr(schema.children[0]) == "int32")
+    assert schema.format == "+s"
+    assert schema.flags == 0
+    assert len(schema.children) == 1
+    assert schema.children[0].format == "i"
+    assert schema.children[0].name == "some_name"
+    assert repr(schema.children[0]) == "int32"
 
     with pytest.raises(IndexError):
         schema.children[1]
@@ -72,57 +72,61 @@ def test_schema_parse():
     pa.schema([pa.field("col1", pa.int32())])._export_to_c(schema._addr())
 
     info = schema.parse()
-    assert(info['type'] == 'struct')
-    assert(info['storage_type'] == 'struct')
-    assert(info['name'] == '')
+    assert info['type'] == 'struct'
+    assert info['storage_type'] == 'struct'
+    assert info['name'] == ''
 
     # Check on the child
     child = schema.children[0]
     child_info = child.parse()
-    assert(child_info['type'] == 'int32')
-    assert(child_info['storage_type'] == 'int32')
-    assert(child_info['name'] == 'col1')
+    assert child_info['type'] == 'int32'
+    assert child_info['storage_type'] == 'int32'
+    assert child_info['name'] == 'col1'
 
 def test_schema_info_params():
     schema = na.CSchema.Empty()
     pa.binary(12)._export_to_c(schema._addr())
-    assert(schema.parse()['fixed_size'] == 12)
+    assert schema.parse()['fixed_size'] == 12
 
     schema = na.CSchema.Empty()
     pa.list_(pa.int32(), 12)._export_to_c(schema._addr())
-    assert(schema.parse()['fixed_size'] == 12)
+    assert schema.parse()['fixed_size'] == 12
 
     schema = na.CSchema.Empty()
     pa.decimal128(10, 3)._export_to_c(schema._addr())
-    assert(schema.parse()['decimal_bitwidth'] == 128)
-    assert(schema.parse()['decimal_precision'] == 10)
-    assert(schema.parse()['decimal_scale'] == 3)
+    assert schema.parse()['decimal_bitwidth'] == 128
+    assert schema.parse()['decimal_precision'] == 10
+    assert schema.parse()['decimal_scale'] == 3
 
 def test_array():
     schema = na.CSchema.Empty()
     pa.int32()._export_to_c(schema._addr())
 
     array = na.CArray.Empty(schema)
-    assert(array.is_valid() is False)
+    assert array.is_valid() is False
 
     pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr())
-    assert(array.is_valid() is True)
+    assert array.is_valid() is True
 
     view = array.validate()
 
-    assert(view.array is array)
-    assert(view.schema is schema)
-    assert(len(view) == 3)
+    assert view.array is array
+    assert view.schema is schema
+    assert len(view) == 3
 
-    assert(view.value_int(0) == 1)
-    assert(view.value_int(1) == 2)
-    assert(view.value_int(2) == 3)
+    assert view.value_int(0) == 1
+    assert view.value_int(1) == 2
+    assert view.value_int(2) == 3
 
     data_buffer = memoryview(view.buffers[1])
-    assert(len(data_buffer) == 12)
+    assert len(data_buffer) == 12
     data_buffer_copy = bytes(data_buffer)
     # (needs updating if testing on big endian)
-    assert(data_buffer_copy == b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00')
+
+    if sys.byteorder == 'little':
+        assert data_buffer_copy == b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00'
+    else:
+        assert data_buffer_copy == b'\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03'
 
 def test_array_recursive():
     pa_array = pa.array([1, 2, 3], pa.int32())
@@ -130,26 +134,26 @@ def test_array_recursive():
 
     schema = na.CSchema.Empty()
     pa_batch.schema._export_to_c(schema._addr())
-    assert(len(schema.children) == 1)
+    assert len(schema.children) == 1
     with pytest.raises(IndexError):
         schema.children[1]
 
     array = na.CArray.Empty(schema)
-    assert(array.is_valid() is False)
+    assert array.is_valid() is False
 
     pa_batch._export_to_c(array._addr())
-    assert(array.is_valid() is True)
-    assert(len(array.children) == 1)
+    assert array.is_valid() is True
+    assert len(array.children) == 1
     with pytest.raises(IndexError):
         array.children[1]
 
     view = array.validate()
-    assert(len(view.children) == 1)
+    assert len(view.children) == 1
     with pytest.raises(IndexError):
        view.children[1]
 
     child = view.children[0]
-    assert(len(child) == 3)
-    assert(child.value_int(0) == 1)
-    assert(child.value_int(1) == 2)
-    assert(child.value_int(2) == 3)
+    assert len(child) == 3
+    assert child.value_int(0) == 1
+    assert child.value_int(1) == 2
+    assert child.value_int(2) == 3

From b768724ebe82446523d27d096fa2f087e25027a2 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 10:26:35 -0400
Subject: [PATCH 20/52] match version against a regex in test_version

---
 python/tests/test_nanoarrow.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 0714b8e8b..d1434974c 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -16,6 +16,7 @@
 # under the License.
 
 import sys
+import re
 import numpy as np
 import pyarrow as pa
 import pytest
@@ -23,7 +24,8 @@
 import nanoarrow as na
 
 def test_version():
-    assert na.version() == "0.2.0-SNAPSHOT"
+    re_version = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?$')
+    assert re_version.match(na.version()) is not None
 
 def test_as_numpy_array():
 

From 846c8376b4b70e36a948e56db2c6727bfe399e36 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 10:38:07 -0400
Subject: [PATCH 21/52] drop C class prefix, drop numpy dependency, pythonize
 quotes

---
 python/pyproject.toml            |   4 +-
 python/setup.py                  |  11 ++-
 python/src/nanoarrow/__init__.py |   8 +-
 python/src/nanoarrow/_lib.pyx    | 141 +++++++++----------------------
 python/tests/test_nanoarrow.py   |  39 ++-------
 5 files changed, 57 insertions(+), 146 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 1cc2c17ec..52b7d5bc5 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -23,7 +23,6 @@ description = ""
 authors = [{name = "Apache Arrow Developers", email = "dev@arrow.apache.org"}]
 license = {text = "Apache-2.0"}
 requires-python = ">=3.8"
-dependencies = ["numpy"]
 
 [project.optional-dependencies]
 test = ["pyarrow", "pytest"]
@@ -36,7 +35,6 @@ repository = "https://github.com/apache/arrow-nanoarrow"
 requires = [
     "setuptools >= 61.0.0",
     "setuptools-scm",
-    "Cython",
-    "oldest-supported-numpy",
+    "Cython"
 ]
 build-backend = "setuptools.build_meta"
diff --git a/python/setup.py b/python/setup.py
index 0acdb0e5c..8b4b61c42 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -21,7 +21,6 @@
 import sys
 import subprocess
 from setuptools import Extension, setup
-import numpy as np
 
 # Run bootstrap.py to run cmake generating a fresh bundle based on this
 # checkout or copy from ../dist if the caller doesn't have cmake available.
@@ -34,12 +33,12 @@
 setup(
     ext_modules=[
         Extension(
-            name="nanoarrow._lib",
-            include_dirs=[np.get_include(), "src/nanoarrow"],
-            language="c",
+            name='nanoarrow._lib',
+            include_dirs=['src/nanoarrow'],
+            language='c',
             sources=[
-                "src/nanoarrow/_lib.pyx",
-                "src/nanoarrow/nanoarrow.c",
+                'src/nanoarrow/_lib.pyx',
+                'src/nanoarrow/nanoarrow.c',
             ],
         )
     ]
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index e9c74a974..18847ccad 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -16,10 +16,8 @@
 # under the License.
 
 from ._lib import (  # noqa: F401
-    as_numpy_array,
     version,
-    CSchemaHolder,
-    CSchema,
-    CArray,
-    CArrayView
+    Schema,
+    Array,
+    ArrayView
 )
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 0dd022e95..a281fa522 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -19,78 +19,15 @@
 
 """Low-level nanoarrow Python bindings."""
 
-from libc.stdint cimport uint8_t, uintptr_t, int64_t
+from libc.stdint cimport uintptr_t, int64_t
 from cpython.mem cimport PyMem_Malloc, PyMem_Free
 from cpython cimport Py_buffer
 from nanoarrow_c cimport *
 
-import numpy as np
-cimport numpy as cnp
-
-cnp.import_array()
-
-
-cdef dict _numpy_type_map = {
-    NANOARROW_TYPE_UINT8: cnp.NPY_UINT8,
-    NANOARROW_TYPE_INT8: cnp.NPY_INT8,
-    NANOARROW_TYPE_UINT16: cnp.NPY_UINT16,
-    NANOARROW_TYPE_INT16: cnp.NPY_INT16,
-    NANOARROW_TYPE_UINT32: cnp.NPY_UINT32,
-    NANOARROW_TYPE_INT32: cnp.NPY_INT32,
-    NANOARROW_TYPE_UINT64: cnp.NPY_UINT64,
-    NANOARROW_TYPE_INT64: cnp.NPY_INT64,
-    NANOARROW_TYPE_HALF_FLOAT: cnp.NPY_FLOAT16,
-    NANOARROW_TYPE_FLOAT: cnp.NPY_FLOAT32,
-    NANOARROW_TYPE_DOUBLE: cnp.NPY_FLOAT64,
-}
-
-
-def as_numpy_array(arr):
-    cdef ArrowSchema schema
-    cdef ArrowArray array
-    cdef ArrowArrayView array_view
-    cdef ArrowError error
-
-    arr._export_to_c(<uintptr_t> &array, <uintptr_t> &schema)
-    ArrowArrayViewInitFromSchema(&array_view, &schema, &error)
-
-    # primitive arrays have DATA as the second buffer
-    if array_view.layout.buffer_type[1] != NANOARROW_BUFFER_TYPE_DATA:
-        raise TypeError("Cannot convert a non-primitive array")
-
-    # disallow nulls for this method
-    if array.null_count > 0:
-        raise ValueError("Cannot convert array with nulls")
-    elif array.null_count < 0:
-        # not yet computed
-        if array_view.layout.buffer_type[0] == NANOARROW_BUFFER_TYPE_VALIDITY:
-            if array.buffers[0] != NULL:
-                null_count = ArrowBitCountSet(
-                    <const uint8_t *>array.buffers[0], array.offset, array.length
-                )
-                if null_count > 0:
-                    raise ValueError("Cannot convert array with nulls")
-
-    cdef int type_num
-    if array_view.storage_type in _numpy_type_map:
-        type_num = _numpy_type_map[array_view.storage_type]
-    else:
-        raise NotImplementedError(array_view.storage_type)
-
-    cdef cnp.npy_intp dims[1]
-    dims[0] = array.length
-    cdef cnp.ndarray result = cnp.PyArray_New(
-        np.ndarray, 1, dims, type_num, NULL, <void *> array.buffers[1], -1, 0, <object>NULL
-    )
-    # TODO set base
-
-    return result
-
-
 def version():
     return ArrowNanoarrowVersion().decode("UTF-8")
 
-cdef class CSchemaHolder:
+cdef class SchemaHolder:
     cdef ArrowSchema c_schema
 
     def __init__(self):
@@ -103,7 +40,7 @@ cdef class CSchemaHolder:
     def _addr(self):
         return <uintptr_t>&self.c_schema
 
-cdef class CArrayHolder:
+cdef class ArrayHolder:
     cdef ArrowArray c_array
 
     def __init__(self):
@@ -116,7 +53,7 @@ cdef class CArrayHolder:
     def _addr(self):
         return <uintptr_t>&self.c_array
 
-cdef class CArrayViewHolder:
+cdef class ArrayViewHolder:
     cdef ArrowArrayView c_array_view
 
     def __init__(self):
@@ -128,14 +65,14 @@ cdef class CArrayViewHolder:
     def _addr(self):
         return <uintptr_t>&self.c_array_view
 
-cdef class CSchema:
+cdef class Schema:
     cdef object _base
     cdef ArrowSchema* _ptr
 
     @staticmethod
     def Empty():
-        base = CSchemaHolder()
-        return CSchema(base, base._addr())
+        base = SchemaHolder()
+        return Schema(base, base._addr())
 
     def __init__(self, object base, uintptr_t addr):
         self._base = base,
@@ -184,7 +121,7 @@ cdef class CSchema:
     @property
     def children(self):
         self._assert_valid()
-        return CSchemaChildren(self)
+        return SchemaChildren(self)
 
     def parse(self):
         self._assert_valid()
@@ -214,17 +151,17 @@ cdef class CSchema:
 
         return out
 
-cdef class CArray:
+cdef class Array:
     cdef object _base
     cdef ArrowArray* _ptr
-    cdef CSchema _schema
+    cdef Schema _schema
 
     @staticmethod
-    def Empty(CSchema schema):
-        base = CArrayHolder()
-        return CArray(base, base._addr(), schema)
+    def Empty(Schema schema):
+        base = ArrayHolder()
+        return Array(base, base._addr(), schema)
 
-    def __init__(self, object base, uintptr_t addr, CSchema schema):
+    def __init__(self, object base, uintptr_t addr, Schema schema):
         self._base = base,
         self._ptr = <ArrowArray*>addr
         self._schema = schema
@@ -245,10 +182,10 @@ cdef class CArray:
 
     @property
     def children(self):
-        return CArrayChildren(self)
+        return ArrayChildren(self)
 
     def validate(self):
-        cdef CArrayViewHolder holder = CArrayViewHolder()
+        cdef ArrayViewHolder holder = ArrayViewHolder()
 
         cdef ArrowError error
         cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
@@ -260,10 +197,10 @@ cdef class CArray:
         if result != NANOARROW_OK:
             raise ValueError(ArrowErrorMessage(&error))
 
-        return CArrayView(holder, holder._addr(), self)
+        return ArrayView(holder, holder._addr(), self)
 
 
-cdef class CBufferView:
+cdef class BufferView:
     cdef object _base
     cdef ArrowBufferView* _ptr
     cdef Py_ssize_t _shape
@@ -291,11 +228,11 @@ cdef class CBufferView:
     def __releasebuffer__(self, Py_buffer *buffer):
         pass
 
-cdef class CArrayViewBuffers:
-    cdef CArrayView _array_view
+cdef class ArrayViewBuffers:
+    cdef ArrayView _array_view
     cdef int64_t _length
 
-    def __init__(self, CArrayView array_view):
+    def __init__(self, ArrayView array_view):
         self._array_view = array_view
         self._length = array_view._array._ptr.n_buffers
 
@@ -307,25 +244,25 @@ cdef class CArrayViewBuffers:
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
         cdef ArrowBufferView* buffer_view = &(self._array_view._ptr.buffer_views[k])
-        return CBufferView(self._array_view, <uintptr_t>buffer_view)
+        return BufferView(self._array_view, <uintptr_t>buffer_view)
 
-cdef class CArrayView:
+cdef class ArrayView:
     cdef object _base
     cdef ArrowArrayView* _ptr
-    cdef CArray _array
+    cdef Array _array
 
-    def __init__(self, object base, uintptr_t addr, CArray array):
+    def __init__(self, object base, uintptr_t addr, Array array):
         self._base = base,
         self._ptr = <ArrowArrayView*>addr
         self._array = array
 
     @property
     def children(self):
-        return CArrayViewChildren(self)
+        return ArrayViewChildren(self)
 
     @property
     def buffers(self):
-        return CArrayViewBuffers(self)
+        return ArrayViewBuffers(self)
 
     @property
     def array(self):
@@ -343,11 +280,11 @@ cdef class CArrayView:
             raise IndexError()
         return ArrowArrayViewGetIntUnsafe(self._ptr, i)
 
-cdef class CSchemaChildren:
-    cdef CSchema _parent
+cdef class SchemaChildren:
+    cdef Schema _parent
     cdef int64_t _length
 
-    def __init__(self, CSchema parent):
+    def __init__(self, Schema parent):
         self._parent = parent
         self._length = parent._ptr.n_children
 
@@ -359,18 +296,18 @@ cdef class CSchemaChildren:
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
 
-        return CSchema(self._parent, self._child_addr(k))
+        return Schema(self._parent, self._child_addr(k))
 
     cdef _child_addr(self, int64_t i):
         cdef ArrowSchema** children = self._parent._ptr.children
         cdef ArrowSchema* child = children[i]
         return <uintptr_t>child
 
-cdef class CArrayChildren:
-    cdef CArray _parent
+cdef class ArrayChildren:
+    cdef Array _parent
     cdef int64_t _length
 
-    def __init__(self, CArray parent):
+    def __init__(self, Array parent):
         self._parent = parent
         self._length = parent._ptr.n_children
 
@@ -382,18 +319,18 @@ cdef class CArrayChildren:
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
 
-        return CArray(self._parent, self._child_addr(k))
+        return Array(self._parent, self._child_addr(k))
 
     cdef _child_addr(self, int64_t i):
         cdef ArrowArray** children = self._parent._ptr.children
         cdef ArrowArray* child = children[i]
         return <uintptr_t>child
 
-cdef class CArrayViewChildren:
-    cdef CArrayView _parent
+cdef class ArrayViewChildren:
+    cdef ArrayView _parent
     cdef int64_t _length
 
-    def __init__(self, CArrayView parent):
+    def __init__(self, ArrayView parent):
         self._parent = parent
         self._length = parent._ptr.n_children
 
@@ -405,7 +342,7 @@ cdef class CArrayViewChildren:
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
 
-        return CArrayView(self._parent, self._child_addr(k), self._parent._array)
+        return ArrayView(self._parent, self._child_addr(k), self._parent._array)
 
     cdef _child_addr(self, int64_t i):
         cdef ArrowArrayView** children = self._parent._ptr.children
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index d1434974c..37cb273be 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -17,7 +17,6 @@
 
 import sys
 import re
-import numpy as np
 import pyarrow as pa
 import pytest
 
@@ -27,29 +26,9 @@ def test_version():
     re_version = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?$')
     assert re_version.match(na.version()) is not None
 
-def test_as_numpy_array():
-
-    arr = pa.array([1, 2, 3])
-    result = na.as_numpy_array(arr)
-    expected = arr.to_numpy()
-    np.testing.assert_array_equal(result, expected)
-
-    arr = pa.array([1, 2, 3], pa.uint8())
-    result = na.as_numpy_array(arr)
-    expected = arr.to_numpy()
-    np.testing.assert_array_equal(result, expected)
-
-    arr = pa.array([1, 2, None])
-    with pytest.raises(ValueError, match="Cannot convert array with nulls"):
-        na.as_numpy_array(arr)
-
-    arr = pa.array([[1], [2, 3]])
-    with pytest.raises(TypeError, match="Cannot convert a non-primitive array"):
-        na.as_numpy_array(arr)
-
 def test_schema_basic():
     # Blank invalid schema
-    schema = na.CSchema.Empty()
+    schema = na.Schema.Empty()
     assert schema.is_valid() is False
     assert repr(schema) == "[invalid: schema is released]"
 
@@ -67,7 +46,7 @@ def test_schema_basic():
         schema.children[1]
 
 def test_schema_parse():
-    schema = na.CSchema.Empty()
+    schema = na.Schema.Empty()
     with pytest.raises(RuntimeError):
         schema.parse()
 
@@ -86,25 +65,25 @@ def test_schema_parse():
     assert child_info['name'] == 'col1'
 
 def test_schema_info_params():
-    schema = na.CSchema.Empty()
+    schema = na.Schema.Empty()
     pa.binary(12)._export_to_c(schema._addr())
     assert schema.parse()['fixed_size'] == 12
 
-    schema = na.CSchema.Empty()
+    schema = na.Schema.Empty()
     pa.list_(pa.int32(), 12)._export_to_c(schema._addr())
     assert schema.parse()['fixed_size'] == 12
 
-    schema = na.CSchema.Empty()
+    schema = na.Schema.Empty()
     pa.decimal128(10, 3)._export_to_c(schema._addr())
     assert schema.parse()['decimal_bitwidth'] == 128
     assert schema.parse()['decimal_precision'] == 10
     assert schema.parse()['decimal_scale'] == 3
 
 def test_array():
-    schema = na.CSchema.Empty()
+    schema = na.Schema.Empty()
     pa.int32()._export_to_c(schema._addr())
 
-    array = na.CArray.Empty(schema)
+    array = na.Array.Empty(schema)
     assert array.is_valid() is False
 
     pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr())
@@ -134,13 +113,13 @@ def test_array_recursive():
     pa_array = pa.array([1, 2, 3], pa.int32())
     pa_batch = pa.record_batch([pa_array], names=["some_column"])
 
-    schema = na.CSchema.Empty()
+    schema = na.Schema.Empty()
     pa_batch.schema._export_to_c(schema._addr())
     assert len(schema.children) == 1
     with pytest.raises(IndexError):
         schema.children[1]
 
-    array = na.CArray.Empty(schema)
+    array = na.Array.Empty(schema)
     assert array.is_valid() is False
 
     pa_batch._export_to_c(array._addr())

From ac304af59a6233604fb6404af31022d5ae6a6185 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 12:48:31 -0400
Subject: [PATCH 22/52] more complete schema field wrapping

---
 python/src/nanoarrow/_lib.pyx  | 243 ++++++++++++++++++++++++---------
 python/tests/test_nanoarrow.py |  84 +++++++++---
 2 files changed, 241 insertions(+), 86 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index a281fa522..2db4ce85e 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -21,6 +21,7 @@
 
 from libc.stdint cimport uintptr_t, int64_t
 from cpython.mem cimport PyMem_Malloc, PyMem_Free
+from cpython.bytes cimport PyBytes_FromStringAndSize
 from cpython cimport Py_buffer
 from nanoarrow_c cimport *
 
@@ -118,38 +119,121 @@ cdef class Schema:
     def flags(self):
         return self._ptr.flags
 
+    @property
+    def metadata(self):
+        self._assert_valid()
+        if self._ptr.metadata != NULL:
+            return SchemaMetadata(self, <uintptr_t>self._ptr.metadata)
+        else:
+            return None
+
     @property
     def children(self):
         self._assert_valid()
         return SchemaChildren(self)
 
-    def parse(self):
+    def view(self):
         self._assert_valid()
-
+        schema_view = SchemaView()
         cdef ArrowError error
-        cdef ArrowSchemaView schema_view
-
-        cdef int result = ArrowSchemaViewInit(&schema_view, self._ptr, &error)
+        cdef int result = ArrowSchemaViewInit(&schema_view._schema_view, self._ptr, &error)
         if result != NANOARROW_OK:
             raise ValueError(ArrowErrorMessage(&error))
+        return schema_view
 
-        out = {
-            'name': self._ptr.name.decode('UTF-8') if self._ptr.name else None,
-            'type': ArrowTypeString(schema_view.type).decode('UTF-8'),
-            'storage_type': ArrowTypeString(schema_view.storage_type).decode('UTF-8')
-        }
+cdef class SchemaView:
+    cdef ArrowSchemaView _schema_view
 
-        if schema_view.storage_type in (NANOARROW_TYPE_FIXED_SIZE_LIST,
-                                        NANOARROW_TYPE_FIXED_SIZE_BINARY):
-            out['fixed_size'] = schema_view.fixed_size
+    _fixed_size_types = (
+        NANOARROW_TYPE_FIXED_SIZE_LIST,
+        NANOARROW_TYPE_FIXED_SIZE_BINARY
+    )
 
-        if schema_view.storage_type in (NANOARROW_TYPE_DECIMAL128,
-                                        NANOARROW_TYPE_DECIMAL256):
-            out['decimal_bitwidth'] = schema_view.decimal_bitwidth
-            out['decimal_precision'] = schema_view.decimal_precision
-            out['decimal_scale'] = schema_view.decimal_scale
+    _decimal_types = (
+        NANOARROW_TYPE_DECIMAL128,
+        NANOARROW_TYPE_DECIMAL256
+    )
 
-        return out
+    _time_unit_types = (
+        NANOARROW_TYPE_TIME32,
+        NANOARROW_TYPE_TIME64,
+        NANOARROW_TYPE_DURATION,
+        NANOARROW_TYPE_TIMESTAMP
+    )
+
+    _union_types = (
+        NANOARROW_TYPE_DENSE_UNION,
+        NANOARROW_TYPE_SPARSE_UNION
+    )
+
+    def __init__(self):
+        self._schema_view.type = NANOARROW_TYPE_UNINITIALIZED
+        self._schema_view.storage_type = NANOARROW_TYPE_UNINITIALIZED
+
+    @property
+    def type(self):
+        cdef const char* type_str = ArrowTypeString(self._schema_view.type)
+        if type_str != NULL:
+            return type_str.decode('UTF-8')
+
+    @property
+    def storage_type(self):
+        cdef const char* type_str = ArrowTypeString(self._schema_view.storage_type)
+        if type_str != NULL:
+            return type_str.decode('UTF-8')
+
+    @property
+    def fixed_size(self):
+        if self._schema_view.type in SchemaView._fixed_size_types:
+            return self._schema_view.fixed_size
+
+    @property
+    def decimal_bitwidth(self):
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_bitwidth
+
+    @property
+    def decimal_precision(self):
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_precision
+
+    @property
+    def decimal_scale(self):
+        if self._schema_view.type in SchemaView._decimal_types:
+            return self._schema_view.decimal_scale
+
+    @property
+    def time_unit(self):
+        if self._schema_view.type in SchemaView._time_unit_types:
+            return ArrowTimeUnitString(self._schema_view.time_unit).decode('UTF-8')
+
+    @property
+    def timezone(self):
+        if self._schema_view.type == NANOARROW_TYPE_TIMESTAMP:
+            return self._schema_view.timezone.decode('UTF_8')
+
+    @property
+    def union_type_ids(self):
+        if self._schema_view.type in SchemaView._union_types:
+            type_ids_str = self._schema_view.union_type_ids.decode('UTF-8').split(',')
+            return (int(type_id) for type_id in type_ids_str)
+
+    @property
+    def extension_name(self):
+        if self._schema_view.extension_name.data != NULL:
+            name_bytes = PyBytes_FromStringAndSize(
+                self._schema_view.extension_name.data,
+                self._schema_view.extension_name.size_bytes
+            )
+            return name_bytes.decode('UTF-8')
+
+    @property
+    def extension_metadata(self):
+        if self._schema_view.extension_name.data != NULL:
+            return PyBytes_FromStringAndSize(
+                self._schema_view.extension_metadata.data,
+                self._schema_view.extension_metadata.size_bytes
+            )
 
 cdef class Array:
     cdef object _base
@@ -199,53 +283,6 @@ cdef class Array:
 
         return ArrayView(holder, holder._addr(), self)
 
-
-cdef class BufferView:
-    cdef object _base
-    cdef ArrowBufferView* _ptr
-    cdef Py_ssize_t _shape
-    cdef Py_ssize_t _strides
-
-    def __init__(self, object base, uintptr_t addr):
-        self._base = base
-        self._ptr = <ArrowBufferView*>addr
-        self._shape = self._ptr.size_bytes
-        self._strides = 1
-
-    def __getbuffer__(self, Py_buffer *buffer, int flags):
-        buffer.buf = self._ptr.data.data
-        buffer.format = NULL
-        buffer.internal = NULL
-        buffer.itemsize = 1
-        buffer.len = self._ptr.size_bytes
-        buffer.ndim = 1
-        buffer.obj = self
-        buffer.readonly = 1
-        buffer.shape = &self._shape
-        buffer.strides = &self._strides
-        buffer.suboffsets = NULL
-
-    def __releasebuffer__(self, Py_buffer *buffer):
-        pass
-
-cdef class ArrayViewBuffers:
-    cdef ArrayView _array_view
-    cdef int64_t _length
-
-    def __init__(self, ArrayView array_view):
-        self._array_view = array_view
-        self._length = array_view._array._ptr.n_buffers
-
-    def __len__(self):
-        return self._length
-
-    def __getitem__(self, k):
-        k = int(k)
-        if k < 0 or k >= self._length:
-            raise IndexError(f"{k} out of range [0, {self._length})")
-        cdef ArrowBufferView* buffer_view = &(self._array_view._ptr.buffer_views[k])
-        return BufferView(self._array_view, <uintptr_t>buffer_view)
-
 cdef class ArrayView:
     cdef object _base
     cdef ArrowArrayView* _ptr
@@ -303,6 +340,34 @@ cdef class SchemaChildren:
         cdef ArrowSchema* child = children[i]
         return <uintptr_t>child
 
+cdef class SchemaMetadata:
+    cdef object _parent
+    cdef const char* _metadata
+    cdef ArrowMetadataReader _reader
+
+    def __init__(self, object parent, uintptr_t ptr):
+        self._parent = parent
+        self._metadata = <const char*>ptr
+
+    def _init_reader(self):
+        cdef int result = ArrowMetadataReaderInit(&self._reader, self._metadata)
+        if result != NANOARROW_OK:
+            raise ValueError('ArrowMetadataReaderInit() failed')
+
+    def __len__(self):
+        self._init_reader()
+        return self._reader.remaining_keys
+
+    def __iter__(self):
+        cdef ArrowStringView key
+        cdef ArrowStringView value
+        self._init_reader()
+        while self._reader.remaining_keys > 0:
+            ArrowMetadataReaderRead(&self._reader, &key, &value)
+            key_obj = PyBytes_FromStringAndSize(key.data, key.size_bytes).decode('UTF-8')
+            value_obj = PyBytes_FromStringAndSize(value.data, value.size_bytes)
+            yield key_obj, value_obj
+
 cdef class ArrayChildren:
     cdef Array _parent
     cdef int64_t _length
@@ -348,3 +413,49 @@ cdef class ArrayViewChildren:
         cdef ArrowArrayView** children = self._parent._ptr.children
         cdef ArrowArrayView* child = children[i]
         return <uintptr_t>child
+
+cdef class BufferView:
+    cdef object _base
+    cdef ArrowBufferView* _ptr
+    cdef Py_ssize_t _shape
+    cdef Py_ssize_t _strides
+
+    def __init__(self, object base, uintptr_t addr):
+        self._base = base
+        self._ptr = <ArrowBufferView*>addr
+        self._shape = self._ptr.size_bytes
+        self._strides = 1
+
+    def __getbuffer__(self, Py_buffer *buffer, int flags):
+        buffer.buf = self._ptr.data.data
+        buffer.format = NULL
+        buffer.internal = NULL
+        buffer.itemsize = 1
+        buffer.len = self._ptr.size_bytes
+        buffer.ndim = 1
+        buffer.obj = self
+        buffer.readonly = 1
+        buffer.shape = &self._shape
+        buffer.strides = &self._strides
+        buffer.suboffsets = NULL
+
+    def __releasebuffer__(self, Py_buffer *buffer):
+        pass
+
+cdef class ArrayViewBuffers:
+    cdef ArrayView _array_view
+    cdef int64_t _length
+
+    def __init__(self, ArrayView array_view):
+        self._array_view = array_view
+        self._length = array_view._array._ptr.n_buffers
+
+    def __len__(self):
+        return self._length
+
+    def __getitem__(self, k):
+        k = int(k)
+        if k < 0 or k >= self._length:
+            raise IndexError(f"{k} out of range [0, {self._length})")
+        cdef ArrowBufferView* buffer_view = &(self._array_view._ptr.buffer_views[k])
+        return BufferView(self._array_view, <uintptr_t>buffer_view)
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 37cb273be..0c0077ff8 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -37,6 +37,7 @@ def test_schema_basic():
 
     assert schema.format == "+s"
     assert schema.flags == 0
+    assert schema.metadata is None
     assert len(schema.children) == 1
     assert schema.children[0].format == "i"
     assert schema.children[0].name == "some_name"
@@ -45,39 +46,82 @@ def test_schema_basic():
     with pytest.raises(IndexError):
         schema.children[1]
 
-def test_schema_parse():
+def test_schema_metadata():
     schema = na.Schema.Empty()
-    with pytest.raises(RuntimeError):
-        schema.parse()
+    meta = {'key1': 'value1', 'key2': 'value2'}
+    pa.field('', pa.int32(), metadata=meta)._export_to_c(schema._addr())
 
-    pa.schema([pa.field("col1", pa.int32())])._export_to_c(schema._addr())
+    assert len(schema.metadata) == 2
 
-    info = schema.parse()
-    assert info['type'] == 'struct'
-    assert info['storage_type'] == 'struct'
-    assert info['name'] == ''
+    meta2 = {k: v for k, v in schema.metadata}
+    assert list(meta2.keys()) == ['key1', 'key2']
+    assert list(meta2.values()) == [b'value1', b'value2']
 
-    # Check on the child
-    child = schema.children[0]
-    child_info = child.parse()
-    assert child_info['type'] == 'int32'
-    assert child_info['storage_type'] == 'int32'
-    assert child_info['name'] == 'col1'
+def test_schema_view():
+    schema = na.Schema.Empty()
+    with pytest.raises(RuntimeError):
+        schema.view()
 
-def test_schema_info_params():
+    pa.int32()._export_to_c(schema._addr())
+    view = schema.view()
+    assert view.type == 'int32'
+    assert view.storage_type == 'int32'
+
+    assert view.fixed_size is None
+    assert view.decimal_bitwidth is None
+    assert view.decimal_scale is None
+    assert view.time_unit is None
+    assert view.timezone is None
+    assert view.union_type_ids is None
+    assert view.extension_name is None
+    assert view.extension_metadata is None
+
+def test_schema_view_extra_params():
     schema = na.Schema.Empty()
     pa.binary(12)._export_to_c(schema._addr())
-    assert schema.parse()['fixed_size'] == 12
+    view = schema.view()
+    assert view.fixed_size == 12
 
     schema = na.Schema.Empty()
     pa.list_(pa.int32(), 12)._export_to_c(schema._addr())
-    assert schema.parse()['fixed_size'] == 12
+    assert view.fixed_size == 12
 
     schema = na.Schema.Empty()
     pa.decimal128(10, 3)._export_to_c(schema._addr())
-    assert schema.parse()['decimal_bitwidth'] == 128
-    assert schema.parse()['decimal_precision'] == 10
-    assert schema.parse()['decimal_scale'] == 3
+    view = schema.view()
+    assert view.decimal_bitwidth == 128
+    assert view.decimal_precision == 10
+    assert view.decimal_scale == 3
+
+    schema = na.Schema.Empty()
+    pa.decimal256(10, 3)._export_to_c(schema._addr())
+    view = schema.view()
+    assert view.decimal_bitwidth == 256
+    assert view.decimal_precision == 10
+    assert view.decimal_scale == 3
+
+    schema = na.Schema.Empty()
+    pa.duration('us')._export_to_c(schema._addr())
+    view = schema.view()
+    assert view.time_unit == 'us'
+
+    schema = na.Schema.Empty()
+    pa.timestamp('us', tz='America/Halifax')._export_to_c(schema._addr())
+    view = schema.view()
+    assert view.type == 'timestamp'
+    assert view.storage_type == 'int64'
+    assert view.time_unit == 'us'
+    assert view.timezone == 'America/Halifax'
+
+    schema = na.Schema.Empty()
+    meta = {
+        'ARROW:extension:name': 'some_name',
+        'ARROW:extension:metadata': 'some_metadata'
+    }
+    pa.field('', pa.int32(), metadata=meta)._export_to_c(schema._addr())
+    view = schema.view()
+    assert view.extension_name == 'some_name'
+    assert view.extension_metadata == b'some_metadata'
 
 def test_array():
     schema = na.Schema.Empty()

From 05d4a812dc72309f72a4b07003a2bcaf4a9f4063 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 13:42:45 -0400
Subject: [PATCH 23/52] test a few more array view things

---
 python/src/nanoarrow/_lib.pyx  | 50 +++++++++++++++++------
 python/tests/test_nanoarrow.py | 74 ++++++++++++++++++++--------------
 2 files changed, 80 insertions(+), 44 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 2db4ce85e..1aa414b01 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -132,6 +132,14 @@ cdef class Schema:
         self._assert_valid()
         return SchemaChildren(self)
 
+    @property
+    def dictionary(self):
+        self._assert_valid()
+        if self._ptr.dictionary != NULL:
+            return Schema(self, <uintptr_t>self._ptr.dictionary)
+        else:
+            return None
+
     def view(self):
         self._assert_valid()
         schema_view = SchemaView()
@@ -264,11 +272,37 @@ cdef class Array:
     def schema(self):
         return self._schema
 
+    @property
+    def length(self):
+        self._assert_valid()
+        return self._ptr.length
+
+    @property
+    def offset(self):
+        self._assert_valid()
+        return self._ptr.offset
+
+    @property
+    def null_count(self):
+        return self._ptr.null_count
+
+    @property
+    def buffers(self):
+        return tuple(<uintptr_t>self._ptr.buffers[i] for i in range(self._ptr.n_buffers))
+
     @property
     def children(self):
         return ArrayChildren(self)
 
-    def validate(self):
+    @property
+    def dictionary(self):
+        self._assert_valid()
+        if self._ptr.dictionary != NULL:
+            return Array(self, <uintptr_t>self._ptr.dictionary, self._schema.dictionary)
+        else:
+            return None
+
+    def view(self):
         cdef ArrayViewHolder holder = ArrayViewHolder()
 
         cdef ArrowError error
@@ -309,14 +343,6 @@ cdef class ArrayView:
     def schema(self):
         return self._array._schema
 
-    def __len__(self):
-        return self._ptr.array.length
-
-    def value_int(self, int64_t i):
-        if i < 0 or i >= self._ptr.array.length:
-            raise IndexError()
-        return ArrowArrayViewGetIntUnsafe(self._ptr, i)
-
 cdef class SchemaChildren:
     cdef Schema _parent
     cdef int64_t _length
@@ -383,8 +409,7 @@ cdef class ArrayChildren:
         k = int(k)
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
-
-        return Array(self._parent, self._child_addr(k))
+        return Array(self._parent, self._child_addr(k), self._parent.schema.children[k])
 
     cdef _child_addr(self, int64_t i):
         cdef ArrowArray** children = self._parent._ptr.children
@@ -406,8 +431,7 @@ cdef class ArrayViewChildren:
         k = int(k)
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
-
-        return ArrayView(self._parent, self._child_addr(k), self._parent._array)
+        return ArrayView(self._parent, self._child_addr(k), self._parent._array.children[k])
 
     cdef _child_addr(self, int64_t i):
         cdef ArrowArrayView** children = self._parent._ptr.children
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 0c0077ff8..af5b24588 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -27,7 +27,6 @@ def test_version():
     assert re_version.match(na.version()) is not None
 
 def test_schema_basic():
-    # Blank invalid schema
     schema = na.Schema.Empty()
     assert schema.is_valid() is False
     assert repr(schema) == "[invalid: schema is released]"
@@ -42,10 +41,17 @@ def test_schema_basic():
     assert schema.children[0].format == "i"
     assert schema.children[0].name == "some_name"
     assert repr(schema.children[0]) == "int32"
+    assert schema.dictionary is None
 
     with pytest.raises(IndexError):
         schema.children[1]
 
+def test_schema_dictionary():
+    schema = na.Schema.Empty()
+    pa.dictionary(pa.int32(), pa.utf8())._export_to_c(schema._addr())
+    assert schema.format == 'i'
+    assert schema.dictionary.format == 'u'
+
 def test_schema_metadata():
     schema = na.Schema.Empty()
     meta = {'key1': 'value1', 'key2': 'value2'}
@@ -132,53 +138,59 @@ def test_array():
 
     pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr())
     assert array.is_valid() is True
+    assert array.length == 3
+    assert array.offset == 0
+    assert array.null_count == 0
+    assert len(array.buffers) == 2
+    assert array.buffers[0] == 0
+    assert len(array.children) == 0
+    assert array.dictionary is None
 
-    view = array.validate()
+    with pytest.raises(IndexError):
+        array.children[1]
 
-    assert view.array is array
-    assert view.schema is schema
-    assert len(view) == 3
+def test_array_view():
+    array = na.Array.Empty(na.Schema.Empty())
+    pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr(), array.schema._addr())
+    view = array.view()
 
-    assert view.value_int(0) == 1
-    assert view.value_int(1) == 2
-    assert view.value_int(2) == 3
+    assert view.array is array
+    assert view.schema is array.schema
 
     data_buffer = memoryview(view.buffers[1])
     assert len(data_buffer) == 12
     data_buffer_copy = bytes(data_buffer)
-    # (needs updating if testing on big endian)
 
     if sys.byteorder == 'little':
         assert data_buffer_copy == b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00'
     else:
         assert data_buffer_copy == b'\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03'
 
-def test_array_recursive():
-    pa_array = pa.array([1, 2, 3], pa.int32())
-    pa_batch = pa.record_batch([pa_array], names=["some_column"])
-
-    schema = na.Schema.Empty()
-    pa_batch.schema._export_to_c(schema._addr())
-    assert len(schema.children) == 1
     with pytest.raises(IndexError):
-        schema.children[1]
+        view.children[1]
 
-    array = na.Array.Empty(schema)
-    assert array.is_valid() is False
+def test_array_view_recursive():
+    pa_array_child = pa.array([1, 2, 3], pa.int32())
+    pa_array = pa.record_batch([pa_array_child], names=["some_column"])
 
-    pa_batch._export_to_c(array._addr())
-    assert array.is_valid() is True
+    array = na.Array.Empty(na.Schema.Empty())
+    pa_array._export_to_c(array._addr(), array.schema._addr())
+
+    assert array.schema.format == '+s'
+    assert array.length == 3
     assert len(array.children) == 1
-    with pytest.raises(IndexError):
-        array.children[1]
 
-    view = array.validate()
+    assert array.children[0].schema.format == 'i'
+    assert array.children[0].length == 3
+    assert array.children[0].schema._addr() == array.schema.children[0]._addr()
+
+    view = array.view()
+    assert len(view.buffers) == 1
     assert len(view.children) == 1
-    with pytest.raises(IndexError):
-       view.children[1]
+    assert view.array._addr() == array._addr()
+    assert view.schema._addr() == array.schema._addr()
 
-    child = view.children[0]
-    assert len(child) == 3
-    assert child.value_int(0) == 1
-    assert child.value_int(1) == 2
-    assert child.value_int(2) == 3
+    assert len(view.children[0].buffers) == 2
+    assert view.children[0].array._addr() == array.children[0]._addr()
+    assert view.children[0].schema._addr() == array.schema.children[0]._addr()
+    assert view.children[0].schema._addr() == array.children[0].schema._addr()

From bbf18df4425edb9df511699984e858d03bcdadcb Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 14:06:48 -0400
Subject: [PATCH 24/52] maybe install on Windows, update install instructions

---
 python/README.md    | 28 ++++++++++++++++++++--------
 python/bootstrap.py |  6 +++++-
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/python/README.md b/python/README.md
index 701896bb5..04d05898b 100644
--- a/python/README.md
+++ b/python/README.md
@@ -19,26 +19,38 @@
 
 # nanoarrow for Python
 
-Python bindings for nanoarrow.
+Python bindings for nanoarrow. These are in a preliminary state: see open issues
+and tests/test_nanoarrow.py for usage.
+
+## Installation
+
+Python bindings for nanoarrow are not yet available on PyPI. You can install via
+URL (requires a C compiler):
+
+```bash
+python -m pip install "https://github.com/apache/arrow-nanoarrow/archive/refs/heads/main.zip#egg=nanoarrow&subdirectory=python"
+```
+
 ## Building
 
-Python libraries are managed with [setuptools][setuptools]. In general, that
-means all projects can be built as follows:
+Python bindings for nanoarrow are managed with [setuptools][setuptools]. This means you
+can build the project using:
 
 ```shell
-$ cd python
-$ pip install -e .
+git clone https://github.com/apache/arrow-nanoarrow.git
+cd arrow-nanoarrow/python
+pip install -e .
 ```
 
 Tests use [pytest][pytest]:
 
 ```shell
 # Install dependencies
-$ pip install -e .[test]
+pip install -e .[test]
 
 # Run tests
-$ pytest -vvx
+pytest -vvx
 ```
 
 [pytest]: https://docs.pytest.org/
-[setuptools]: https://setuptools.pypa.io/en/latest/index.html
\ No newline at end of file
+[setuptools]: https://setuptools.pypa.io/en/latest/index.html
diff --git a/python/bootstrap.py b/python/bootstrap.py
index 3ed7b10e2..2aeb90735 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -164,7 +164,11 @@ def copy_or_generate_nanoarrow_c():
             os.system(f'cmake --install . --prefix=../src/nanoarrow')
         finally:
             if os.path.exists(build_dir):
-                shutil.rmtree(build_dir)
+                # Can fail on Windows with permission issues
+                try:
+                    shutil.rmtree(build_dir)
+                except Exception as e:
+                    print(f'Failed to remove _cmake temp directory: {str(e)}')
             os.chdir(this_wd)
 
     elif is_in_nanoarrow_repo:

From 085b82a47842f94eb7affe94f8c075c4f912f2ef Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Fri, 10 Mar 2023 14:11:18 -0400
Subject: [PATCH 25/52] Empty -> empty

---
 python/src/nanoarrow/_lib.pyx  |  2 +-
 python/tests/test_nanoarrow.py | 28 ++++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 1aa414b01..b06b26fe7 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -71,7 +71,7 @@ cdef class Schema:
     cdef ArrowSchema* _ptr
 
     @staticmethod
-    def Empty():
+    def empty():
         base = SchemaHolder()
         return Schema(base, base._addr())
 
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index af5b24588..8935d5442 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -27,7 +27,7 @@ def test_version():
     assert re_version.match(na.version()) is not None
 
 def test_schema_basic():
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     assert schema.is_valid() is False
     assert repr(schema) == "[invalid: schema is released]"
 
@@ -47,13 +47,13 @@ def test_schema_basic():
         schema.children[1]
 
 def test_schema_dictionary():
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.dictionary(pa.int32(), pa.utf8())._export_to_c(schema._addr())
     assert schema.format == 'i'
     assert schema.dictionary.format == 'u'
 
 def test_schema_metadata():
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     meta = {'key1': 'value1', 'key2': 'value2'}
     pa.field('', pa.int32(), metadata=meta)._export_to_c(schema._addr())
 
@@ -64,7 +64,7 @@ def test_schema_metadata():
     assert list(meta2.values()) == [b'value1', b'value2']
 
 def test_schema_view():
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     with pytest.raises(RuntimeError):
         schema.view()
 
@@ -83,35 +83,35 @@ def test_schema_view():
     assert view.extension_metadata is None
 
 def test_schema_view_extra_params():
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.binary(12)._export_to_c(schema._addr())
     view = schema.view()
     assert view.fixed_size == 12
 
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.list_(pa.int32(), 12)._export_to_c(schema._addr())
     assert view.fixed_size == 12
 
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.decimal128(10, 3)._export_to_c(schema._addr())
     view = schema.view()
     assert view.decimal_bitwidth == 128
     assert view.decimal_precision == 10
     assert view.decimal_scale == 3
 
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.decimal256(10, 3)._export_to_c(schema._addr())
     view = schema.view()
     assert view.decimal_bitwidth == 256
     assert view.decimal_precision == 10
     assert view.decimal_scale == 3
 
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.duration('us')._export_to_c(schema._addr())
     view = schema.view()
     assert view.time_unit == 'us'
 
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.timestamp('us', tz='America/Halifax')._export_to_c(schema._addr())
     view = schema.view()
     assert view.type == 'timestamp'
@@ -119,7 +119,7 @@ def test_schema_view_extra_params():
     assert view.time_unit == 'us'
     assert view.timezone == 'America/Halifax'
 
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     meta = {
         'ARROW:extension:name': 'some_name',
         'ARROW:extension:metadata': 'some_metadata'
@@ -130,7 +130,7 @@ def test_schema_view_extra_params():
     assert view.extension_metadata == b'some_metadata'
 
 def test_array():
-    schema = na.Schema.Empty()
+    schema = na.Schema.empty()
     pa.int32()._export_to_c(schema._addr())
 
     array = na.Array.Empty(schema)
@@ -150,7 +150,7 @@ def test_array():
         array.children[1]
 
 def test_array_view():
-    array = na.Array.Empty(na.Schema.Empty())
+    array = na.Array.Empty(na.Schema.empty())
     pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr(), array.schema._addr())
     view = array.view()
 
@@ -173,7 +173,7 @@ def test_array_view_recursive():
     pa_array_child = pa.array([1, 2, 3], pa.int32())
     pa_array = pa.record_batch([pa_array_child], names=["some_column"])
 
-    array = na.Array.Empty(na.Schema.Empty())
+    array = na.Array.Empty(na.Schema.empty())
     pa_array._export_to_c(array._addr(), array.schema._addr())
 
     assert array.schema.format == '+s'

From a4d0490c09ae9a54cd7f86f2b9bf31ea63faa51d Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Fri, 9 Jun 2023 16:32:03 -0300
Subject: [PATCH 26/52] nogil

---
 python/bootstrap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index 2aeb90735..8cdc83dcd 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -51,7 +51,7 @@ def generate_nanoarrow_pxd(self, file_in, file_out):
         with open(file_out, 'wb') as output:
             output.write(header.encode('UTF-8'))
 
-            output.write(f'\ncdef extern from "{file_in_name}":\n'.encode("UTF-8"))
+            output.write(f'\ncdef extern from "{file_in_name}" nogil:\n'.encode("UTF-8"))
 
             # A few things we add in manually
             output.write(b'\n')

From 55dcdb41111e749e245156c9c2e1b30a9cbefd5c Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Fri, 9 Jun 2023 16:51:17 -0300
Subject: [PATCH 27/52] add buffer data types to properly set format

---
 python/src/nanoarrow/_lib.pyx | 46 ++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index b06b26fe7..20fddc6a5 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -34,7 +34,7 @@ cdef class SchemaHolder:
     def __init__(self):
         self.c_schema.release = NULL
 
-    def __del__(self):
+    def __dealloc__(self):
         if self.c_schema.release != NULL:
           self.c_schema.release(&self.c_schema)
 
@@ -47,7 +47,7 @@ cdef class ArrayHolder:
     def __init__(self):
         self.c_array.release = NULL
 
-    def __del__(self):
+    def __dealloc__(self):
         if self.c_array.release != NULL:
           self.c_array.release(&self.c_array)
 
@@ -60,7 +60,7 @@ cdef class ArrayViewHolder:
     def __init__(self):
         ArrowArrayViewInitFromType(&self.c_array_view, NANOARROW_TYPE_UNINITIALIZED)
 
-    def __del__(self):
+    def __dealloc__(self):
         ArrowArrayViewReset(&self.c_array_view)
 
     def _addr(self):
@@ -441,18 +441,47 @@ cdef class ArrayViewChildren:
 cdef class BufferView:
     cdef object _base
     cdef ArrowBufferView* _ptr
+    cdef ArrowBufferType _buffer_type
+    cdef ArrowType _buffer_data_type
     cdef Py_ssize_t _shape
     cdef Py_ssize_t _strides
 
-    def __init__(self, object base, uintptr_t addr):
+    def __init__(self, object base, uintptr_t addr,
+                 ArrowBufferType buffer_type, ArrowType buffer_data_type):
         self._base = base
         self._ptr = <ArrowBufferView*>addr
+        self._buffer_type = buffer_type
+        self._buffer_data_type = buffer_data_type
         self._shape = self._ptr.size_bytes
         self._strides = 1
 
+    cdef const char* _get_format(self):
+        if self._buffer_data_type == NANOARROW_TYPE_INT8:
+            return "h"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT8:
+            return "B"
+        elif self._buffer_data_type == NANOARROW_TYPE_INT16:
+            return "h"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT16:
+            return "H"
+        elif self._buffer_data_type == NANOARROW_TYPE_INT32:
+            return "i"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT32:
+            return "I"
+        elif self._buffer_data_type == NANOARROW_TYPE_INT64:
+            return "l"
+        elif self._buffer_data_type == NANOARROW_TYPE_UINT64:
+            return "L"
+        elif self._buffer_data_type == NANOARROW_TYPE_FLOAT:
+            return "f"
+        elif self._buffer_data_type == NANOARROW_TYPE_DOUBLE:
+            return "B"
+        else:
+            return "z"
+
     def __getbuffer__(self, Py_buffer *buffer, int flags):
         buffer.buf = self._ptr.data.data
-        buffer.format = NULL
+        buffer.format = self._get_format()
         buffer.internal = NULL
         buffer.itemsize = 1
         buffer.len = self._ptr.size_bytes
@@ -482,4 +511,9 @@ cdef class ArrayViewBuffers:
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
         cdef ArrowBufferView* buffer_view = &(self._array_view._ptr.buffer_views[k])
-        return BufferView(self._array_view, <uintptr_t>buffer_view)
+        return BufferView(
+            self._array_view,
+            <uintptr_t>buffer_view,
+            self._array_view._ptr.layout.buffer_type[k],
+            self._array_view._ptr.layout.buffer_data_type[k]
+        )

From 984ea76592ca3bbe8f5e51a8fafc8a6baedca549 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Fri, 9 Jun 2023 16:53:46 -0300
Subject: [PATCH 28/52] add dictionary member

---
 python/src/nanoarrow/_lib.pyx | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 20fddc6a5..3d18650f3 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -335,6 +335,10 @@ cdef class ArrayView:
     def buffers(self):
         return ArrayViewBuffers(self)
 
+    @property
+    def dictionary(self):
+        return ArrayView(self, <uintptr_t>self._ptr.dictionary, self._array.dictionary)
+
     @property
     def array(self):
         return self._array

From fc0a7dcc6b1371f056a10f28a8173f3890f91e10 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Fri, 9 Jun 2023 23:04:22 -0300
Subject: [PATCH 29/52] more buffer info

---
 python/src/nanoarrow/_lib.pyx  | 31 ++++++++++++++++++++++++-------
 python/tests/test_nanoarrow.py | 15 ++++++++++++++-
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 3d18650f3..06909db73 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -447,17 +447,31 @@ cdef class BufferView:
     cdef ArrowBufferView* _ptr
     cdef ArrowBufferType _buffer_type
     cdef ArrowType _buffer_data_type
+    cdef Py_ssize_t _element_size_bits
     cdef Py_ssize_t _shape
     cdef Py_ssize_t _strides
 
     def __init__(self, object base, uintptr_t addr,
-                 ArrowBufferType buffer_type, ArrowType buffer_data_type):
+                 ArrowBufferType buffer_type, ArrowType buffer_data_type,
+                 Py_ssize_t element_size_bits):
         self._base = base
         self._ptr = <ArrowBufferView*>addr
         self._buffer_type = buffer_type
         self._buffer_data_type = buffer_data_type
-        self._shape = self._ptr.size_bytes
-        self._strides = 1
+        self._element_size_bits = element_size_bits
+        self._strides = self._item_size()
+        self._shape = self._ptr.size_bytes // self._strides
+
+
+    cdef Py_ssize_t _item_size(self):
+        if self._buffer_data_type == NANOARROW_TYPE_BOOL:
+            return 1
+        elif self._buffer_data_type == NANOARROW_TYPE_STRING:
+            return 1
+        elif self._buffer_data_type == NANOARROW_TYPE_BINARY:
+            return 1
+        else:
+            return self._element_size_bits // 8
 
     cdef const char* _get_format(self):
         if self._buffer_data_type == NANOARROW_TYPE_INT8:
@@ -479,15 +493,17 @@ cdef class BufferView:
         elif self._buffer_data_type == NANOARROW_TYPE_FLOAT:
             return "f"
         elif self._buffer_data_type == NANOARROW_TYPE_DOUBLE:
-            return "B"
+            return "d"
+        elif self._buffer_data_type == NANOARROW_TYPE_STRING:
+            return "c"
         else:
-            return "z"
+            return "B"
 
     def __getbuffer__(self, Py_buffer *buffer, int flags):
         buffer.buf = self._ptr.data.data
         buffer.format = self._get_format()
         buffer.internal = NULL
-        buffer.itemsize = 1
+        buffer.itemsize = self._strides
         buffer.len = self._ptr.size_bytes
         buffer.ndim = 1
         buffer.obj = self
@@ -519,5 +535,6 @@ cdef class ArrayViewBuffers:
             self._array_view,
             <uintptr_t>buffer_view,
             self._array_view._ptr.layout.buffer_type[k],
-            self._array_view._ptr.layout.buffer_data_type[k]
+            self._array_view._ptr.layout.buffer_data_type[k],
+            self._array_view._ptr.layout.element_size_bits[k]
         )
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 8935d5442..b436b6e85 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -158,8 +158,8 @@ def test_array_view():
     assert view.schema is array.schema
 
     data_buffer = memoryview(view.buffers[1])
-    assert len(data_buffer) == 12
     data_buffer_copy = bytes(data_buffer)
+    assert len(data_buffer_copy) == 12
 
     if sys.byteorder == 'little':
         assert data_buffer_copy == b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00'
@@ -194,3 +194,16 @@ def test_array_view_recursive():
     assert view.children[0].array._addr() == array.children[0]._addr()
     assert view.children[0].schema._addr() == array.schema.children[0]._addr()
     assert view.children[0].schema._addr() == array.children[0].schema._addr()
+
+def test_array_view_dictionary():
+    pa_array = pa.array(["a", "b", "b"], pa.dictionary(pa.int32(), pa.utf8()))
+
+    array = na.Array.Empty(na.Schema.empty())
+    pa_array._export_to_c(array._addr(), array.schema._addr())
+
+    assert array.schema.format == 'i'
+    assert array.dictionary.schema.format == 'u'
+
+    view = array.view()
+    assert len(view.buffers) == 2
+    assert len(view.dictionary.buffers) == 3

From 33dcbe7e6e31d47615e1c395a94d978744380d4e Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Fri, 9 Jun 2023 23:40:41 -0300
Subject: [PATCH 30/52] test buffer access with numpy

---
 python/pyproject.toml          |  2 +-
 python/src/nanoarrow/_lib.pyx  |  5 ++-
 python/tests/test_nanoarrow.py | 58 ++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 52b7d5bc5..3e4ee75a8 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -25,7 +25,7 @@ license = {text = "Apache-2.0"}
 requires-python = ">=3.8"
 
 [project.optional-dependencies]
-test = ["pyarrow", "pytest"]
+test = ["pyarrow", "pytest", "numpy"]
 
 [project.urls]
 homepage = "https://arrow.apache.org"
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 06909db73..7fc86780a 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -475,7 +475,7 @@ cdef class BufferView:
 
     cdef const char* _get_format(self):
         if self._buffer_data_type == NANOARROW_TYPE_INT8:
-            return "h"
+            return "b"
         elif self._buffer_data_type == NANOARROW_TYPE_UINT8:
             return "B"
         elif self._buffer_data_type == NANOARROW_TYPE_INT16:
@@ -531,6 +531,9 @@ cdef class ArrayViewBuffers:
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
         cdef ArrowBufferView* buffer_view = &(self._array_view._ptr.buffer_views[k])
+        if buffer_view.data.data == NULL:
+            return None
+
         return BufferView(
             self._array_view,
             <uintptr_t>buffer_view,
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index b436b6e85..68b809d35 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -17,6 +17,7 @@
 
 import sys
 import re
+import numpy as np
 import pyarrow as pa
 import pytest
 
@@ -207,3 +208,60 @@ def test_array_view_dictionary():
     view = array.view()
     assert len(view.buffers) == 2
     assert len(view.dictionary.buffers) == 3
+
+def test_buffers_data():
+    data_types = [
+        (pa.uint8(), np.uint8()),
+        (pa.int8(), np.int8()),
+        (pa.uint16(), np.uint16()),
+        (pa.int16(), np.int16()),
+        (pa.uint32(), np.uint32()),
+        (pa.int32(), np.int32()),
+        (pa.uint64(), np.uint64()),
+        (pa.int64(), np.int64()),
+        (pa.float32(), np.float32()),
+        (pa.float64(), np.float64())
+    ]
+
+    for pa_type, np_type in data_types:
+        pa_array = pa.array([0, 1, 2], pa_type)
+        array = na.Array.Empty(na.Schema.empty())
+        pa_array._export_to_c(array._addr(), array.schema._addr())
+        view = array.view()
+
+        np.testing.assert_array_equal(
+            np.array(view.buffers[1]),
+            np.array([0, 1, 2], np_type)
+        )
+
+def test_buffers_string():
+    pa_array = pa.array(["a", "bc", "def"])
+    array = na.Array.Empty(na.Schema.empty())
+    pa_array._export_to_c(array._addr(), array.schema._addr())
+    view = array.view()
+
+    assert view.buffers[0] is None
+    np.testing.assert_array_equal(
+        np.array(view.buffers[1]),
+        np.array([0, 1, 3, 6], np.int32())
+    )
+    np.testing.assert_array_equal(
+        np.array(view.buffers[2]),
+        np.array(list("abcdef"), dtype='|S1')
+    )
+
+def test_buffers_binary():
+    pa_array = pa.array([b"a", b"bc", b"def"])
+    array = na.Array.Empty(na.Schema.empty())
+    pa_array._export_to_c(array._addr(), array.schema._addr())
+    view = array.view()
+
+    assert view.buffers[0] is None
+    np.testing.assert_array_equal(
+        np.array(view.buffers[1]),
+        np.array([0, 1, 3, 6], np.int32())
+    )
+    np.testing.assert_array_equal(
+        np.array(view.buffers[2]),
+        np.array(list(b"abcdef"))
+    )

From 280193b35454a492291535ad8ddc6b706b9f06e3 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Fri, 9 Jun 2023 23:46:01 -0300
Subject: [PATCH 31/52] format with black

---
 python/src/nanoarrow/__init__.py |  7 +--
 python/tests/test_nanoarrow.py   | 88 +++++++++++++++++---------------
 2 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 18847ccad..9a96e58e8 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -15,9 +15,4 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import (  # noqa: F401
-    version,
-    Schema,
-    Array,
-    ArrayView
-)
+from ._lib import version, Schema, Array, ArrayView
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 68b809d35..ca31ebbfc 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -23,10 +23,12 @@
 
 import nanoarrow as na
 
+
 def test_version():
-    re_version = re.compile(r'^[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?$')
+    re_version = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?$")
     assert re_version.match(na.version()) is not None
 
+
 def test_schema_basic():
     schema = na.Schema.empty()
     assert schema.is_valid() is False
@@ -47,22 +49,25 @@ def test_schema_basic():
     with pytest.raises(IndexError):
         schema.children[1]
 
+
 def test_schema_dictionary():
     schema = na.Schema.empty()
     pa.dictionary(pa.int32(), pa.utf8())._export_to_c(schema._addr())
-    assert schema.format == 'i'
-    assert schema.dictionary.format == 'u'
+    assert schema.format == "i"
+    assert schema.dictionary.format == "u"
+
 
 def test_schema_metadata():
     schema = na.Schema.empty()
-    meta = {'key1': 'value1', 'key2': 'value2'}
-    pa.field('', pa.int32(), metadata=meta)._export_to_c(schema._addr())
+    meta = {"key1": "value1", "key2": "value2"}
+    pa.field("", pa.int32(), metadata=meta)._export_to_c(schema._addr())
 
     assert len(schema.metadata) == 2
 
     meta2 = {k: v for k, v in schema.metadata}
-    assert list(meta2.keys()) == ['key1', 'key2']
-    assert list(meta2.values()) == [b'value1', b'value2']
+    assert list(meta2.keys()) == ["key1", "key2"]
+    assert list(meta2.values()) == [b"value1", b"value2"]
+
 
 def test_schema_view():
     schema = na.Schema.empty()
@@ -71,8 +76,8 @@ def test_schema_view():
 
     pa.int32()._export_to_c(schema._addr())
     view = schema.view()
-    assert view.type == 'int32'
-    assert view.storage_type == 'int32'
+    assert view.type == "int32"
+    assert view.storage_type == "int32"
 
     assert view.fixed_size is None
     assert view.decimal_bitwidth is None
@@ -83,6 +88,7 @@ def test_schema_view():
     assert view.extension_name is None
     assert view.extension_metadata is None
 
+
 def test_schema_view_extra_params():
     schema = na.Schema.empty()
     pa.binary(12)._export_to_c(schema._addr())
@@ -108,27 +114,28 @@ def test_schema_view_extra_params():
     assert view.decimal_scale == 3
 
     schema = na.Schema.empty()
-    pa.duration('us')._export_to_c(schema._addr())
+    pa.duration("us")._export_to_c(schema._addr())
     view = schema.view()
-    assert view.time_unit == 'us'
+    assert view.time_unit == "us"
 
     schema = na.Schema.empty()
-    pa.timestamp('us', tz='America/Halifax')._export_to_c(schema._addr())
+    pa.timestamp("us", tz="America/Halifax")._export_to_c(schema._addr())
     view = schema.view()
-    assert view.type == 'timestamp'
-    assert view.storage_type == 'int64'
-    assert view.time_unit == 'us'
-    assert view.timezone == 'America/Halifax'
+    assert view.type == "timestamp"
+    assert view.storage_type == "int64"
+    assert view.time_unit == "us"
+    assert view.timezone == "America/Halifax"
 
     schema = na.Schema.empty()
     meta = {
-        'ARROW:extension:name': 'some_name',
-        'ARROW:extension:metadata': 'some_metadata'
+        "ARROW:extension:name": "some_name",
+        "ARROW:extension:metadata": "some_metadata",
     }
-    pa.field('', pa.int32(), metadata=meta)._export_to_c(schema._addr())
+    pa.field("", pa.int32(), metadata=meta)._export_to_c(schema._addr())
     view = schema.view()
-    assert view.extension_name == 'some_name'
-    assert view.extension_metadata == b'some_metadata'
+    assert view.extension_name == "some_name"
+    assert view.extension_metadata == b"some_metadata"
+
 
 def test_array():
     schema = na.Schema.empty()
@@ -150,6 +157,7 @@ def test_array():
     with pytest.raises(IndexError):
         array.children[1]
 
+
 def test_array_view():
     array = na.Array.Empty(na.Schema.empty())
     pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr(), array.schema._addr())
@@ -162,14 +170,15 @@ def test_array_view():
     data_buffer_copy = bytes(data_buffer)
     assert len(data_buffer_copy) == 12
 
-    if sys.byteorder == 'little':
-        assert data_buffer_copy == b'\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00'
+    if sys.byteorder == "little":
+        assert data_buffer_copy == b"\x01\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00"
     else:
-        assert data_buffer_copy == b'\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03'
+        assert data_buffer_copy == b"\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x03"
 
     with pytest.raises(IndexError):
         view.children[1]
 
+
 def test_array_view_recursive():
     pa_array_child = pa.array([1, 2, 3], pa.int32())
     pa_array = pa.record_batch([pa_array_child], names=["some_column"])
@@ -177,11 +186,11 @@ def test_array_view_recursive():
     array = na.Array.Empty(na.Schema.empty())
     pa_array._export_to_c(array._addr(), array.schema._addr())
 
-    assert array.schema.format == '+s'
+    assert array.schema.format == "+s"
     assert array.length == 3
     assert len(array.children) == 1
 
-    assert array.children[0].schema.format == 'i'
+    assert array.children[0].schema.format == "i"
     assert array.children[0].length == 3
     assert array.children[0].schema._addr() == array.schema.children[0]._addr()
 
@@ -196,19 +205,21 @@ def test_array_view_recursive():
     assert view.children[0].schema._addr() == array.schema.children[0]._addr()
     assert view.children[0].schema._addr() == array.children[0].schema._addr()
 
+
 def test_array_view_dictionary():
     pa_array = pa.array(["a", "b", "b"], pa.dictionary(pa.int32(), pa.utf8()))
 
     array = na.Array.Empty(na.Schema.empty())
     pa_array._export_to_c(array._addr(), array.schema._addr())
 
-    assert array.schema.format == 'i'
-    assert array.dictionary.schema.format == 'u'
+    assert array.schema.format == "i"
+    assert array.dictionary.schema.format == "u"
 
     view = array.view()
     assert len(view.buffers) == 2
     assert len(view.dictionary.buffers) == 3
 
+
 def test_buffers_data():
     data_types = [
         (pa.uint8(), np.uint8()),
@@ -220,7 +231,7 @@ def test_buffers_data():
         (pa.uint64(), np.uint64()),
         (pa.int64(), np.int64()),
         (pa.float32(), np.float32()),
-        (pa.float64(), np.float64())
+        (pa.float64(), np.float64()),
     ]
 
     for pa_type, np_type in data_types:
@@ -230,10 +241,10 @@ def test_buffers_data():
         view = array.view()
 
         np.testing.assert_array_equal(
-            np.array(view.buffers[1]),
-            np.array([0, 1, 2], np_type)
+            np.array(view.buffers[1]), np.array([0, 1, 2], np_type)
         )
 
+
 def test_buffers_string():
     pa_array = pa.array(["a", "bc", "def"])
     array = na.Array.Empty(na.Schema.empty())
@@ -242,14 +253,13 @@ def test_buffers_string():
 
     assert view.buffers[0] is None
     np.testing.assert_array_equal(
-        np.array(view.buffers[1]),
-        np.array([0, 1, 3, 6], np.int32())
+        np.array(view.buffers[1]), np.array([0, 1, 3, 6], np.int32())
     )
     np.testing.assert_array_equal(
-        np.array(view.buffers[2]),
-        np.array(list("abcdef"), dtype='|S1')
+        np.array(view.buffers[2]), np.array(list("abcdef"), dtype="|S1")
     )
 
+
 def test_buffers_binary():
     pa_array = pa.array([b"a", b"bc", b"def"])
     array = na.Array.Empty(na.Schema.empty())
@@ -258,10 +268,6 @@ def test_buffers_binary():
 
     assert view.buffers[0] is None
     np.testing.assert_array_equal(
-        np.array(view.buffers[1]),
-        np.array([0, 1, 3, 6], np.int32())
-    )
-    np.testing.assert_array_equal(
-        np.array(view.buffers[2]),
-        np.array(list(b"abcdef"))
+        np.array(view.buffers[1]), np.array([0, 1, 3, 6], np.int32())
     )
+    np.testing.assert_array_equal(np.array(view.buffers[2]), np.array(list(b"abcdef")))

From 97df28ece7a6adcf58fa27b0fba24af7f931f5d2 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Sat, 10 Jun 2023 23:22:34 -0300
Subject: [PATCH 32/52] some helpers

---
 python/pyproject.toml            |  2 +-
 python/src/nanoarrow/__init__.py |  1 +
 python/src/nanoarrow/_lib.pyx    |  2 +-
 python/src/nanoarrow/lib.py      | 31 +++++++++++
 python/tests/test_nanoarrow.py   | 88 +++++++++++++++-----------------
 5 files changed, 74 insertions(+), 50 deletions(-)
 create mode 100644 python/src/nanoarrow/lib.py

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 3e4ee75a8..743cebe0c 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -19,7 +19,7 @@
 [project]
 name = "nanoarrow"
 version = "1.0.0-alpha0"
-description = ""
+description = "Python bindings to the nanoarrow C library"
 authors = [{name = "Apache Arrow Developers", email = "dev@arrow.apache.org"}]
 license = {text = "Apache-2.0"}
 requires-python = ">=3.8"
diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 9a96e58e8..5b648d247 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -16,3 +16,4 @@
 # under the License.
 
 from ._lib import version, Schema, Array, ArrayView
+from .lib import schema, array
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 7fc86780a..18bcf7fbf 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -249,7 +249,7 @@ cdef class Array:
     cdef Schema _schema
 
     @staticmethod
-    def Empty(Schema schema):
+    def empty(Schema schema):
         base = ArrayHolder()
         return Array(base, base._addr(), schema)
 
diff --git a/python/src/nanoarrow/lib.py b/python/src/nanoarrow/lib.py
new file mode 100644
index 000000000..e0c8c508d
--- /dev/null
+++ b/python/src/nanoarrow/lib.py
@@ -0,0 +1,31 @@
+from ._lib import Schema, Array
+
+
+def schema(obj):
+    if isinstance(obj, Schema):
+        return obj
+
+    # Not entirely safe but will have to do until there's a dunder method
+    if hasattr(obj, "_export_to_c"):
+        out = Schema.empty()
+        obj._export_to_c(out._addr())
+        return out
+    else:
+        raise TypeError(
+            f"Can't convert object of type {type(obj).__name__} to nanoarrow.Schema"
+        )
+
+
+def array(obj):
+    if isinstance(obj, Array):
+        return obj
+
+    # Not entirely safe but will have to do until there's a dunder method
+    if hasattr(obj, "_export_to_c"):
+        out = Array.empty(Schema.empty())
+        obj._export_to_c(out._addr(), out.schema._addr())
+        return out
+    else:
+        raise TypeError(
+            f"Can't convert object of type {type(obj).__name__} to nanoarrow.Array"
+        )
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index ca31ebbfc..e340c7c86 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -29,13 +29,34 @@ def test_version():
     assert re_version.match(na.version()) is not None
 
 
+def test_schema_helper():
+    schema = na.Schema.empty()
+    assert na.schema(schema) is schema
+
+    schema = na.schema(pa.null())
+    assert isinstance(schema, na.Schema)
+
+    with pytest.raises(TypeError):
+        na.schema(None)
+
+
+def test_array_helper():
+    array = na.Array.empty(na.Schema.empty())
+    assert na.array(array) is array
+
+    array = na.array(pa.array([], pa.null()))
+    assert isinstance(array, na.Array)
+
+    with pytest.raises(TypeError):
+        na.schema(None)
+
+
 def test_schema_basic():
     schema = na.Schema.empty()
     assert schema.is_valid() is False
     assert repr(schema) == "[invalid: schema is released]"
 
-    pa_schema = pa.schema([pa.field("some_name", pa.int32())])
-    pa_schema._export_to_c(schema._addr())
+    schema = na.schema(pa.schema([pa.field("some_name", pa.int32())]))
 
     assert schema.format == "+s"
     assert schema.flags == 0
@@ -51,16 +72,14 @@ def test_schema_basic():
 
 
 def test_schema_dictionary():
-    schema = na.Schema.empty()
-    pa.dictionary(pa.int32(), pa.utf8())._export_to_c(schema._addr())
+    schema = na.schema(pa.dictionary(pa.int32(), pa.utf8()))
     assert schema.format == "i"
     assert schema.dictionary.format == "u"
 
 
 def test_schema_metadata():
-    schema = na.Schema.empty()
     meta = {"key1": "value1", "key2": "value2"}
-    pa.field("", pa.int32(), metadata=meta)._export_to_c(schema._addr())
+    schema = na.schema(pa.field("", pa.int32(), metadata=meta))
 
     assert len(schema.metadata) == 2
 
@@ -74,7 +93,7 @@ def test_schema_view():
     with pytest.raises(RuntimeError):
         schema.view()
 
-    pa.int32()._export_to_c(schema._addr())
+    schema = na.schema(pa.int32())
     view = schema.view()
     assert view.type == "int32"
     assert view.storage_type == "int32"
@@ -90,61 +109,48 @@ def test_schema_view():
 
 
 def test_schema_view_extra_params():
-    schema = na.Schema.empty()
-    pa.binary(12)._export_to_c(schema._addr())
+    schema = na.schema(pa.binary(12))
     view = schema.view()
     assert view.fixed_size == 12
 
-    schema = na.Schema.empty()
-    pa.list_(pa.int32(), 12)._export_to_c(schema._addr())
+    schema = na.schema(pa.list_(pa.int32(), 12))
     assert view.fixed_size == 12
 
-    schema = na.Schema.empty()
-    pa.decimal128(10, 3)._export_to_c(schema._addr())
+    schema = na.schema(pa.decimal128(10, 3))
     view = schema.view()
     assert view.decimal_bitwidth == 128
     assert view.decimal_precision == 10
     assert view.decimal_scale == 3
 
-    schema = na.Schema.empty()
-    pa.decimal256(10, 3)._export_to_c(schema._addr())
+    schema = na.schema(pa.decimal256(10, 3))
     view = schema.view()
     assert view.decimal_bitwidth == 256
     assert view.decimal_precision == 10
     assert view.decimal_scale == 3
 
-    schema = na.Schema.empty()
-    pa.duration("us")._export_to_c(schema._addr())
+    schema = na.schema(pa.duration("us"))
     view = schema.view()
     assert view.time_unit == "us"
 
-    schema = na.Schema.empty()
-    pa.timestamp("us", tz="America/Halifax")._export_to_c(schema._addr())
+    schema = na.schema(pa.timestamp("us", tz="America/Halifax"))
     view = schema.view()
     assert view.type == "timestamp"
     assert view.storage_type == "int64"
     assert view.time_unit == "us"
     assert view.timezone == "America/Halifax"
 
-    schema = na.Schema.empty()
     meta = {
         "ARROW:extension:name": "some_name",
         "ARROW:extension:metadata": "some_metadata",
     }
-    pa.field("", pa.int32(), metadata=meta)._export_to_c(schema._addr())
+    schema = na.schema(pa.field("", pa.int32(), metadata=meta))
     view = schema.view()
     assert view.extension_name == "some_name"
     assert view.extension_metadata == b"some_metadata"
 
 
 def test_array():
-    schema = na.Schema.empty()
-    pa.int32()._export_to_c(schema._addr())
-
-    array = na.Array.Empty(schema)
-    assert array.is_valid() is False
-
-    pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr())
+    array = na.array(pa.array([1, 2, 3], pa.int32()))
     assert array.is_valid() is True
     assert array.length == 3
     assert array.offset == 0
@@ -159,8 +165,7 @@ def test_array():
 
 
 def test_array_view():
-    array = na.Array.Empty(na.Schema.empty())
-    pa.array([1, 2, 3], pa.int32())._export_to_c(array._addr(), array.schema._addr())
+    array = na.array(pa.array([1, 2, 3], pa.int32()))
     view = array.view()
 
     assert view.array is array
@@ -183,8 +188,7 @@ def test_array_view_recursive():
     pa_array_child = pa.array([1, 2, 3], pa.int32())
     pa_array = pa.record_batch([pa_array_child], names=["some_column"])
 
-    array = na.Array.Empty(na.Schema.empty())
-    pa_array._export_to_c(array._addr(), array.schema._addr())
+    array = na.array(pa_array)
 
     assert array.schema.format == "+s"
     assert array.length == 3
@@ -208,9 +212,7 @@ def test_array_view_recursive():
 
 def test_array_view_dictionary():
     pa_array = pa.array(["a", "b", "b"], pa.dictionary(pa.int32(), pa.utf8()))
-
-    array = na.Array.Empty(na.Schema.empty())
-    pa_array._export_to_c(array._addr(), array.schema._addr())
+    array = na.array(pa_array)
 
     assert array.schema.format == "i"
     assert array.dictionary.schema.format == "u"
@@ -235,21 +237,14 @@ def test_buffers_data():
     ]
 
     for pa_type, np_type in data_types:
-        pa_array = pa.array([0, 1, 2], pa_type)
-        array = na.Array.Empty(na.Schema.empty())
-        pa_array._export_to_c(array._addr(), array.schema._addr())
-        view = array.view()
-
+        view = na.array(pa.array([0, 1, 2], pa_type)).view()
         np.testing.assert_array_equal(
             np.array(view.buffers[1]), np.array([0, 1, 2], np_type)
         )
 
 
 def test_buffers_string():
-    pa_array = pa.array(["a", "bc", "def"])
-    array = na.Array.Empty(na.Schema.empty())
-    pa_array._export_to_c(array._addr(), array.schema._addr())
-    view = array.view()
+    view = na.array(pa.array(["a", "bc", "def"])).view()
 
     assert view.buffers[0] is None
     np.testing.assert_array_equal(
@@ -261,10 +256,7 @@ def test_buffers_string():
 
 
 def test_buffers_binary():
-    pa_array = pa.array([b"a", b"bc", b"def"])
-    array = na.Array.Empty(na.Schema.empty())
-    pa_array._export_to_c(array._addr(), array.schema._addr())
-    view = array.view()
+    view = na.array(pa.array([b"a", b"bc", b"def"])).view()
 
     assert view.buffers[0] is None
     np.testing.assert_array_equal(

From d7fd6e181f7c5c0e672e626af92294912079bace Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Sat, 10 Jun 2023 23:24:44 -0300
Subject: [PATCH 33/52] rename version

---
 python/src/nanoarrow/__init__.py | 2 +-
 python/src/nanoarrow/_lib.pyx    | 2 +-
 python/tests/test_nanoarrow.py   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 5b648d247..4b41e0674 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -15,5 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import version, Schema, Array, ArrayView
+from ._lib import c_version, Schema, Array, ArrayView
 from .lib import schema, array
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 18bcf7fbf..c8873b661 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -25,7 +25,7 @@ from cpython.bytes cimport PyBytes_FromStringAndSize
 from cpython cimport Py_buffer
 from nanoarrow_c cimport *
 
-def version():
+def c_version():
     return ArrowNanoarrowVersion().decode("UTF-8")
 
 cdef class SchemaHolder:
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index e340c7c86..eeb04a7e2 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -24,9 +24,9 @@
 import nanoarrow as na
 
 
-def test_version():
+def test_c_version():
     re_version = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+(-SNAPSHOT)?$")
-    assert re_version.match(na.version()) is not None
+    assert re_version.match(na.c_version()) is not None
 
 
 def test_schema_helper():

From 5c02647f82e465a72f212667670e7b7452280573 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Sun, 11 Jun 2023 15:19:51 -0300
Subject: [PATCH 34/52] start on stream

---
 python/src/nanoarrow/_lib.pyx | 117 +++++++++++++++++++++++++++++++++-
 python/src/nanoarrow/lib.py   |  25 +++++++-
 2 files changed, 138 insertions(+), 4 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index c8873b661..69a9b4e59 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -31,7 +31,7 @@ def c_version():
 cdef class SchemaHolder:
     cdef ArrowSchema c_schema
 
-    def __init__(self):
+    def __cinit__(self):
         self.c_schema.release = NULL
 
     def __dealloc__(self):
@@ -44,7 +44,7 @@ cdef class SchemaHolder:
 cdef class ArrayHolder:
     cdef ArrowArray c_array
 
-    def __init__(self):
+    def __cinit__(self):
         self.c_array.release = NULL
 
     def __dealloc__(self):
@@ -54,6 +54,19 @@ cdef class ArrayHolder:
     def _addr(self):
         return <uintptr_t>&self.c_array
 
+cdef class ArrayStreamHolder:
+    cdef ArrowArrayStream c_array_stream
+
+    def __cinit__(self):
+        self.c_array_stream.release = NULL
+
+    def __dealloc__(self):
+        if self.c_array_stream.release != NULL:
+          self.c_array_stream.release(&self.c_array_stream)
+
+    def _addr(self):
+        return <uintptr_t>&self.c_array_stream
+
 cdef class ArrayViewHolder:
     cdef ArrowArrayView c_array_view
 
@@ -66,6 +79,34 @@ cdef class ArrayViewHolder:
     def _addr(self):
         return <uintptr_t>&self.c_array_view
 
+
+class NanoarrowException(RuntimeError):
+
+    def __init__(self, what, code, message):
+        self.what = what
+        self.code = code
+        self.message = message
+
+        if self.message == "":
+            super().__init__(f"{self.what} failed ({self.code})")
+        else:
+            super().__init__(f"{self.what} failed ({self.code}): {self.message}")
+
+
+cdef class Error:
+    cdef ArrowError c_error
+
+    def __cinit__(self):
+        self.c_error.message[0] = 0
+
+    def raise_message(self, what, code):
+        raise Exception(what, code, self.c_error.message.decode("UTF-8"))
+
+    @staticmethod
+    def raise_error(what, code):
+        raise Exception(what, code, "")
+
+
 cdef class Schema:
     cdef object _base
     cdef ArrowSchema* _ptr
@@ -317,6 +358,7 @@ cdef class Array:
 
         return ArrayView(holder, holder._addr(), self)
 
+
 cdef class ArrayView:
     cdef object _base
     cdef ArrowArrayView* _ptr
@@ -541,3 +583,74 @@ cdef class ArrayViewBuffers:
             self._array_view._ptr.layout.buffer_data_type[k],
             self._array_view._ptr.layout.element_size_bits[k]
         )
+
+
+cdef class ArrayStream:
+    cdef object _base
+    cdef ArrowArrayStream* _ptr
+
+    def __init__(self, object base, uintptr_t addr):
+        self._base = base,
+        self._ptr = <ArrowArrayStream*>addr
+        self._cached_schema = None
+
+    def is_valid(self):
+        return self._ptr != NULL and self._ptr.release != NULL
+
+    def _assert_valid(self):
+        if self._ptr == NULL:
+            raise RuntimeError("array stream pointer is NULL")
+        if self._ptr.release == NULL:
+            raise RuntimeError("array stream is released")
+
+    def _get_schema(self, Schema schema):
+        self._assert_valid()
+        cdef int code = self._ptr.get_schema(self._ptr, schema._ptr)
+        cdef const char* message = NULL
+        if code != NANOARROW_OK:
+            message = self._ptr.get_last_error(self._ptr)
+            if message != NULL:
+                raise NanoarrowException(
+                    "ArrowArrayStream::get_schema()",
+                    code,
+                    message.decode("UTF-8")
+                )
+            else:
+                Error.raise_error("ArrowArrayStream::get_schema()", code)
+
+        self._cached_schema = schema
+
+    def get_schema(self):
+        # Update the cached copy of the schema as an independent object
+        if self._cached_schema is not None:
+            del self._cached_schema
+        self._cached_schema = Schema.empty()
+        self._get_schema(self._cached_schema)
+
+        # Return an independent copy
+        out = Schema.empty()
+        self._get_schema(out)
+        return out
+
+    def get_next(self):
+        self._assert_valid()
+
+        if self._cached_schema is None:
+            self._cached_schema = Schema.empty()
+            self._get_schema(self._cached_schema)
+
+        cdef Array array = Array.empty(self._cached_schema)
+        cdef int code = self._ptr.get_next(self._ptr, array._ptr)
+        cdef const char* message = NULL
+        if code != NANOARROW_OK:
+            message = self._ptr.get_last_error(self._ptr)
+            if message != NULL:
+                raise NanoarrowException(
+                    "ArrowArrayStream::get_next()",
+                    code,
+                    message.decode("UTF-8")
+                )
+            else:
+                Error.raise_error("ArrowArrayStream::get_next()", code)
+
+        return array
diff --git a/python/src/nanoarrow/lib.py b/python/src/nanoarrow/lib.py
index e0c8c508d..549e9d096 100644
--- a/python/src/nanoarrow/lib.py
+++ b/python/src/nanoarrow/lib.py
@@ -5,7 +5,9 @@ def schema(obj):
     if isinstance(obj, Schema):
         return obj
 
-    # Not entirely safe but will have to do until there's a dunder method
+    # Not particularly safe because _export_to_c() could be exporting an
+    # array, schema, or array_stream. The ideal
+    # solution here would be something like __arrow_c_schema__()
     if hasattr(obj, "_export_to_c"):
         out = Schema.empty()
         obj._export_to_c(out._addr())
@@ -20,7 +22,9 @@ def array(obj):
     if isinstance(obj, Array):
         return obj
 
-    # Not entirely safe but will have to do until there's a dunder method
+    # Somewhat safe because calling _export_to_c() with two arguments will
+    # not fail with a crash (but will fail with a confusing error). The ideal
+    # solution here would be something like __arrow_c_array__()
     if hasattr(obj, "_export_to_c"):
         out = Array.empty(Schema.empty())
         obj._export_to_c(out._addr(), out.schema._addr())
@@ -29,3 +33,20 @@ def array(obj):
         raise TypeError(
             f"Can't convert object of type {type(obj).__name__} to nanoarrow.Array"
         )
+
+
+def array_stream(obj):
+    if isinstance(obj, Schema):
+        return obj
+
+    # Not particularly safe because _export_to_c() could be exporting an
+    # array, schema, or array_stream. The ideal
+    # solution here would be something like __arrow_c_array_stream__()
+    if hasattr(obj, "_export_to_c"):
+        out = Schema.empty()
+        obj._export_to_c(out._addr())
+        return out
+    else:
+        raise TypeError(
+            f"Can't convert object of type {type(obj).__name__} to nanoarrow.Schema"
+        )

From ed5e5450b6760c53f0fe374acc1f7097ecf62a18 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 15:39:45 -0300
Subject: [PATCH 35/52] some array stream madness

---
 python/src/nanoarrow/__init__.py |  4 ++--
 python/src/nanoarrow/_lib.pyx    | 22 +++++++++++++++++++---
 python/src/nanoarrow/lib.py      |  4 ++--
 python/tests/test_nanoarrow.py   | 29 +++++++++++++++++++++++++++++
 4 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/python/src/nanoarrow/__init__.py b/python/src/nanoarrow/__init__.py
index 4b41e0674..bb4372642 100644
--- a/python/src/nanoarrow/__init__.py
+++ b/python/src/nanoarrow/__init__.py
@@ -15,5 +15,5 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from ._lib import c_version, Schema, Array, ArrayView
-from .lib import schema, array
+from ._lib import c_version, Schema, Array, ArrayView, ArrayStream
+from .lib import schema, array, array_stream
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 69a9b4e59..7ef9d9321 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -588,12 +588,16 @@ cdef class ArrayViewBuffers:
 cdef class ArrayStream:
     cdef object _base
     cdef ArrowArrayStream* _ptr
+    cdef object _cached_schema
 
     def __init__(self, object base, uintptr_t addr):
         self._base = base,
         self._ptr = <ArrowArrayStream*>addr
         self._cached_schema = None
 
+    def _addr(self):
+        return <uintptr_t>self._ptr
+
     def is_valid(self):
         return self._ptr != NULL and self._ptr.release != NULL
 
@@ -622,8 +626,6 @@ cdef class ArrayStream:
 
     def get_schema(self):
         # Update the cached copy of the schema as an independent object
-        if self._cached_schema is not None:
-            del self._cached_schema
         self._cached_schema = Schema.empty()
         self._get_schema(self._cached_schema)
 
@@ -653,4 +655,18 @@ cdef class ArrayStream:
             else:
                 Error.raise_error("ArrowArrayStream::get_next()", code)
 
-        return array
+        if not array.is_valid():
+            return None
+        else:
+            return array
+
+    def __iter__(self):
+        array = self.get_next()
+        while array is not None:
+            yield array
+            array = self.get_next()
+
+    @staticmethod
+    def empty():
+        base = ArrayStreamHolder()
+        return ArrayStream(base, base._addr())
diff --git a/python/src/nanoarrow/lib.py b/python/src/nanoarrow/lib.py
index 549e9d096..b0a2a326d 100644
--- a/python/src/nanoarrow/lib.py
+++ b/python/src/nanoarrow/lib.py
@@ -1,4 +1,4 @@
-from ._lib import Schema, Array
+from ._lib import Schema, Array, ArrayStream
 
 
 def schema(obj):
@@ -43,7 +43,7 @@ def array_stream(obj):
     # array, schema, or array_stream. The ideal
     # solution here would be something like __arrow_c_array_stream__()
     if hasattr(obj, "_export_to_c"):
-        out = Schema.empty()
+        out = ArrayStream.empty()
         obj._export_to_c(out._addr())
         return out
     else:
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index eeb04a7e2..6f7477dfe 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -263,3 +263,32 @@ def test_buffers_binary():
         np.array(view.buffers[1]), np.array([0, 1, 3, 6], np.int32())
     )
     np.testing.assert_array_equal(np.array(view.buffers[2]), np.array(list(b"abcdef")))
+
+
+def test_array_stream():
+    array_stream = na.ArrayStream.empty()
+    assert array_stream.is_valid() is False
+    with pytest.raises(RuntimeError):
+        array_stream.get_schema()
+    with pytest.raises(RuntimeError):
+        array_stream.get_next()
+
+    pa_array_child = pa.array([1, 2, 3], pa.int32())
+    pa_array = pa.record_batch([pa_array_child], names=["some_column"])
+    reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])
+    array_stream = na.array_stream(reader)
+
+    assert array_stream.is_valid() is True
+    array = array_stream.get_next()
+    assert array.schema.children[0].name == "some_column"
+    assert array_stream.get_next() is None
+
+def test_array_stream_iter():
+    pa_array_child = pa.array([1, 2, 3], pa.int32())
+    pa_array = pa.record_batch([pa_array_child], names=["some_column"])
+    reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])
+    array_stream = na.array_stream(reader)
+
+    arrays = list(array_stream)
+    assert len(arrays) == 1
+    assert arrays[0].schema.children[0].name == "some_column"

From 1321545c689de9cb8200d9a0cb98fc2e261b25d9 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 16:30:48 -0300
Subject: [PATCH 36/52] some tidying

---
 python/.coveragerc            |  20 +++++
 python/setup.py               |  14 ++++
 python/src/nanoarrow/_lib.pyx | 142 ++++++++++++++++++++++++++++++++--
 python/src/nanoarrow/lib.py   |  17 ++++
 4 files changed, 188 insertions(+), 5 deletions(-)
 create mode 100644 python/.coveragerc

diff --git a/python/.coveragerc b/python/.coveragerc
new file mode 100644
index 000000000..1fb6a24ea
--- /dev/null
+++ b/python/.coveragerc
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# .coveragerc to control coverage.py
+[run]
+plugins = Cython.Coverage
diff --git a/python/setup.py b/python/setup.py
index 8b4b61c42..3ede82c86 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -30,6 +30,17 @@
 if os.path.exists(bootstrap_py):
     subprocess.run([sys.executable, bootstrap_py])
 
+
+# Set some extra flags for compiling with coverage support
+if os.getenv('NANOARROW_PYTHON_COVERAGE') == "1":
+    coverage_compile_args = ['--coverage']
+    coverage_link_args = ['--coverage']
+    coverage_define_macros = [("CYTHON_TRACE", 1)]
+else:
+    coverage_compile_args = []
+    coverage_link_args = []
+    coverage_define_macros = []
+
 setup(
     ext_modules=[
         Extension(
@@ -40,6 +51,9 @@
                 'src/nanoarrow/_lib.pyx',
                 'src/nanoarrow/nanoarrow.c',
             ],
+            extra_compile_args = coverage_compile_args,
+            extra_link_args = [] + coverage_link_args,
+            define_macros= [] + coverage_define_macros,
         )
     ]
 )
diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 7ef9d9321..769a2ec61 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -16,8 +16,16 @@
 # under the License.
 
 # cython: language_level = 3
+# cython: linetrace=True
 
-"""Low-level nanoarrow Python bindings."""
+"""Low-level nanoarrow Python bindings
+
+This Cython extension provides low-level Python wrappers around the
+Arrow C Data and Arrow C Stream interface structs. In general, there
+is one wrapper per C struct and pointer validity is managed by keeping
+strong references to Python objects. These wrappers are intended to
+be literal and stay close to the structure definitions.
+"""
 
 from libc.stdint cimport uintptr_t, int64_t
 from cpython.mem cimport PyMem_Malloc, PyMem_Free
@@ -26,9 +34,17 @@ from cpython cimport Py_buffer
 from nanoarrow_c cimport *
 
 def c_version():
+    """Return the nanoarrow C library version string
+    """
     return ArrowNanoarrowVersion().decode("UTF-8")
 
 cdef class SchemaHolder:
+    """Memory holder for an ArrowSchema
+
+    This class is responsible for the lifecycle of the ArrowSchema
+    whose memory it owns. When this object is deleted,
+    a non-NULL release callback is invoked.
+    """
     cdef ArrowSchema c_schema
 
     def __cinit__(self):
@@ -42,6 +58,12 @@ cdef class SchemaHolder:
         return <uintptr_t>&self.c_schema
 
 cdef class ArrayHolder:
+    """Memory holder for an ArrowArray
+
+    This class is responsible for the lifecycle of the ArrowArray
+    whose memory it owns. When this object is deleted,
+    a non-NULL release callback is invoked.
+    """
     cdef ArrowArray c_array
 
     def __cinit__(self):
@@ -55,6 +77,12 @@ cdef class ArrayHolder:
         return <uintptr_t>&self.c_array
 
 cdef class ArrayStreamHolder:
+    """Memory holder for an ArrowArrayStream
+
+    This class is responsible for the lifecycle of the ArrowArrayStream
+    whose memory it owns. When this object is deleted,
+    a non-NULL release callback is invoked.
+    """
     cdef ArrowArrayStream c_array_stream
 
     def __cinit__(self):
@@ -68,6 +96,12 @@ cdef class ArrayStreamHolder:
         return <uintptr_t>&self.c_array_stream
 
 cdef class ArrayViewHolder:
+    """Memory holder for an ArrowArrayView
+
+    This class is responsible for the lifecycle of the ArrowArrayView
+    whose memory it owns. When this object is deleted,
+    ArrowArrayViewReset() is called on the contents.
+    """
     cdef ArrowArrayView c_array_view
 
     def __init__(self):
@@ -81,6 +115,13 @@ cdef class ArrayViewHolder:
 
 
 class NanoarrowException(RuntimeError):
+    """An error resulting from a call to the nanoarrow C library
+
+    Calls to the nanoarrow C library and/or the Arrow C Stream interface
+    callbacks return an errno error code and sometimes a message with extra
+    detail. This exception subclasses RuntimeError to format a suitable message
+    and store the components of the original error.
+    """
 
     def __init__(self, what, code, message):
         self.what = what
@@ -94,20 +135,56 @@ class NanoarrowException(RuntimeError):
 
 
 cdef class Error:
+    """Memory holder for an ArrowError
+
+    ArrowError is the C struct that is optionally passed to nanoarrow functions
+    when a detailed error message might be returned. This class holds a C
+    reference to the object and provides helpers for raising exceptions based
+    on the contained message.
+    """
     cdef ArrowError c_error
 
     def __cinit__(self):
         self.c_error.message[0] = 0
 
     def raise_message(self, what, code):
-        raise Exception(what, code, self.c_error.message.decode("UTF-8"))
+        """Raise a NanoarrowException from this message
+        """
+        raise NanoarrowException(what, code, self.c_error.message.decode("UTF-8"))
 
     @staticmethod
     def raise_error(what, code):
-        raise Exception(what, code, "")
+        """Raise a NanoarrowException without a message
+        """
+        raise NanoarrowException(what, code, "")
 
 
 cdef class Schema:
+    """ArrowSchema wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowSchema as defined in the Arrow C Data interface. These objects
+    are usually created using `nanoarrow.schema()`. This Python wrapper
+    allows access to schema fields but does not automatically deserialize
+    their content: use `.view()` to validate and deserialize the content
+    into a more easily inspectable object.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> schema = na.schema(pa.int32())
+    >>> schema.is_valid()
+    True
+    >>> schema.format
+    'i'
+    >>> schema.name
+    ''
+    >>> schema_view = schema.view()
+    >>> schema_view.type
+    'int32'
+    """
     cdef object _base
     cdef ArrowSchema* _ptr
 
@@ -124,9 +201,11 @@ cdef class Schema:
         return <uintptr_t>self._ptr
 
     def is_valid(self):
-        return self._ptr.release != NULL
+        return self._ptr != NULL and self._ptr.release != NULL
 
     def _assert_valid(self):
+        if self._ptr == NULL:
+            raise RuntimeError("schema is NULL")
         if self._ptr.release == NULL:
             raise RuntimeError("schema is released")
 
@@ -190,7 +269,30 @@ cdef class Schema:
             raise ValueError(ArrowErrorMessage(&error))
         return schema_view
 
+
 cdef class SchemaView:
+    """ArrowSchemaView wrapper
+
+    The ArrowSchemaView is a nanoarrow C library structure that facilitates
+    access to the deserialized content of an ArrowSchema (e.g., parameter
+    values for parameterized types). This wrapper extends that facility to Python.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> schema = na.schema(pa.decimal128(10, 3))
+    >>> schema_view = schema.view()
+    >>> schema_view.type
+    'decimal128'
+    >>> schema_view.decimal_bitwidth
+    128
+    >>> schema_view.decimal_precision
+    10
+    >>> schema_view.decimal_scale
+    3
+    """
     cdef ArrowSchemaView _schema_view
 
     _fixed_size_types = (
@@ -285,6 +387,34 @@ cdef class SchemaView:
             )
 
 cdef class Array:
+    """ArrowArray wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowArray as defined in the Arrow C Data interface, holding an
+    optional reference to a Schema that can be used to safely deserialize
+    the content. These objects are usually created using `nanoarrow.array()`.
+    This Python wrapper allows access to array fields but does not
+    automatically deserialize their content: use `.view()` to validate and
+    deserialize the content into a more easily inspectable object.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import numpy as np
+    >>> import nanoarrow as na
+    >>> array = na.array(pa.array(["one", "two", "three", None]))
+    >>> array.length
+    4
+    >>> array.null_count
+    1
+    >>> array_view = array.view()
+    >>> np.array(array_view.buffers[1])
+    array([ 0,  3,  6, 11, 11], dtype=int32)
+    >>> np.array(array_view.buffers[2])
+    array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
+          dtype='|S1')
+    """
     cdef object _base
     cdef ArrowArray* _ptr
     cdef Schema _schema
@@ -303,9 +433,11 @@ cdef class Array:
         return <uintptr_t>self._ptr
 
     def is_valid(self):
-        return self._ptr.release != NULL
+        return self._ptr != NULL and self._ptr.release != NULL
 
     def _assert_valid(self):
+        if self._ptr == NULL:
+            raise RuntimeError("Array is NULL")
         if self._ptr.release == NULL:
             raise RuntimeError("Array is released")
 
diff --git a/python/src/nanoarrow/lib.py b/python/src/nanoarrow/lib.py
index b0a2a326d..ef8bca692 100644
--- a/python/src/nanoarrow/lib.py
+++ b/python/src/nanoarrow/lib.py
@@ -1,3 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
 from ._lib import Schema, Array, ArrayStream
 
 
From c1c2df24ed7a5da73b48c591e9a54993589abed1 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 16:48:40 -0300
Subject: [PATCH 37/52] more documentation

---
 python/src/nanoarrow/_lib.pyx | 73 ++++++++++++++++++++++++++++++++---
 1 file changed, 68 insertions(+), 5 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 769a2ec61..53f1dbdfc 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -409,11 +409,6 @@ cdef class Array:
     >>> array.null_count
     1
     >>> array_view = array.view()
-    >>> np.array(array_view.buffers[1])
-    array([ 0,  3,  6, 11, 11], dtype=int32)
-    >>> np.array(array_view.buffers[2])
-    array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
-          dtype='|S1')
     """
     cdef object _base
     cdef ArrowArray* _ptr
@@ -492,6 +487,25 @@ cdef class Array:
 
 
 cdef class ArrayView:
+    """ArrowArrayView wrapper
+
+    The ArrowArrayView is a nanoarrow C library structure that facilitates
+    access to the deserialized content of an ArrowArray (e.g., buffer types,
+    lengths, and content). This wrapper extends that facility to Python.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import numpy as np
+    >>> import nanoarrow as na
+    >>> array_view = na.array(pa.array(["one", "two", "three", None])).view()
+    >>> np.array(array_view.buffers[1])
+    array([ 0,  3,  6, 11, 11], dtype=int32)
+    >>> np.array(array_view.buffers[2])
+    array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
+          dtype='|S1')
+    """
     cdef object _base
     cdef ArrowArrayView* _ptr
     cdef Array _array
@@ -522,6 +536,8 @@ cdef class ArrayView:
         return self._array._schema
 
 cdef class SchemaChildren:
+    """Wrapper for a lazily-resolved list of Schema children
+    """
     cdef Schema _parent
     cdef int64_t _length
 
@@ -544,7 +560,11 @@ cdef class SchemaChildren:
         cdef ArrowSchema* child = children[i]
         return <uintptr_t>child
 
+
 cdef class SchemaMetadata:
+    """Wrapper for a lazily-parsed Schema.metadata string
+    """
+
     cdef object _parent
     cdef const char* _metadata
     cdef ArrowMetadataReader _reader
@@ -572,7 +592,10 @@ cdef class SchemaMetadata:
             value_obj = PyBytes_FromStringAndSize(value.data, value.size_bytes)
             yield key_obj, value_obj
 
+
 cdef class ArrayChildren:
+    """Wrapper for a lazily-resolved list of Array children
+    """
     cdef Array _parent
     cdef int64_t _length
 
@@ -594,7 +617,10 @@ cdef class ArrayChildren:
         cdef ArrowArray* child = children[i]
         return <uintptr_t>child
 
+
 cdef class ArrayViewChildren:
+    """Wrapper for a lazily-resolved list of ArrayView children
+    """
     cdef ArrayView _parent
     cdef int64_t _length
 
@@ -617,6 +643,13 @@ cdef class ArrayViewChildren:
         return <uintptr_t>child
 
 cdef class BufferView:
+    """Wrapper for Array buffer content
+
+    This object is a Python wrapper around a buffer held by an Array.
+    It implements the Python buffer protocol and is best accessed through
+    another implementor (e.g., `np.array(array_view.buffers[1])`). Note that
+    this buffer content does not apply any parent offset.
+    """
     cdef object _base
     cdef ArrowBufferView* _ptr
     cdef ArrowBufferType _buffer_type
@@ -689,7 +722,10 @@ cdef class BufferView:
     def __releasebuffer__(self, Py_buffer *buffer):
         pass
 
+
 cdef class ArrayViewBuffers:
+    """A lazily-resolved list of ArrayView buffers
+    """
     cdef ArrayView _array_view
     cdef int64_t _length
 
@@ -718,6 +754,27 @@ cdef class ArrayViewBuffers:
 
 
 cdef class ArrayStream:
+    """ArrowArrayStream wrapper
+
+    This class provides a user-facing interface to access the fields of
+    an ArrowArrayStream as defined in the Arrow C Stream interface.
+    These objects are usually created using `nanoarrow.array_stream()`.
+
+    Examples
+    --------
+
+    >>> import pyarrow as pa
+    >>> import nanoarrow as na
+    >>> pa_column = pa.array([1, 2, 3], pa.int32())
+    >>> pa_batch = pa.record_batch([pa_column], names=["col1"])
+    >>> pa_reader = pa.RecordBatchReader.from_batches(pa_batch.schema, [pa_batch])
+    >>> array_stream = na.array_stream(pa_reader)
+    >>> array_stream.get_schema()
+    struct<col1: int32>
+    >>> array = array_stream.get_next()
+    >>> (array.length, array_stream.get_next() is None)
+    (3, True)
+    """
     cdef object _base
     cdef ArrowArrayStream* _ptr
     cdef object _cached_schema
@@ -757,6 +814,8 @@ cdef class ArrayStream:
         self._cached_schema = schema
 
     def get_schema(self):
+        """Get the schema associated with this stream
+        """
         # Update the cached copy of the schema as an independent object
         self._cached_schema = Schema.empty()
         self._get_schema(self._cached_schema)
@@ -767,6 +826,10 @@ cdef class ArrayStream:
         return out
 
     def get_next(self):
+        """Get the next Array from this stream
+
+        Returns None when there are no more arrays in this stream.
+        """
         self._assert_valid()
 
         if self._cached_schema is None:

From ee4dbb6a9b3e136082ff3e219429bc622530ef2a Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 16:50:43 -0300
Subject: [PATCH 38/52] empty -> allocate

---
 python/src/nanoarrow/_lib.pyx  | 10 +++++-----
 python/src/nanoarrow/lib.py    |  8 ++++----
 python/tests/test_nanoarrow.py | 11 ++++++-----
 3 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 53f1dbdfc..4f3c1b270 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -189,7 +189,7 @@ cdef class Schema:
     cdef ArrowSchema* _ptr
 
     @staticmethod
-    def empty():
+    def allocate():
         base = SchemaHolder()
         return Schema(base, base._addr())
 
@@ -817,11 +817,11 @@ cdef class ArrayStream:
         """Get the schema associated with this stream
         """
         # Update the cached copy of the schema as an independent object
-        self._cached_schema = Schema.empty()
+        self._cached_schema = Schema.allocate()
         self._get_schema(self._cached_schema)
 
         # Return an independent copy
-        out = Schema.empty()
+        out = Schema.allocate()
         self._get_schema(out)
         return out
 
@@ -833,7 +833,7 @@ cdef class ArrayStream:
         self._assert_valid()
 
         if self._cached_schema is None:
-            self._cached_schema = Schema.empty()
+            self._cached_schema = Schema.allocate()
             self._get_schema(self._cached_schema)
 
         cdef Array array = Array.empty(self._cached_schema)
@@ -862,6 +862,6 @@ cdef class ArrayStream:
             array = self.get_next()
 
     @staticmethod
-    def empty():
+    def allocate():
         base = ArrayStreamHolder()
         return ArrayStream(base, base._addr())
diff --git a/python/src/nanoarrow/lib.py b/python/src/nanoarrow/lib.py
index ef8bca692..8841ade47 100644
--- a/python/src/nanoarrow/lib.py
+++ b/python/src/nanoarrow/lib.py
@@ -26,7 +26,7 @@ def schema(obj):
     # array, schema, or array_stream. The ideal
     # solution here would be something like __arrow_c_schema__()
     if hasattr(obj, "_export_to_c"):
-        out = Schema.empty()
+        out = Schema.allocate()
         obj._export_to_c(out._addr())
         return out
     else:
@@ -43,7 +43,7 @@ def array(obj):
     # not fail with a crash (but will fail with a confusing error). The ideal
     # solution here would be something like __arrow_c_array__()
     if hasattr(obj, "_export_to_c"):
-        out = Array.empty(Schema.empty())
+        out = Array.empty(Schema.allocate())
         obj._export_to_c(out._addr(), out.schema._addr())
         return out
     else:
@@ -60,10 +60,10 @@ def array_stream(obj):
     # array, schema, or array_stream. The ideal
     # solution here would be something like __arrow_c_array_stream__()
     if hasattr(obj, "_export_to_c"):
-        out = ArrayStream.empty()
+        out = ArrayStream.allocate()
         obj._export_to_c(out._addr())
         return out
     else:
         raise TypeError(
-            f"Can't convert object of type {type(obj).__name__} to nanoarrow.Schema"
+            f"Can't convert object of type {type(obj).__name__} to nanoarrow.ArrowArrayStream"
         )
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 6f7477dfe..9e3170caf 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -30,7 +30,7 @@ def test_c_version():
 
 
 def test_schema_helper():
-    schema = na.Schema.empty()
+    schema = na.Schema.allocate()
     assert na.schema(schema) is schema
 
     schema = na.schema(pa.null())
@@ -41,7 +41,7 @@ def test_schema_helper():
 
 
 def test_array_helper():
-    array = na.Array.empty(na.Schema.empty())
+    array = na.Array.empty(na.Schema.allocate())
     assert na.array(array) is array
 
     array = na.array(pa.array([], pa.null()))
@@ -52,7 +52,7 @@ def test_array_helper():
 
 
 def test_schema_basic():
-    schema = na.Schema.empty()
+    schema = na.Schema.allocate()
     assert schema.is_valid() is False
     assert repr(schema) == "[invalid: schema is released]"
 
@@ -89,7 +89,7 @@ def test_schema_metadata():
 
 
 def test_schema_view():
-    schema = na.Schema.empty()
+    schema = na.Schema.allocate()
     with pytest.raises(RuntimeError):
         schema.view()
 
@@ -266,7 +266,7 @@ def test_buffers_binary():
 
 
 def test_array_stream():
-    array_stream = na.ArrayStream.empty()
+    array_stream = na.ArrayStream.allocate()
     assert array_stream.is_valid() is False
     with pytest.raises(RuntimeError):
         array_stream.get_schema()
@@ -283,6 +283,7 @@ def test_array_stream():
     assert array.schema.children[0].name == "some_column"
     assert array_stream.get_next() is None
 
+
 def test_array_stream_iter():
     pa_array_child = pa.array([1, 2, 3], pa.int32())
     pa_array = pa.record_batch([pa_array_child], names=["some_column"])

From 4bb8f860a316b199d17ed93c201e09acc95f3dce Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 16:57:15 -0300
Subject: [PATCH 39/52] in theory use nanoarrowexception

---
 python/src/nanoarrow/_lib.pyx | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/python/src/nanoarrow/_lib.pyx b/python/src/nanoarrow/_lib.pyx
index 4f3c1b270..c1d45821e 100644
--- a/python/src/nanoarrow/_lib.pyx
+++ b/python/src/nanoarrow/_lib.pyx
@@ -263,10 +263,11 @@ cdef class Schema:
     def view(self):
         self._assert_valid()
         schema_view = SchemaView()
-        cdef ArrowError error
-        cdef int result = ArrowSchemaViewInit(&schema_view._schema_view, self._ptr, &error)
+        cdef Error error = Error()
+        cdef int result = ArrowSchemaViewInit(&schema_view._schema_view, self._ptr, &error.c_error)
         if result != NANOARROW_OK:
-            raise ValueError(ArrowErrorMessage(&error))
+            error.raise_message("ArrowSchemaViewInit()", result)
+
         return schema_view
 
 
@@ -473,15 +474,15 @@ cdef class Array:
     def view(self):
         cdef ArrayViewHolder holder = ArrayViewHolder()
 
-        cdef ArrowError error
+        cdef Error error = Error()
         cdef int result = ArrowArrayViewInitFromSchema(&holder.c_array_view,
-                                                       self._schema._ptr, &error)
+                                                       self._schema._ptr, &error.c_error)
         if result != NANOARROW_OK:
-            raise ValueError(ArrowErrorMessage(&error))
+            error.raise_message("ArrowArrayViewInitFromSchema()", result)
 
-        result = ArrowArrayViewSetArray(&holder.c_array_view, self._ptr, &error)
+        result = ArrowArrayViewSetArray(&holder.c_array_view, self._ptr, &error.c_error)
         if result != NANOARROW_OK:
-            raise ValueError(ArrowErrorMessage(&error))
+            error.raise_message("ArrowArrayViewSetArray()", result)
 
         return ArrayView(holder, holder._addr(), self)
 
@@ -576,7 +577,7 @@ cdef class SchemaMetadata:
     def _init_reader(self):
         cdef int result = ArrowMetadataReaderInit(&self._reader, self._metadata)
         if result != NANOARROW_OK:
-            raise ValueError('ArrowMetadataReaderInit() failed')
+            Error.raise_error("ArrowMetadataReaderInit()", result)
 
     def __len__(self):
         self._init_reader()

From 1d3d8c1af8a0a1915ce761432caa59abc4a4322a Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 17:06:13 -0300
Subject: [PATCH 40/52] attempt starting coverage

---
 python/.gitignore | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/python/.gitignore b/python/.gitignore
index 8abd5d0de..b3724522b 100644
--- a/python/.gitignore
+++ b/python/.gitignore
@@ -16,10 +16,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-src/nanoarrow/nanoarrow.c
-src/nanoarrow/nanoarrow.h
-src/nanoarrow/nanoarrow_c.pxd
-src/nanoarrow/*.c
+nanoarrow/nanoarrow.c
+nanoarrow/nanoarrow.h
+nanoarrow/nanoarrow_c.pxd
+nanoarrow/*.c
 
 # Byte-compiled / optimized / DLL files
 __pycache__/

From 3d03cc61de98833da9cda6f8a61f2439d3fd4ac3 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 17:06:20 -0300
Subject: [PATCH 41/52] attempt starting coverage again

---
 python/.coverage                       | Bin 0 -> 53248 bytes
 python/MANIFEST.in                     |   6 +++---
 python/bootstrap.py                    |  14 +++++++-------
 python/{src => }/nanoarrow/__init__.py |   0
 python/{src => }/nanoarrow/_lib.pyx    |   0
 python/{src => }/nanoarrow/lib.py      |   0
 python/setup.py                        |  24 ++++++++++++------------
 7 files changed, 22 insertions(+), 22 deletions(-)
 create mode 100644 python/.coverage
 rename python/{src => }/nanoarrow/__init__.py (100%)
 rename python/{src => }/nanoarrow/_lib.pyx (100%)
 rename python/{src => }/nanoarrow/lib.py (100%)

diff --git a/python/.coverage b/python/.coverage
new file mode 100644
index 0000000000000000000000000000000000000000..cb940059e3ace43b858468e85ebfdb74569e3700
GIT binary patch
literal 53248
zcmeI)O^@3|7zc3MNs}g<;6hbJt5w;#z;?ASVL=rF4zSQYR8%V4E~pX`?!=jFOcOh`
zo$l_TLTQmIA@L0mNBA6ExbqPzA+<L+lyWEsp2uV7rQNhWRH~Z&TbsnOXU6mVJTGyw
z$>r?}UaUkb2)i;C?-?fy(=<L2!Z3^y-KuoUmLjd>vnTXuj;&8yEg9<<Z&vNEj7s50
z!~Ujv(O#?kRDFByk4m@n!`#nBS#oIy76?E90uVUb0)y>Ju{J+%-ntpfMn}b=bXBPD
ztDjxk+T7U^JDZ<g*b;i1I8zq1tgVSn5e9prr$W*4I!btc({p9)`MV-+t1NXtQca%d
zXd6v+oNztK&ufm?q+GGur6PKv*OlRcxTX$PlLYC}VznRZ4OEEoc70AE&ZOCwMW|XT
zRKBYsU5hhbbGf{H`@3?ncJicoM<p@I(51h*iD+oUD3tRpD##7OCfB|ZO5bg(XjRBz
zgl<6f+>dp%VbHC|f#`UC5~d${u^0G4?JKt*tLC8rt}<G-5Z-t+RMu4v=&H_{9&1cz
zl=+aHY4S94iz?>_8S5w`ADS{Jb@uz+29bZ;@AmlQP!4_IIGC1mTHZKFgMIFEbI7?I
z#XE62GjcwoI#=KKuJ_gO<gbcR&ebFgKUftU1hs&-<_fhBPMQX_O0^$d@6hWlNA}|&
zdG653cQ)v*^8M?_i?zi?^Un38p)ilr@ZxB=QkrNq!_8Bg4)34dZn&3BHWKbdosI}v
zNtfZ=LZ2LJ&EtKho?*z4JDLd6gjCa)+S<6XR0q-M1dYkImb92iQ3s*1AWPQj`!08t
zh+}Yr8!D8$YCYYnhlQ8jkf`ZwlnS-2#c54v*aV!7;c9s>UntfV7R+0vq#31WD}5+V
zG@<NF9x{w(yShq7{V*Fyo;FBkIYy&o)<@Dk6wX?)P}^LX7EYSZ*+^H84~qF>?bIoA
zpnC+5OME*v5d?1;b%Dnwg6FS(PsVNWWZii)Reb4Kq4v?KX%*)xI2(Llxc#-2RQb+)
zqTi)KpKrN|D)W|6l|NSpYd2(vMw6cOLh7<K2J0C>aKfgf#Ih0e<D~ua)1-Ow+%u}L
zE@1RLv!_(cNpkAmUj$L)(X`6bR`g-ERV=k-#G_EOBpN}`QPS6Q1LZg$(J{S9Bz1Ul
zR{D5UMs&bs?A};P>Xc0`Jk0M<r_1^mt#rH6Z>IAVPs6%Y8vD6(@^SPaLK(#pb;^jk
z=woGOg{FIX#LCKF?~w_Y+LcY+-T9=9mAKbdPZXzTqjNG=vDa0ClO&Z%8rGhSgc~Z_
zNptD)QtoNZfX|k*!S{TXe4Jm(4HnH}ZDqwA^wZ9$#~CN0NxdtxYNt=FiAM6+&QW{0
zNCPgt+8J*rY&XWiPxTRaaMNe7p%t=J>1&<xGGE}E_9KJ-us{F;5P$##AOHafKmY;|
zfB*y_Fna<y(=v;E{cqWK4f`*;!vX;aKmY;|fB*y_009U<00Izzz^f@xv2v&F>|Z?I
zvCZ5<E&V5e_t)QDKU=1#Ec?D;-?#sHH3LMgApijgKmY;|fB*y_009U<00I!m1S+}H
zX7(<CRn9Gx(l-J0=l|P={foVwrNtuzAOHafKmY;|fB*y_009U<00PetxNPN(!Akwh
zh<?1F-c)<)pxO6*`Y|8+HJJK26<v#iUOkN5P{wY%E<^e%|C%rTAX%&T4&pYY8*S)#
z4f<7sD<&0s_JtMdIG*ptjzcBu&;KtO_OJG(XGDhG5P$##AOHafKmY;|fB*y_009UL
z1$Od-74wB%s_W~2>q>rbw)DaZ9lQR=@Ba-?99AFz0SG_<0uX=z1Rwwb2tWV=vnY@=
zb4Hav|2OUb4En<Y0SG_<0uX=z1Rwwb2tWV=5P-l83gmLtlK%exJ;VOjzBhv*A|wbv
z00Izz00bZa0SG_<0uX?}Q4)B~%A2J(EyMWp@89nZe#;fje5v$?VN~?@{~s9kBm2Qo
z0zs@0fB*y_009U<00Izz00bZafms(Q(0c)<W##EDfaLRk<HW2-jQAh`0SG_<0uX=z
z1Rwwb2tWV=M_YhD|Ht+J(QX5X9s&@600bZa0SG_<0uX=z1ZH1AfB*j>|NsBP*^d?x
hLI45~fB*y_009U<00Izz00fSbK+d9{{!f1Y|9^f9w@?58

literal 0
HcmV?d00001

diff --git a/python/MANIFEST.in b/python/MANIFEST.in
index 9fc293725..93ed2fd0a 100644
--- a/python/MANIFEST.in
+++ b/python/MANIFEST.in
@@ -16,6 +16,6 @@
 # under the License.
 
 exclude bootstrap.py
-include src/nanoarrow/nanoarrow.c
-include src/nanoarrow/nanoarrow.h
-include src/nanoarrow/nanoarrow_c.pxd
+include nanoarrow/nanoarrow.c
+include nanoarrow/nanoarrow.h
+include nanoarrow/nanoarrow_c.pxd
diff --git a/python/bootstrap.py b/python/bootstrap.py
index 8cdc83dcd..9a41446c9 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -145,8 +145,8 @@ def copy_or_generate_nanoarrow_c():
     this_dir = os.path.abspath(os.path.dirname(__file__))
     source_dir = os.path.dirname(this_dir)
 
-    maybe_nanoarrow_h = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.h')
-    maybe_nanoarrow_c = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.c')
+    maybe_nanoarrow_h = os.path.join(this_dir, 'nanoarrow/nanoarrow.h')
+    maybe_nanoarrow_c = os.path.join(this_dir, 'nanoarrow/nanoarrow.c')
     for f in (maybe_nanoarrow_c, maybe_nanoarrow_h):
         if os.path.exists(f):
             os.unlink(f)
@@ -161,7 +161,7 @@ def copy_or_generate_nanoarrow_c():
             os.mkdir(build_dir)
             os.chdir(build_dir)
             os.system(f'cmake ../.. -DNANOARROW_BUNDLE=ON')
-            os.system(f'cmake --install . --prefix=../src/nanoarrow')
+            os.system(f'cmake --install . --prefix=../nanoarrow')
         finally:
             if os.path.exists(build_dir):
                 # Can fail on Windows with permission issues
@@ -176,18 +176,18 @@ def copy_or_generate_nanoarrow_c():
     else:
         raise ValueError('Attempt to build source distribution outside the nanoarrow repo')
 
-    if not os.path.exists(os.path.join(this_dir, 'src/nanoarrow/nanoarrow.h')):
+    if not os.path.exists(os.path.join(this_dir, 'nanoarrow/nanoarrow.h')):
         raise ValueError('Attempt to vendor nanoarrow.c/h failed')
 
-    maybe_nanoarrow_hpp = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.hpp')
+    maybe_nanoarrow_hpp = os.path.join(this_dir, 'nanoarrow/nanoarrow.hpp')
     if os.path.exists(maybe_nanoarrow_hpp):
         os.unlink(maybe_nanoarrow_hpp)
 
 # Runs the pxd generator with some information about the file name
 def generate_nanoarrow_pxd():
      this_dir = os.path.abspath(os.path.dirname(__file__))
-     maybe_nanoarrow_h = os.path.join(this_dir, 'src/nanoarrow/nanoarrow.h')
-     maybe_nanoarrow_pxd = os.path.join(this_dir, 'src/nanoarrow/nanoarrow_c.pxd')
+     maybe_nanoarrow_h = os.path.join(this_dir, 'nanoarrow/nanoarrow.h')
+     maybe_nanoarrow_pxd = os.path.join(this_dir, 'nanoarrow/nanoarrow_c.pxd')
 
      NanoarrowPxdGenerator().generate_nanoarrow_pxd(
         maybe_nanoarrow_h,
diff --git a/python/src/nanoarrow/__init__.py b/python/nanoarrow/__init__.py
similarity index 100%
rename from python/src/nanoarrow/__init__.py
rename to python/nanoarrow/__init__.py
diff --git a/python/src/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
similarity index 100%
rename from python/src/nanoarrow/_lib.pyx
rename to python/nanoarrow/_lib.pyx
diff --git a/python/src/nanoarrow/lib.py b/python/nanoarrow/lib.py
similarity index 100%
rename from python/src/nanoarrow/lib.py
rename to python/nanoarrow/lib.py
diff --git a/python/setup.py b/python/setup.py
index 3ede82c86..4222cd85d 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -26,15 +26,15 @@
 # checkout or copy from ../dist if the caller doesn't have cmake available.
 # Note that bootstrap.py won't exist if building from sdist.
 this_dir = os.path.dirname(__file__)
-bootstrap_py = os.path.join(this_dir, 'bootstrap.py')
+bootstrap_py = os.path.join(this_dir, "bootstrap.py")
 if os.path.exists(bootstrap_py):
     subprocess.run([sys.executable, bootstrap_py])
 
 
 # Set some extra flags for compiling with coverage support
-if os.getenv('NANOARROW_PYTHON_COVERAGE') == "1":
-    coverage_compile_args = ['--coverage']
-    coverage_link_args = ['--coverage']
+if os.getenv("NANOARROW_PYTHON_COVERAGE") == "1":
+    coverage_compile_args = ["--coverage"]
+    coverage_link_args = ["--coverage"]
     coverage_define_macros = [("CYTHON_TRACE", 1)]
 else:
     coverage_compile_args = []
@@ -44,16 +44,16 @@
 setup(
     ext_modules=[
         Extension(
-            name='nanoarrow._lib',
-            include_dirs=['src/nanoarrow'],
-            language='c',
+            name="nanoarrow._lib",
+            include_dirs=["nanoarrow"],
+            language="c",
             sources=[
-                'src/nanoarrow/_lib.pyx',
-                'src/nanoarrow/nanoarrow.c',
+                "nanoarrow/_lib.pyx",
+                "nanoarrow/nanoarrow.c",
             ],
-            extra_compile_args = coverage_compile_args,
-            extra_link_args = [] + coverage_link_args,
-            define_macros= [] + coverage_define_macros,
+            extra_compile_args=coverage_compile_args,
+            extra_link_args=coverage_link_args,
+            define_macros=coverage_define_macros,
         )
     ]
 )

From 3413af1eab825e91033e6216ff1d6789243931f3 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 20:37:56 -0300
Subject: [PATCH 42/52] remove coverage file

---
 python/.coverage | Bin 53248 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 python/.coverage

diff --git a/python/.coverage b/python/.coverage
deleted file mode 100644
index cb940059e3ace43b858468e85ebfdb74569e3700..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 53248
zcmeI)O^@3|7zc3MNs}g<;6hbJt5w;#z;?ASVL=rF4zSQYR8%V4E~pX`?!=jFOcOh`
zo$l_TLTQmIA@L0mNBA6ExbqPzA+<L+lyWEsp2uV7rQNhWRH~Z&TbsnOXU6mVJTGyw
z$>r?}UaUkb2)i;C?-?fy(=<L2!Z3^y-KuoUmLjd>vnTXuj;&8yEg9<<Z&vNEj7s50
z!~Ujv(O#?kRDFByk4m@n!`#nBS#oIy76?E90uVUb0)y>Ju{J+%-ntpfMn}b=bXBPD
ztDjxk+T7U^JDZ<g*b;i1I8zq1tgVSn5e9prr$W*4I!btc({p9)`MV-+t1NXtQca%d
zXd6v+oNztK&ufm?q+GGur6PKv*OlRcxTX$PlLYC}VznRZ4OEEoc70AE&ZOCwMW|XT
zRKBYsU5hhbbGf{H`@3?ncJicoM<p@I(51h*iD+oUD3tRpD##7OCfB|ZO5bg(XjRBz
zgl<6f+>dp%VbHC|f#`UC5~d${u^0G4?JKt*tLC8rt}<G-5Z-t+RMu4v=&H_{9&1cz
zl=+aHY4S94iz?>_8S5w`ADS{Jb@uz+29bZ;@AmlQP!4_IIGC1mTHZKFgMIFEbI7?I
z#XE62GjcwoI#=KKuJ_gO<gbcR&ebFgKUftU1hs&-<_fhBPMQX_O0^$d@6hWlNA}|&
zdG653cQ)v*^8M?_i?zi?^Un38p)ilr@ZxB=QkrNq!_8Bg4)34dZn&3BHWKbdosI}v
zNtfZ=LZ2LJ&EtKho?*z4JDLd6gjCa)+S<6XR0q-M1dYkImb92iQ3s*1AWPQj`!08t
zh+}Yr8!D8$YCYYnhlQ8jkf`ZwlnS-2#c54v*aV!7;c9s>UntfV7R+0vq#31WD}5+V
zG@<NF9x{w(yShq7{V*Fyo;FBkIYy&o)<@Dk6wX?)P}^LX7EYSZ*+^H84~qF>?bIoA
zpnC+5OME*v5d?1;b%Dnwg6FS(PsVNWWZii)Reb4Kq4v?KX%*)xI2(Llxc#-2RQb+)
zqTi)KpKrN|D)W|6l|NSpYd2(vMw6cOLh7<K2J0C>aKfgf#Ih0e<D~ua)1-Ow+%u}L
zE@1RLv!_(cNpkAmUj$L)(X`6bR`g-ERV=k-#G_EOBpN}`QPS6Q1LZg$(J{S9Bz1Ul
zR{D5UMs&bs?A};P>Xc0`Jk0M<r_1^mt#rH6Z>IAVPs6%Y8vD6(@^SPaLK(#pb;^jk
z=woGOg{FIX#LCKF?~w_Y+LcY+-T9=9mAKbdPZXzTqjNG=vDa0ClO&Z%8rGhSgc~Z_
zNptD)QtoNZfX|k*!S{TXe4Jm(4HnH}ZDqwA^wZ9$#~CN0NxdtxYNt=FiAM6+&QW{0
zNCPgt+8J*rY&XWiPxTRaaMNe7p%t=J>1&<xGGE}E_9KJ-us{F;5P$##AOHafKmY;|
zfB*y_Fna<y(=v;E{cqWK4f`*;!vX;aKmY;|fB*y_009U<00Izzz^f@xv2v&F>|Z?I
zvCZ5<E&V5e_t)QDKU=1#Ec?D;-?#sHH3LMgApijgKmY;|fB*y_009U<00I!m1S+}H
zX7(<CRn9Gx(l-J0=l|P={foVwrNtuzAOHafKmY;|fB*y_009U<00PetxNPN(!Akwh
zh<?1F-c)<)pxO6*`Y|8+HJJK26<v#iUOkN5P{wY%E<^e%|C%rTAX%&T4&pYY8*S)#
z4f<7sD<&0s_JtMdIG*ptjzcBu&;KtO_OJG(XGDhG5P$##AOHafKmY;|fB*y_009UL
z1$Od-74wB%s_W~2>q>rbw)DaZ9lQR=@Ba-?99AFz0SG_<0uX=z1Rwwb2tWV=vnY@=
zb4Hav|2OUb4En<Y0SG_<0uX=z1Rwwb2tWV=5P-l83gmLtlK%exJ;VOjzBhv*A|wbv
z00Izz00bZa0SG_<0uX?}Q4)B~%A2J(EyMWp@89nZe#;fje5v$?VN~?@{~s9kBm2Qo
z0zs@0fB*y_009U<00Izz00bZafms(Q(0c)<W##EDfaLRk<HW2-jQAh`0SG_<0uX=z
z1Rwwb2tWV=M_YhD|Ht+J(QX5X9s&@600bZa0SG_<0uX=z1ZH1AfB*j>|NsBP*^d?x
hLI45~fB*y_009U<00Izz00fSbK+d9{{!f1Y|9^f9w@?58


From 4f8cef20c9dd5f7fce5cd20742c1f068d0a18c86 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 20:48:31 -0300
Subject: [PATCH 43/52] add coverage to python job

---
 .github/workflows/python.yaml | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 7d9357809..82b0eede9 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -40,7 +40,7 @@ jobs:
 
     steps:
       - uses: actions/checkout@v3
-      
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
@@ -57,3 +57,30 @@ jobs:
       - name: Run tests
         run: |
           pytest python/tests -v -s
+
+      - name: Run doctests
+        if: success() && matrix.python-version == '3.10'
+        run: |
+          # Needs editable install to run --doctest-modules
+          pip install -e python
+          pytest python --doctest-modules
+
+      - name: Coverage
+        if: success() && matrix.python-version == '3.10'
+        run: |
+          pip uninstall --yes nanoarrow
+          pip install pytest-cov Cython
+          pushd python
+
+          # Build with Cython + gcc coverage options
+          NANOARROW_PYTHON_COVERAGE=1 python setup.py build_ext --inplace
+
+          # Run tests + coverage.py (generates .coverage + coverage.xml files)
+          python -m pytest --cov ./nanoarrow
+          python -m coverage xml
+
+      - name: Upload coverage to codecov
+        if: success() && matrix.python-version == '3.10'
+        uses: codecov/codecov-action@v2
+        with:
+          files: 'python/coverage.xml'

From f6be55c731365b8f7f6b8e14e173bb4b041fa399 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 20:56:48 -0300
Subject: [PATCH 44/52] fix + test doctests from Cython

---
 .github/workflows/python.yaml | 5 +++--
 python/nanoarrow/_lib.pyx     | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 82b0eede9..4b599f7ee 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -61,9 +61,10 @@ jobs:
       - name: Run doctests
         if: success() && matrix.python-version == '3.10'
         run: |
-          # Needs editable install to run --doctest-modules
+          # Needs editable install to run --doctest-cython
+          pip install pytest-cython
           pip install -e python
-          pytest python --doctest-modules
+          pytest python --doctest-cython
 
       - name: Coverage
         if: success() && matrix.python-version == '3.10'
diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index c1d45821e..7317a4b5d 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -773,6 +773,7 @@ cdef class ArrayStream:
     >>> array_stream.get_schema()
     struct<col1: int32>
     >>> array_stream.get_next().length
+    3
     >>> array_stream.get_next() is None
     True
     """

From ce3463fee03d1ba46d5fd8e4f1dc5180b31a8630 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Mon, 12 Jun 2023 22:10:06 -0300
Subject: [PATCH 45/52] basic readme

---
 python/README.ipynb            | 392 +++++++++++++++++++++++++++++++++
 python/README.md               | 176 ++++++++++++++-
 python/nanoarrow/_lib.pyx      |   4 +-
 python/nanoarrow/lib.py        |   2 +-
 python/tests/test_nanoarrow.py |   2 +-
 5 files changed, 562 insertions(+), 14 deletions(-)
 create mode 100644 python/README.ipynb

diff --git a/python/README.ipynb b/python/README.ipynb
new file mode 100644
index 000000000..d89d4c4a6
--- /dev/null
+++ b/python/README.ipynb
@@ -0,0 +1,392 @@
+{
+ "cells": [
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<!---\n",
+    "  Licensed to the Apache Software Foundation (ASF) under one\n",
+    "  or more contributor license agreements.  See the NOTICE file\n",
+    "  distributed with this work for additional information\n",
+    "  regarding copyright ownership.  The ASF licenses this file\n",
+    "  to you under the Apache License, Version 2.0 (the\n",
+    "  \"License\"); you may not use this file except in compliance\n",
+    "  with the License.  You may obtain a copy of the License at\n",
+    "\n",
+    "    http://www.apache.org/licenses/LICENSE-2.0\n",
+    "\n",
+    "  Unless required by applicable law or agreed to in writing,\n",
+    "  software distributed under the License is distributed on an\n",
+    "  \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n",
+    "  KIND, either express or implied.  See the License for the\n",
+    "  specific language governing permissions and limitations\n",
+    "  under the License.\n",
+    "-->\n",
+    "\n",
+    "<!-- Render with jupyter nbconvert --to markdown README.ipynb -->\n",
+    "\n",
+    "# nanoarrow for Python\n",
+    "\n",
+    "The nanoarrow Python package provides bindings to the nanoarrow C library. Like\n",
+    "the nanoarrow C library, it provides tools to facilitate the use of the\n",
+    "[Arrow C Data](https://arrow.apache.org/docs/format/CDataInterface.html) \n",
+    "and [Arrow C Stream](https://arrow.apache.org/docs/format/CStreamInterface.html) \n",
+    "interfaces.\n",
+    "\n",
+    "## Installation\n",
+    "\n",
+    "Python bindings for nanoarrow are not yet available on PyPI. You can install via\n",
+    "URL (requires a C compiler):\n",
+    "\n",
+    "```bash\n",
+    "python -m pip install \"https://github.com/apache/arrow-nanoarrow/archive/refs/heads/main.zip#egg=nanoarrow&subdirectory=python\"\n",
+    "```\n",
+    "\n",
+    "If you can import the namespace, you're good to go!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import nanoarrow as na"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Example\n",
+    "\n",
+    "The Arrow C Data and Arrow C Stream interfaces are comprised of three structures: the `ArrowSchema` which represents a data type of an array, the `ArrowArray` which represents the values of an array, and an `ArrowArrayStream`, which represents zero or more `ArrowArray`s with a common `ArrowSchema`. All three can be wrapped by Python objects using the nanoarrow Python package.\n",
+    "\n",
+    "### Schemas\n",
+    "\n",
+    "Use `nanoarrow.schema()` to convert a data type-like object to an `ArrowSchema`. This is currently only implemented for pyarrow objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pyarrow as pa\n",
+    "schema = na.schema(pa.decimal128(10, 3))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can extract the fields of a `Schema` object one at a time or parse it into a view to extract deserialized parameters."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "d:10,3\n",
+      "10\n",
+      "3\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(schema.format)\n",
+    "print(schema.view().decimal_precision)\n",
+    "print(schema.view().decimal_scale)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `nanoarrow.schema()` helper is currently only implemented for pyarrow objects. If your data type has an `_export_to_c()`-like function, you can get the address of a freshly-allocated `ArrowSchema` as well:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'int32'"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "schema = na.Schema.allocate()\n",
+    "pa.int32()._export_to_c(schema._addr())\n",
+    "schema.view().type"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `Schema` object cleans up after itself: when the object is deleted, the underlying `ArrowSchema` is released."
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Arrays\n",
+    "\n",
+    "You can use `nanoarrow.array()` to convert an array-like object to a `nanoarrow.Array`, optionally attaching a `Schema` that can be used to interpret its contents. This is currently only implemented for pyarrow objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "array = na.array(pa.array([\"one\", \"two\", \"three\", None]))"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Like the `Schema`, you can inspect an `Array` by extracting fields individually:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "4\n",
+      "1\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(array.length)\n",
+    "print(array.null_count)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "...and parse the `Array`/`Schema` combination into a view whose contents are more readily accessible."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[array([7], dtype=uint8),\n",
+       " array([ 0,  3,  6, 11, 11], dtype=int32),\n",
+       " array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],\n",
+       "       dtype='|S1')]"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "view = array.view()\n",
+    "[np.array(buffer) for buffer in view.buffers]"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Like the `Schema`, you can allocate an empty one and access its address with `_addr()` to pass to other array-exporting functions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "3"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "array = na.Array.allocate(na.Schema.allocate())\n",
+    "pa.array([1, 2, 3])._export_to_c(array._addr(), array.schema._addr())\n",
+    "array.length"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Array streams\n",
+    "\n",
+    "You can use `nanoarrow.array_stream()` to convert an object representing a sequence of `Array`s with a common `Schema` to a `nanoarrow.ArrayStream`. This is currently only implemented for pyarrow objects."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pa_array_child = pa.array([1, 2, 3], pa.int32())\n",
+    "pa_array = pa.record_batch([pa_array_child], names=[\"some_column\"])\n",
+    "reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])\n",
+    "array_stream = na.array_stream(reader)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can pull the next array from the stream using `.get_next()` or use it like an iterator. The `.get_next()` method will return `None` when there are no more arrays in the stream."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "struct<some_column: int32>\n",
+      "3\n",
+      "True\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(array_stream.get_schema())\n",
+    "\n",
+    "for array in array_stream:\n",
+    "    print(array.length)\n",
+    "\n",
+    "print(array_stream.get_next() is None)"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can also get the address of a freshly-allocated stream to pass to a suitable exporting function:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "struct<some_column: int32>"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "array_stream = na.ArrayStream.allocate()\n",
+    "reader._export_to_c(array_stream._addr())\n",
+    "array_stream.get_schema()"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Development\n",
+    "\n",
+    "Python bindings for nanoarrow are managed with [setuptools](https://setuptools.pypa.io/en/latest/index.html).\n",
+    "This means you can build the project using:\n",
+    "\n",
+    "```shell\n",
+    "git clone https://github.com/apache/arrow-nanoarrow.git\n",
+    "cd arrow-nanoarrow/python\n",
+    "pip install -e .\n",
+    "```\n",
+    "\n",
+    "Tests use [pytest](https://docs.pytest.org/):\n",
+    "\n",
+    "```shell\n",
+    "# Install dependencies\n",
+    "pip install -e .[test]\n",
+    "\n",
+    "# Run tests\n",
+    "pytest -vvx\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.6"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/python/README.md b/python/README.md
index 04d05898b..db898d24a 100644
--- a/python/README.md
+++ b/python/README.md
@@ -17,10 +17,15 @@
   under the License.
 -->
 
+<!-- Render with jupyter nbconvert --to markdown README.ipynb -->
+
 # nanoarrow for Python
 
-Python bindings for nanoarrow. These are in a preliminary state: see open issues
-and tests/test_nanoarrow.py for usage.
+The nanoarrow Python package provides bindings to the nanoarrow C library. Like
+the nanoarrow C library, it provides tools to facilitate the use of the
+[Arrow C Data](https://arrow.apache.org/docs/format/CDataInterface.html) 
+and [Arrow C Stream](https://arrow.apache.org/docs/format/CStreamInterface.html) 
+interfaces.
 
 ## Installation
 
@@ -31,18 +36,172 @@ URL (requires a C compiler):
 python -m pip install "https://github.com/apache/arrow-nanoarrow/archive/refs/heads/main.zip#egg=nanoarrow&subdirectory=python"
 ```
 
-## Building
+If you can import the namespace, you're good to go!
+
+
+```python
+import nanoarrow as na
+```
+
+## Example
+
+The Arrow C Data and Arrow C Stream interfaces consist of three structures: the `ArrowSchema`, which represents the data type of an array; the `ArrowArray`, which represents the values of an array; and the `ArrowArrayStream`, which represents zero or more `ArrowArray`s with a common `ArrowSchema`. All three can be wrapped by Python objects using the nanoarrow Python package.
+
+### Schemas
+
+Use `nanoarrow.schema()` to convert a data type-like object to an `ArrowSchema`. This is currently only implemented for pyarrow objects.
+
+
+```python
+import pyarrow as pa
+schema = na.schema(pa.decimal128(10, 3))
+```
+
+You can extract the fields of a `Schema` object one at a time or parse it into a view to extract deserialized parameters.
+
+
+```python
+print(schema.format)
+print(schema.view().decimal_precision)
+print(schema.view().decimal_scale)
+```
+
+    d:10,3
+    10
+    3
+
+
+The `nanoarrow.schema()` helper is currently only implemented for pyarrow objects. If your data type has an `_export_to_c()`-like function, you can get the address of a freshly-allocated `ArrowSchema` as well:
+
+
+```python
+schema = na.Schema.allocate()
+pa.int32()._export_to_c(schema._addr())
+schema.view().type
+```
+
+
+
+
+    'int32'
+
+
+
+The `Schema` object cleans up after itself: when the object is deleted, the underlying `ArrowSchema` is released.
+
+### Arrays
+
+You can use `nanoarrow.array()` to convert an array-like object to a `nanoarrow.Array`, optionally attaching a `Schema` that can be used to interpret its contents. This is currently only implemented for pyarrow objects.
+
+
+```python
+array = na.array(pa.array(["one", "two", "three", None]))
+```
+
+Like the `Schema`, you can inspect an `Array` by extracting fields individually:
+
+
+```python
+print(array.length)
+print(array.null_count)
+```
+
+    4
+    1
+
+
+...and parse the `Array`/`Schema` combination into a view whose contents are more readily accessible.
+
+
+```python
+import numpy as np
+view = array.view()
+[np.array(buffer) for buffer in view.buffers]
+```
+
+
+
+
+    [array([7], dtype=uint8),
+     array([ 0,  3,  6, 11, 11], dtype=int32),
+     array([b'o', b'n', b'e', b't', b'w', b'o', b't', b'h', b'r', b'e', b'e'],
+           dtype='|S1')]
+
+
+
+As with the `Schema`, you can allocate an empty `Array` and access its address with `_addr()` to pass to other array-exporting functions.
+
+
+```python
+array = na.Array.allocate(na.Schema.allocate())
+pa.array([1, 2, 3])._export_to_c(array._addr(), array.schema._addr())
+array.length
+```
+
+
+
+
+    3
+
+
+
+### Array streams
+
+You can use `nanoarrow.array_stream()` to convert an object representing a sequence of `Array`s with a common `Schema` to a `nanoarrow.ArrayStream`. This is currently only implemented for pyarrow objects.
+
+
+```python
+pa_array_child = pa.array([1, 2, 3], pa.int32())
+pa_array = pa.record_batch([pa_array_child], names=["some_column"])
+reader = pa.RecordBatchReader.from_batches(pa_array.schema, [pa_array])
+array_stream = na.array_stream(reader)
+```
+
+You can pull the next array from the stream using `.get_next()` or use it like an iterator. The `.get_next()` method will return `None` when there are no more arrays in the stream.
+
 
-Python bindings for nanoarrow are managed with setuptools[setuptools]. This means you
-can build the project using:
+```python
+print(array_stream.get_schema())
+
+for array in array_stream:
+    print(array.length)
+
+print(array_stream.get_next() is None)
+```
+
+    struct<some_column: int32>
+    3
+    True
+
+
+You can also get the address of a freshly-allocated stream to pass to a suitable exporting function:
+
+
+```python
+array_stream = na.ArrayStream.allocate()
+reader._export_to_c(array_stream._addr())
+array_stream.get_schema()
+```
+
+
+
+
+    struct<some_column: int32>
+
+
+
+## Development
+
+Python bindings for nanoarrow are managed with [setuptools](https://setuptools.pypa.io/en/latest/index.html).
+This means you can build the project using:
 
 ```shell
 git clone https://github.com/apache/arrow-nanoarrow.git
-cd python
+cd arrow-nanoarrow/python
 pip install -e .
 ```
 
-Tests use [pytest][pytest]:
+Tests use [pytest](https://docs.pytest.org/):
 
 ```shell
 # Install dependencies
@@ -51,6 +210,3 @@ pip install -e .[test]
 # Run tests
 pytest -vvx
 ```
-
-[pytest]: https://docs.pytest.org/
-[setuptools]: https://setuptools.pypa.io/en/latest/index.html
diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index 7317a4b5d..0564ffa7b 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -416,7 +416,7 @@ cdef class Array:
     cdef Schema _schema
 
     @staticmethod
-    def empty(Schema schema):
+    def allocate(Schema schema):
         base = ArrayHolder()
         return Array(base, base._addr(), schema)
 
@@ -838,7 +838,7 @@ cdef class ArrayStream:
             self._cached_schema = Schema.allocate()
             self._get_schema(self._cached_schema)
 
-        cdef Array array = Array.empty(self._cached_schema)
+        cdef Array array = Array.allocate(self._cached_schema)
         cdef int code = self._ptr.get_next(self._ptr, array._ptr)
         cdef const char* message = NULL
         if code != NANOARROW_OK:
diff --git a/python/nanoarrow/lib.py b/python/nanoarrow/lib.py
index 8841ade47..a3c27e72f 100644
--- a/python/nanoarrow/lib.py
+++ b/python/nanoarrow/lib.py
@@ -43,7 +43,7 @@ def array(obj):
     # not fail with a crash (but will fail with a confusing error). The ideal
     # solution here would be something like __arrow_c_array__()
     if hasattr(obj, "_export_to_c"):
-        out = Array.empty(Schema.allocate())
+        out = Array.allocate(Schema.allocate())
         obj._export_to_c(out._addr(), out.schema._addr())
         return out
     else:
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 9e3170caf..3f5bea1a4 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -41,7 +41,7 @@ def test_schema_helper():
 
 
 def test_array_helper():
-    array = na.Array.empty(na.Schema.allocate())
+    array = na.Array.allocate(na.Schema.allocate())
     assert na.array(array) is array
 
     array = na.array(pa.array([], pa.null()))

From 78cd7973f28035a0b3fcb12884207b0ecb0d369f Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 14 Jun 2023 11:12:58 -0300
Subject: [PATCH 46/52] Update python/nanoarrow/_lib.pyx

Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
---
 python/nanoarrow/_lib.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index 0564ffa7b..d3e43e4cf 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -42,7 +42,7 @@ cdef class SchemaHolder:
     """Memory holder for an ArrowSchema
 
     This class is responsible for the lifecycle of the ArrowSchema
-    whose memory it is responsible. When this object is deleted,
+    whose memory it is responsible for. When this object is deleted,
     a non-NULL release callback is invoked.
     """
     cdef ArrowSchema c_schema

From dc7d91cb2048c064cbbffd6f435307adc1b5d9e9 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Wed, 14 Jun 2023 16:21:21 -0300
Subject: [PATCH 47/52] use the namespace

---
 python/bootstrap.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/bootstrap.py b/python/bootstrap.py
index 9a41446c9..39b4fd950 100644
--- a/python/bootstrap.py
+++ b/python/bootstrap.py
@@ -160,7 +160,7 @@ def copy_or_generate_nanoarrow_c():
         try:
             os.mkdir(build_dir)
             os.chdir(build_dir)
-            os.system(f'cmake ../.. -DNANOARROW_BUNDLE=ON')
+            os.system(f'cmake ../.. -DNANOARROW_BUNDLE=ON -DNANOARROW_NAMESPACE=PythonPkg')
             os.system(f'cmake --install . --prefix=../nanoarrow')
         finally:
             if os.path.exists(build_dir):

From a1150dd75329066d9e0fec21e8d7461b51937054 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Wed, 14 Jun 2023 16:24:20 -0300
Subject: [PATCH 48/52] use cinit instead of init

---
 python/nanoarrow/_lib.pyx | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index d3e43e4cf..f42359c1f 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -104,7 +104,7 @@ cdef class ArrayViewHolder:
     """
     cdef ArrowArrayView c_array_view
 
-    def __init__(self):
+    def __cinit__(self):
         ArrowArrayViewInitFromType(&self.c_array_view, NANOARROW_TYPE_UNINITIALIZED)
 
     def __dealloc__(self):
@@ -193,7 +193,7 @@ cdef class Schema:
         base = SchemaHolder()
         return Schema(base, base._addr())
 
-    def __init__(self, object base, uintptr_t addr):
+    def __cinit__(self, object base, uintptr_t addr):
         self._base = base,
         self._ptr = <ArrowSchema*>addr
 
@@ -318,7 +318,7 @@ cdef class SchemaView:
         NANOARROW_TYPE_SPARSE_UNION
     )
 
-    def __init__(self):
+    def __cinit__(self):
         self._schema_view.type = NANOARROW_TYPE_UNINITIALIZED
         self._schema_view.storage_type = NANOARROW_TYPE_UNINITIALIZED
 
@@ -420,7 +420,7 @@ cdef class Array:
         base = ArrayHolder()
         return Array(base, base._addr(), schema)
 
-    def __init__(self, object base, uintptr_t addr, Schema schema):
+    def __cinit__(self, object base, uintptr_t addr, Schema schema):
         self._base = base,
         self._ptr = <ArrowArray*>addr
         self._schema = schema
@@ -511,7 +511,7 @@ cdef class ArrayView:
     cdef ArrowArrayView* _ptr
     cdef Array _array
 
-    def __init__(self, object base, uintptr_t addr, Array array):
+    def __cinit__(self, object base, uintptr_t addr, Array array):
         self._base = base,
         self._ptr = <ArrowArrayView*>addr
         self._array = array
@@ -542,7 +542,7 @@ cdef class SchemaChildren:
     cdef Schema _parent
     cdef int64_t _length
 
-    def __init__(self, Schema parent):
+    def __cinit__(self, Schema parent):
         self._parent = parent
         self._length = parent._ptr.n_children
 
@@ -570,7 +570,7 @@ cdef class SchemaMetadata:
     cdef const char* _metadata
     cdef ArrowMetadataReader _reader
 
-    def __init__(self, object parent, uintptr_t ptr):
+    def __cinit__(self, object parent, uintptr_t ptr):
         self._parent = parent
         self._metadata = <const char*>ptr
 
@@ -600,7 +600,7 @@ cdef class ArrayChildren:
     cdef Array _parent
     cdef int64_t _length
 
-    def __init__(self, Array parent):
+    def __cinit__(self, Array parent):
         self._parent = parent
         self._length = parent._ptr.n_children
 
@@ -625,7 +625,7 @@ cdef class ArrayViewChildren:
     cdef ArrayView _parent
     cdef int64_t _length
 
-    def __init__(self, ArrayView parent):
+    def __cinit__(self, ArrayView parent):
         self._parent = parent
         self._length = parent._ptr.n_children
 
@@ -659,7 +659,7 @@ cdef class BufferView:
     cdef Py_ssize_t _shape
     cdef Py_ssize_t _strides
 
-    def __init__(self, object base, uintptr_t addr,
+    def __cinit__(self, object base, uintptr_t addr,
                  ArrowBufferType buffer_type, ArrowType buffer_data_type,
                  Py_ssize_t element_size_bits):
         self._base = base
@@ -730,7 +730,7 @@ cdef class ArrayViewBuffers:
     cdef ArrayView _array_view
     cdef int64_t _length
 
-    def __init__(self, ArrayView array_view):
+    def __cinit__(self, ArrayView array_view):
         self._array_view = array_view
         self._length = array_view._array._ptr.n_buffers
 
@@ -781,7 +781,7 @@ cdef class ArrayStream:
     cdef ArrowArrayStream* _ptr
     cdef object _cached_schema
 
-    def __init__(self, object base, uintptr_t addr):
+    def __cinit__(self, object base, uintptr_t addr):
         self._base = base,
         self._ptr = <ArrowArrayStream*>addr
         self._cached_schema = None

From b73804cb01e8d9d2411f27501ffa77e2b55418a7 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Wed, 14 Jun 2023 16:44:12 -0300
Subject: [PATCH 49/52] clean up Cython

---
 python/nanoarrow/_lib.pyx | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index f42359c1f..4fffea2ad 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -38,6 +38,7 @@ def c_version():
     """
     return ArrowNanoarrowVersion().decode("UTF-8")
 
+
 cdef class SchemaHolder:
     """Memory holder for an ArrowSchema
 
@@ -57,6 +58,7 @@ cdef class SchemaHolder:
     def _addr(self):
         return <uintptr_t>&self.c_schema
 
+
 cdef class ArrayHolder:
     """Memory holder for an ArrowArray
 
@@ -95,6 +97,7 @@ cdef class ArrayStreamHolder:
     def _addr(self):
         return <uintptr_t>&self.c_array_stream
 
+
 cdef class ArrayViewHolder:
     """Memory holder for an ArrowArrayView
 
@@ -123,7 +126,7 @@ class NanoarrowException(RuntimeError):
     and store the components of the original error.
     """
 
-    def __init__(self, what, code, message):
+    def __init__(self, what, code, message=""):
         self.what = what
         self.code = code
         self.message = message
@@ -708,7 +711,7 @@ cdef class BufferView:
             return "B"
 
     def __getbuffer__(self, Py_buffer *buffer, int flags):
-        buffer.buf = self._ptr.data.data
+        buffer.buf = <void*>self._ptr.data.data
         buffer.format = self._get_format()
         buffer.internal = NULL
         buffer.itemsize = self._strides
@@ -782,7 +785,7 @@ cdef class ArrayStream:
     cdef object _cached_schema
 
     def __cinit__(self, object base, uintptr_t addr):
-        self._base = base,
+        self._base = base
         self._ptr = <ArrowArrayStream*>addr
         self._cached_schema = None
 
@@ -811,18 +814,13 @@ cdef class ArrayStream:
                     message.decode("UTF-8")
                 )
             else:
-                Error.raise_error("ArrowArrayStream::get_schema()", code)
+                raise NanoarrowException("ArrowArrayStream::get_schema()", code)
 
         self._cached_schema = schema
 
     def get_schema(self):
         """Get the schema associated with this stream
         """
-        # Update the cached copy of the schema as an independent object
-        self._cached_schema = Schema.allocate()
-        self._get_schema(self._cached_schema)
-
-        # Return an independent copy
         out = Schema.allocate()
         self._get_schema(out)
         return out
@@ -834,6 +832,10 @@ cdef class ArrayStream:
         """
         self._assert_valid()
 
+        # Every Array returned by this stream shares a reference to the
+        # same cached Schema Python object. This is independent of
+        # get_schema(), which is guaranteed to call the C object's callback
+        # and faithfully pass on the returned value.
         if self._cached_schema is None:
             self._cached_schema = Schema.allocate()
             self._get_schema(self._cached_schema)
@@ -850,7 +852,7 @@ cdef class ArrayStream:
                     message.decode("UTF-8")
                 )
             else:
-                Error.raise_error("ArrowArrayStream::get_next()", code)
+                raise NanoarrowException("ArrowArrayStream::get_next()", code)
 
         if not array.is_valid():
             return None

From a5d4479e569b783d283fd64612199491ce4670f2 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Wed, 14 Jun 2023 16:48:28 -0300
Subject: [PATCH 50/52] use StopIteration

---
 python/nanoarrow/_lib.pyx      | 8 +++-----
 python/tests/test_nanoarrow.py | 3 ++-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index 4fffea2ad..aa3ee2b61 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -855,15 +855,13 @@ cdef class ArrayStream:
                 raise NanoarrowException("ArrowArrayStream::get_next()", code)
 
         if not array.is_valid():
-            return None
+            raise StopIteration()
         else:
             return array
 
     def __iter__(self):
-        array = self.get_next()
-        while array is not None:
-            yield array
-            array = self.get_next()
+        while True:
+            yield self.get_next()
 
     @staticmethod
     def allocate():
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 3f5bea1a4..9501281d6 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -281,7 +281,8 @@ def test_array_stream():
     assert array_stream.is_valid() is True
     array = array_stream.get_next()
     assert array.schema.children[0].name == "some_column"
-    assert array_stream.get_next() is None
+    with pytest.raises(StopIteration):
+        array_stream.get_next()
 
 
 def test_array_stream_iter():

From 6a825047a57228ecbb8a1f56f2bbec0c0236bce9 Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@voltrondata.com>
Date: Wed, 14 Jun 2023 17:23:27 -0300
Subject: [PATCH 51/52] more clear distinction from the array--array view
 interaction

---
 python/nanoarrow/_lib.pyx      | 64 +++++++++++++++++++++++++---------
 python/tests/test_nanoarrow.py |  3 --
 2 files changed, 48 insertions(+), 19 deletions(-)

diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index aa3ee2b61..b1851f162 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -487,15 +487,18 @@ cdef class Array:
         if result != NANOARROW_OK:
             error.raise_message("ArrowArrayViewSetArray()", result)
 
-        return ArrayView(holder, holder._addr(), self)
+        return ArrayView(holder, holder._addr(), self._schema, self)
 
 
 cdef class ArrayView:
     """ArrowArrayView wrapper
 
-    The ArrowArrayView is a nanoarrow C library structure that facilitates
-    access to the deserialized content of an ArrowArray (e.g., buffer types,
-    lengths, and content). This wrapper extends that facility to Python.
+    The ArrowArrayView is a nanoarrow C library structure that provides
+    structured access to buffer addresses, buffer sizes, and buffer
+    data types. The buffer data is usually propagated from an ArrowArray
+    but can also be propagated from other types of objects (e.g., serialized
+    IPC). The offset and length of this view are independent of its parent
+    (i.e., this object can also represent a slice of its parent).
 
     Examples
     --------
@@ -512,12 +515,26 @@ cdef class ArrayView:
     """
     cdef object _base
     cdef ArrowArrayView* _ptr
-    cdef Array _array
+    cdef Schema _schema
+    cdef object _base_buffer
 
-    def __cinit__(self, object base, uintptr_t addr, Array array):
-        self._base = base,
+    def __cinit__(self, object base, uintptr_t addr, Schema schema, object base_buffer):
+        self._base = base
         self._ptr = <ArrowArrayView*>addr
-        self._array = array
+        self._schema = schema
+        self._base_buffer = base_buffer
+
+    @property
+    def length(self):
+        return self._ptr.length
+
+    @property
+    def offset(self):
+        return self._ptr.offset
+
+    @property
+    def null_count(self):
+        return self._ptr.null_count
 
     @property
     def children(self):
@@ -529,15 +546,20 @@ cdef class ArrayView:
 
     @property
     def dictionary(self):
-        return ArrayView(self, <uintptr_t>self._ptr.dictionary, self._array.dictionary)
-
-    @property
-    def array(self):
-        return self._array
+        if self._ptr.dictionary == NULL:
+            return None
+        else:
+            return ArrayView(
+                self,
+                <uintptr_t>self._ptr.dictionary,
+                self._schema.dictionary,
+                None
+            )
 
     @property
     def schema(self):
-        return self._array._schema
+        return self._schema
+
 
 cdef class SchemaChildren:
     """Wrapper for a lazily-resolved list of Schema children
@@ -639,13 +661,19 @@ cdef class ArrayViewChildren:
         k = int(k)
         if k < 0 or k >= self._length:
             raise IndexError(f"{k} out of range [0, {self._length})")
-        return ArrayView(self._parent, self._child_addr(k), self._parent._array.children[k])
+        return ArrayView(
+            self._parent,
+            self._child_addr(k),
+            self._parent._schema.children[k],
+            None
+        )
 
     cdef _child_addr(self, int64_t i):
         cdef ArrowArrayView** children = self._parent._ptr.children
         cdef ArrowArrayView* child = children[i]
         return <uintptr_t>child
 
+
 cdef class BufferView:
     """Wrapper for Array buffer content
 
@@ -735,7 +763,11 @@ cdef class ArrayViewBuffers:
 
     def __cinit__(self, ArrayView array_view):
         self._array_view = array_view
-        self._length = array_view._array._ptr.n_buffers
+        self._length = 3
+        for i in range(3):
+            if self._array_view._ptr.layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE:
+                self._length = i
+                break
 
     def __len__(self):
         return self._length
diff --git a/python/tests/test_nanoarrow.py b/python/tests/test_nanoarrow.py
index 9501281d6..316227407 100644
--- a/python/tests/test_nanoarrow.py
+++ b/python/tests/test_nanoarrow.py
@@ -168,7 +168,6 @@ def test_array_view():
     array = na.array(pa.array([1, 2, 3], pa.int32()))
     view = array.view()
 
-    assert view.array is array
     assert view.schema is array.schema
 
     data_buffer = memoryview(view.buffers[1])
@@ -201,11 +200,9 @@ def test_array_view_recursive():
     view = array.view()
     assert len(view.buffers) == 1
     assert len(view.children) == 1
-    assert view.array._addr() == array._addr()
     assert view.schema._addr() == array.schema._addr()
 
     assert len(view.children[0].buffers) == 2
-    assert view.children[0].array._addr() == array.children[0]._addr()
     assert view.children[0].schema._addr() == array.schema.children[0]._addr()
     assert view.children[0].schema._addr() == array.children[0].schema._addr()
 

From 6f530e888edb26f581cae5b0892bf934903644dc Mon Sep 17 00:00:00 2001
From: Dewey Dunnington <dewey@fishandwhistle.net>
Date: Wed, 14 Jun 2023 21:24:32 -0300
Subject: [PATCH 52/52] fix doctest

---
 python/nanoarrow/_lib.pyx | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/python/nanoarrow/_lib.pyx b/python/nanoarrow/_lib.pyx
index b1851f162..b5210e3e9 100644
--- a/python/nanoarrow/_lib.pyx
+++ b/python/nanoarrow/_lib.pyx
@@ -810,7 +810,9 @@ cdef class ArrayStream:
     >>> array_stream.get_next().length
     3
     >>> array_stream.get_next() is None
-    True
+    Traceback (most recent call last):
+      ...
+    StopIteration
     """
     cdef object _base
     cdef ArrowArrayStream* _ptr