
feat(python/adbc_driver_manager): export handles and ingest data through python Arrow PyCapsule interface #1346

Merged: 10 commits, Dec 13, 2023
9 changes: 7 additions & 2 deletions python/adbc_driver_manager/adbc_driver_manager/_lib.pxd
@@ -22,10 +22,15 @@ from libc.stdint cimport int32_t, int64_t, uint8_t, uint32_t

cdef extern from "adbc.h" nogil:
# C ABI

ctypedef void (*CArrowSchemaRelease)(void*)
ctypedef void (*CArrowArrayRelease)(void*)

cdef struct CArrowSchema"ArrowSchema":
pass
CArrowSchemaRelease release

cdef struct CArrowArray"ArrowArray":
pass
CArrowArrayRelease release

ctypedef int (*CArrowArrayStreamGetLastError)(void*)
ctypedef int (*CArrowArrayStreamGetNext)(void*, CArrowArray*)
84 changes: 82 additions & 2 deletions python/adbc_driver_manager/adbc_driver_manager/_lib.pyx
@@ -24,10 +24,13 @@ import threading
import typing
from typing import List, Tuple

cimport cpython
import cython
from cpython.bytes cimport PyBytes_FromStringAndSize
from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_New
from libc.stdint cimport int32_t, int64_t, uint8_t, uint32_t, uintptr_t
from libc.string cimport memset
from libc.stdlib cimport malloc, free
from libc.string cimport memcpy, memset
from libcpp.vector cimport vector as c_vector

if typing.TYPE_CHECKING:
@@ -304,9 +307,38 @@ cdef class _AdbcHandle:
f"with open {self._child_type}")


cdef void pycapsule_schema_deleter(object capsule) noexcept:
cdef CArrowSchema* allocated = <CArrowSchema*>PyCapsule_GetPointer(
capsule, "arrow_schema"
)
if allocated.release != NULL:
allocated.release(allocated)
free(allocated)


cdef void pycapsule_array_deleter(object capsule) noexcept:
cdef CArrowArray* allocated = <CArrowArray*> PyCapsule_GetPointer(
capsule, "arrow_array"
)
if allocated.release != NULL:
allocated.release(allocated)
free(allocated)


cdef void pycapsule_stream_deleter(object capsule) noexcept:
cdef CArrowArrayStream* allocated = <CArrowArrayStream*> PyCapsule_GetPointer(
capsule, "arrow_array_stream"
)
if allocated.release != NULL:
allocated.release(allocated)
free(allocated)


cdef class ArrowSchemaHandle:
"""
A wrapper for an allocated ArrowSchema.

This object implements the Arrow PyCapsule interface.
"""
cdef:
CArrowSchema schema
@@ -316,23 +348,56 @@
"""The address of the ArrowSchema."""
return <uintptr_t> &self.schema

def __arrow_c_schema__(self) -> object:
"""Consume this object to get a PyCapsule."""
# Reference:
# https://arrow.apache.org/docs/dev/format/CDataInterface/PyCapsuleInterface.html#create-a-pycapsule
cdef CArrowSchema* allocated = <CArrowSchema*> malloc(sizeof(CArrowSchema))
allocated.release = NULL
capsule = PyCapsule_New(
<void*>allocated, "arrow_schema", &pycapsule_schema_deleter,
)
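# "Move" the struct: copy it into the malloc'd block owned by the capsule and
# mark the original as released so this handle cannot be exported twice.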
memcpy(allocated, &self.schema, sizeof(CArrowSchema))
self.schema.release = NULL
return capsule
Comment on lines +353 to +355
Member Author:

We are "moving" the schema here, while in nanoarrow I opted for a hard copy for the schema (using nanoarrow's ArrowSchemaDeepCopy).

But I think the only advantage of a hard copy is that this means you can consume it multiple times? (or in the case of nanoarrow-python, that the nanoarrow Schema object is still valid and inspectable after it has been converted to eg a pyarrow.Schema)
For ADBC, I think the use case will be much more "receive handle and convert it directly once", given that the Handle object itself isn't useful at all (in contrast to nanoarrow.Schema), so moving here is probably fine?

Member:

I think moving makes sense here.
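
A minimal consumer-side sketch of what the move implies (mirroring test_pycapsule further down; the open connection conn and the "foo" table are assumed to exist, and pyarrow >= 14 is assumed for the capsule-aware constructors):

    import pyarrow

    handle = conn.get_table_schema(catalog=None, db_schema=None, table_name="foo")
    schema = pyarrow.schema(handle)  # consumes the handle via __arrow_c_schema__
    pyarrow.schema(handle)           # raises ValueError: Cannot import released ArrowSchema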



cdef class ArrowArrayHandle:
"""
A wrapper for an allocated ArrowArray.

This object implements the Arrow PyCapsule interface.
"""
cdef:
CArrowArray array

@property
def address(self) -> int:
"""The address of the ArrowArray."""
"""
The address of the ArrowArray.
"""
return <uintptr_t> &self.array

def __arrow_c_array__(self, requested_schema=None) -> object:
"""Consume this object to get a PyCapsule."""
if requested_schema is not None:
raise NotImplementedError("requested_schema")

cdef CArrowArray* allocated = <CArrowArray*> malloc(sizeof(CArrowArray))
allocated.release = NULL
capsule = PyCapsule_New(
<void*>allocated, "arrow_array", pycapsule_array_deleter,
)
memcpy(allocated, &self.array, sizeof(CArrowArray))
self.array.release = NULL
return capsule

Member Author:

This is actually not being used at the moment, because I think none of the ADBC APIs are returning an ArrowArray (only ArrowSchema or ArrowArrayStream).
This handle is currently only used internally for ingesting data (bind).

So I could also remove __arrow_c_array__, given it is unused. If we require pyarrow >= 14, I could probably also remove this class entirely, because then we can use the capsule interface for ingesting data.

Member Author:

And realizing now, this implementation is actually also wrong -> it needs to return two capsules, one for the ArrowArray but also one for the ArrowSchema. And this handle only has the array. So I don't think we can add this dunder here.
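
For reference, the two-capsule shape the protocol expects can be seen on the pyarrow side (a sketch assuming pyarrow >= 14; the record batch is illustrative):

    import pyarrow

    batch = pyarrow.record_batch([[1, 2, 3]], names=["ints"])
    # Per the PyCapsule interface, __arrow_c_array__ returns two capsules:
    # an "arrow_schema" capsule plus an "arrow_array" capsule.
    schema_capsule, array_capsule = batch.__arrow_c_array__()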



cdef class ArrowArrayStreamHandle:
"""
A wrapper for an allocated ArrowArrayStream.

This object implements the Arrow PyCapsule interface.
"""
cdef:
CArrowArrayStream stream
@@ -342,6 +407,21 @@ cdef class ArrowArrayStreamHandle:
"""The address of the ArrowArrayStream."""
return <uintptr_t> &self.stream

def __arrow_c_stream__(self, requested_schema=None) -> object:
"""Consume this object to get a PyCapsule."""
if requested_schema is not None:
raise NotImplementedError("requested_schema")

cdef CArrowArrayStream* allocated = \
<CArrowArrayStream*> malloc(sizeof(CArrowArrayStream))
allocated.release = NULL
capsule = PyCapsule_New(
<void*>allocated, "arrow_array_stream", &pycapsule_stream_deleter,
)
memcpy(allocated, &self.stream, sizeof(CArrowArrayStream))
self.stream.release = NULL
return capsule


class GetObjectsDepth(enum.IntEnum):
ALL = ADBC_OBJECT_DEPTH_ALL
1 change: 0 additions & 1 deletion python/adbc_driver_manager/adbc_driver_manager/dbapi.py
@@ -668,7 +668,6 @@ def execute(self, operation: Union[bytes, str], parameters=None) -> None:
self._prepare_execute(operation, parameters)
handle, self._rowcount = self._stmt.execute_query()
self._results = _RowIterator(
# pyarrow.RecordBatchReader._import_from_c(handle.address)
_reader.AdbcRecordBatchReader._import_from_c(handle.address)
)

4 changes: 2 additions & 2 deletions python/adbc_driver_manager/pyproject.toml
@@ -25,8 +25,8 @@ requires-python = ">=3.9"
dynamic = ["version"]

[project.optional-dependencies]
dbapi = ["pandas", "pyarrow>=8.0.0"]
test = ["duckdb", "pandas", "pyarrow>=8.0.0", "pytest"]
dbapi = ["pandas", "pyarrow>=14.0.1"]

Member Author:

Are we OK with bumping this requirement? (I don't know which existing users of the python adbc packages might be affected.)

Member:

I figured we should bump it just because our official guidance is to upgrade.

Contributor:

Not a huge deal now, but if usage grows over time this could be a pain point for pandas.

Member:

We can remove it. I'm just not sure if we can express in the requirements that you need the fix package if you're on < 14.0.1.

Member Author:

I reverted this change to update the minimum requirement. This PR doesn't strictly speaking need it, so let's keep the discussion to bump the minimum requirement separate.

test = ["duckdb", "pandas", "pyarrow>=14.0.1", "pytest"]

[project.urls]
homepage = "https://arrow.apache.org/adbc/"
54 changes: 54 additions & 0 deletions python/adbc_driver_manager/tests/test_lowlevel.py
@@ -390,3 +390,57 @@ def test_child_tracking(sqlite):
RuntimeError, match="Cannot close AdbcDatabase with open AdbcConnection"
):
db.close()


@pytest.mark.sqlite
def test_pycapsule(sqlite):

Member Author:

I further expanded the specific test that David started, but in addition (at least if requiring pyarrow>=14 for testing) I could also update the other tests above to do the import/export using capsules instead of the current calls to _import_from_c, which would then also automatically give some more test coverage.
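
A sketch of what that switch would look like for the stream-returning calls (here handle is assumed to be an ArrowArrayStreamHandle as returned by execute_query(); each variant consumes the handle, so only one would be used):

    import pyarrow

    # current style: import from a raw address
    reader = pyarrow.RecordBatchReader._import_from_c(handle.address)

    # capsule style (pyarrow >= 14), as used in this test
    reader = pyarrow.RecordBatchReader._import_from_c_capsule(handle.__arrow_c_stream__())
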

_, conn = sqlite
handle = conn.get_table_types()
with pyarrow.RecordBatchReader._import_from_c_capsule(handle.__arrow_c_stream__()) as reader:
reader.read_all()

# set up some data
data = pyarrow.record_batch(
[
[1, 2, 3, 4],
["a", "b", "c", "d"],
],
names=["ints", "strs"],
)
with adbc_driver_manager.AdbcStatement(conn) as stmt:
stmt.set_options(**{adbc_driver_manager.INGEST_OPTION_TARGET_TABLE: "foo"})
_bind(stmt, data)
stmt.execute_update()

# importing a schema

handle = conn.get_table_schema(catalog=None, db_schema=None, table_name="foo")
assert data.schema == pyarrow.schema(handle)
# ensure consumed schema was marked as such
with pytest.raises(ValueError, match="Cannot import released ArrowSchema"):
pyarrow.schema(handle)

# smoke test for the capsule calling release
capsule = conn.get_table_schema(catalog=None, db_schema=None, table_name="foo").__arrow_c_schema__()
del capsule

# importing a stream

with adbc_driver_manager.AdbcStatement(conn) as stmt:
stmt.set_sql_query("SELECT * FROM foo")
handle, _ = stmt.execute_query()

result = pyarrow.table(handle)
assert result.to_batches()[0] == data

# ensure consumed schema was marked as such
with pytest.raises(ValueError, match="Cannot import released ArrowArrayStream"):
pyarrow.table(handle)

# smoke test for the capsule calling release
with adbc_driver_manager.AdbcStatement(conn) as stmt:
stmt.set_sql_query("SELECT * FROM foo")
capsule = stmt.execute_query()[0].__arrow_c_stream__()
del capsule

# TODO: also need to import from things supporting protocol
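
As a hedged sketch of what "importing from things supporting the protocol" could look like on the producer side, any object exposing __arrow_c_stream__ would do; the class and the hypothetical bind path mentioned in the comments below are illustrative, not part of this PR (pyarrow >= 14 assumed):

    import pyarrow

    class StreamSource:
        # Minimal third-party object exposing the Arrow stream protocol.
        def __init__(self, table):
            self._table = table

        def __arrow_c_stream__(self, requested_schema=None):
            # Delegate export to pyarrow's own implementation.
            return self._table.__arrow_c_stream__(requested_schema)

    source = StreamSource(pyarrow.table({"ints": [1, 2, 3], "strs": ["a", "b", "c"]}))
    # A future bind/ingest path could accept `source` directly; for now this only
    # shows that the capsule round-trips through pyarrow:
    reader = pyarrow.RecordBatchReader._import_from_c_capsule(source.__arrow_c_stream__())
    assert reader.read_all().column("ints").to_pylist() == [1, 2, 3]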