From 952cb73d2cee6305fb5af1ee5f0635dd929523a3 Mon Sep 17 00:00:00 2001
From: Tim Paine <3105306+timkpaine@users.noreply.github.com>
Date: Mon, 31 Jul 2023 03:48:39 +0000
Subject: [PATCH] wip linux

---
 .github/workflows/build.yml | 11 +-
 CMakeLists.txt | 79 +-
 cmake/modules/FindPyarrow.cmake | 45 +
 pyproject.toml | 1 +
 setup.py | 2 +-
 src/apn-python/caster.hpp | 6 -
 src/apn-python/common.cpp | 18 +-
 src/apn-python/common.hpp | 6 +-
 src/apn-python/cpython.cpp | 11 +-
 src/apn-python/cpython.hpp | 2 +-
 src/apn-python/pybind11.cpp | 1 +
 src/apn-python/pybind11.hpp | 8 +-
 .../arrow/python/CMakeLists.txt | 18 +
 .../arrow/python/arrow_to_pandas.cc | 2575 +++++++++++++++++
 .../arrow/python/arrow_to_pandas.h | 22 +
 .../arrow/python/arrow_to_python_internal.h | 49 +
 .../arrow/python/benchmark.cc | 38 +
 .../arrow/python/common.cc | 203 ++
 .../apache-arrow-12.0.1/arrow/python/csv.cc | 62 +
 .../apache-arrow-12.0.1/arrow/python/csv.h | 2 +-
 .../arrow/python/datetime.cc | 663 +++++
 .../arrow/python/datetime.h | 4 +-
 .../arrow/python/decimal.cc | 246 ++
 .../arrow/python/deserialize.cc | 495 ++++
 .../arrow/python/deserialize.h | 2 +-
 .../arrow/python/extension_type.cc | 217 ++
 .../arrow/python/extension_type.h | 2 +-
 .../arrow/python/filesystem.cc | 206 ++
 .../arrow/python/filesystem.h | 2 +-
 .../arrow/python/flight.cc | 388 +++
 .../apache-arrow-12.0.1/arrow/python/gdb.cc | 530 ++++
 .../arrow/python/helpers.cc | 470 +++
 .../arrow/python/helpers.h | 2 +-
 .../arrow/python/inference.cc | 748 +++++
 .../arrow/python/inference.h | 2 +-
 .../apache-arrow-12.0.1/arrow/python/init.cc | 24 +
 .../apache-arrow-12.0.1/arrow/python/io.cc | 384 +++
 .../apache-arrow-12.0.1/arrow/python/ipc.cc | 67 +
 .../apache-arrow-12.0.1/arrow/python/ipc.h | 4 +-
 .../arrow/python/numpy_convert.cc | 562 ++++
 .../arrow/python/numpy_convert.h | 2 +-
 .../arrow/python/numpy_internal.h | 182 ++
 .../arrow/python/numpy_to_arrow.cc | 870 ++++++
 .../arrow/python/parquet_encryption.cc | 98 +
 .../arrow/python/platform.h | 9 +-
 .../arrow/python/pyarrow.cc | 94 +
 .../arrow/python/python_test.cc | 888 ++++++
 .../arrow/python/python_to_arrow.cc | 1240 ++++++++
 .../arrow/python/python_to_arrow.h | 2 +-
 .../arrow/python/serialize.cc | 798 +++++
 .../arrow/python/serialize.h | 2 +-
 .../apache-arrow-12.0.1/arrow/python/udf.cc | 736 +++++
 .../apache-arrow-12.0.1/arrow/python/udf.h | 23 +-
 vcpkg.json | 2 +-
 54 files changed, 13036 insertions(+), 87 deletions(-)
 create mode 100644 cmake/modules/FindPyarrow.cmake
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/CMakeLists.txt
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_python_internal.h
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/benchmark.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/common.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/csv.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/datetime.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/decimal.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/flight.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/gdb.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/helpers.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/inference.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/init.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/io.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/ipc.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/numpy_internal.h
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/numpy_to_arrow.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/parquet_encryption.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/pyarrow.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/python_test.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/serialize.cc
 create mode 100644 src/vendored/apache-arrow-12.0.1/arrow/python/udf.cc

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 9c53e7e..efb0744 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -4,6 +4,7 @@ on:
   push:
     branches:
       - main
+      - tkp/linux
     tags:
       - v*
     paths-ignore:
@@ -102,7 +103,15 @@ jobs:
         uses: actions-ext/cpp/setup@v1

       - name: Install dependencies
-        run: make develop
+        run: make develop-cpp
+        if: ${{ matrix.os != 'ubuntu-22.04' }}
+
+      - name: Install dependencies
+        run: sudo apt-get install libarrow-dev
+        if: ${{ matrix.os == 'ubuntu-22.04' }}
+
+      - name: Install dependencies
+        run: make develop-py

       - name: Build project
         run: make build

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 962605f..ba353df 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -56,7 +56,7 @@ include_directories ("${CMAKE_SOURCE_DIR}/src")
 find_package (Color)
 if (NOT DEFINED CMAKE_BUILD_TYPE)
-  set(CMAKE_BUILD_TYPE "Debugma" CACHE STRING "Release/Debug build")
+  set(CMAKE_BUILD_TYPE "Debug" CACHE STRING "Release/Debug build")
 endif()
 if (NOT DEFINED PYTHON_VERSION)
   set(PYTHON_VERSION "3.9" CACHE STRING "Python version to build against")
 endif()
@@ -104,24 +104,6 @@ if(WIN32)
   set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 endif()

-###############################################################################################################
-# Version #
-###########
-# Set version from cmake and extract latest hash if available
-set(ARROW_PYTHON_NOCOPY_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
-set(ARROW_PYTHON_NOCOPY_VERSION_MINOR ${PROJECT_VERSION_MINOR})
-set(ARROW_PYTHON_NOCOPY_VERSION_PATCH ${PROJECT_VERSION_PATCH})
-if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git")
-  # Get latest commit
-  execute_process(COMMAND git rev-parse HEAD
-                  WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
-                  OUTPUT_VARIABLE ARROW_PYTHON_NOCOPY_VERSION_COMMIT_SHA)
-  # strip newline
-  string(REGEX REPLACE "\n$" "" ARROW_PYTHON_NOCOPY_VERSION_COMMIT_SHA "${ARROW_PYTHON_NOCOPY_VERSION_COMMIT_SHA}")
-else()
-  set(ARROW_PYTHON_NOCOPY_VERSION_COMMIT_SHA "release")
-endif()
-
 ###############################################################################################################
 # RPath #
 #########
@@ -148,6 +130,9 @@ endif()
 ###############################################################################################################
 # Flags #
 #########
+set(CMAKE_POSITION_INDEPENDENT_CODE On)
+add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
+
 # Compiler version flags
 if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++2a")
@@ -244,7 +229,7 @@ set(
 )

 add_library(arrow-python-nocopy SHARED ${PROJECT_SRCS})
-target_link_libraries(arrow-python-nocopy Arrow::arrow_static)
+target_link_libraries(arrow-python-nocopy PRIVATE Arrow::arrow_shared)
 set_target_properties(arrow-python-nocopy PROPERTIES PUBLIC_HEADER "${PROJECT_HDRS}")

 # export symbols
@@ -261,14 +246,46 @@ install(TARGETS arrow-python-nocopy EXPORT ArrowPythonNocopy LIBRARY DESTINATION
 if(BUILD_PYTHON)
   # Find Numpy
   find_package(NumPy REQUIRED)
+  include_directories(${NUMPY_INCLUDE_DIR})
+
+  # Find PyArrow. We will link against it for the build only
+  find_package(Pyarrow REQUIRED)
+  include_directories(${PYARROW_INCLUDE_DIR})
+
   # Find PyBind11
-  find_package(pybind11 REQUIRED)
-  include_directories(${pybind11_INCLUDE_DIR})
-  include_directories("${PROJECT_SOURCE_DIR}/src/vendored/apache-arrow-12.0.1")
-
+  find_package(pybind11 REQUIRED)
+  include_directories(${pybind11_INCLUDE_DIR})
+  set(VENDORED_PYARROW_ROOT "${PROJECT_SOURCE_DIR}/src/vendored/apache-arrow-12.0.1")
+  include_directories(${VENDORED_PYARROW_ROOT})
+  set(VENDORED_PYARROW_SRCS
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/arrow_to_pandas.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/benchmark.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/common.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/csv.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/datetime.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/decimal.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/deserialize.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/extension_type.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/filesystem.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/flight.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/gdb.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/helpers.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/inference.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/init.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/io.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/ipc.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/numpy_convert.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/numpy_to_arrow.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/parquet_encryption.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/pyarrow.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/python_test.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/python_to_arrow.cc
+    ${VENDORED_PYARROW_ROOT}/arrow/python/serialize.cc
+    # ${VENDORED_PYARROW_ROOT}/arrow/python/udf.cc
+)
+
   # common functionality
-  add_library(common SHARED "${PROJECT_SOURCE_DIR}/src/apn-python/common.cpp")
+  add_library(common SHARED "${PROJECT_SOURCE_DIR}/src/apn-python/common.cpp" ${VENDORED_PYARROW_SRCS})
   set_target_properties(common PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/src/apn-python/common.hpp")

   # pybind11 extension
@@ -280,12 +297,12 @@ if(BUILD_PYTHON)
   set_target_properties(cpythonextension PROPERTIES PUBLIC_HEADER "${PROJECT_SOURCE_DIR}/src/apn-python/cpython.hpp")

   # Link to standalone/common library
-  target_link_libraries(common PUBLIC arrow-python-nocopy)
-  target_link_libraries(pybind11extension PUBLIC common)
-  target_link_libraries(cpythonextension PUBLIC common)
-  set_property(TARGET common PROPERTY INSTALL_RPATH "${module_origin_path}/lib")
-  set_property(TARGET pybind11extension PROPERTY INSTALL_RPATH "${module_origin_path}/lib")
-  set_property(TARGET cpythonextension PROPERTY INSTALL_RPATH "${module_origin_path}/lib")
+  target_link_libraries(common PRIVATE arrow-python-nocopy)
+  target_link_libraries(pybind11extension PRIVATE common)
+  target_link_libraries(cpythonextension PRIVATE common)
+  set_property(TARGET common PROPERTY INSTALL_RPATH "${module_origin_path}:${module_origin_path}/lib")
+  set_property(TARGET pybind11extension PROPERTY INSTALL_RPATH "${module_origin_path}:${module_origin_path}/lib")
+  set_property(TARGET cpythonextension PROPERTY INSTALL_RPATH "${module_origin_path}:${module_origin_path}/lib")
   set_property(TARGET cpythonextension PROPERTY PREFIX "")

   # install in python module
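The `-D_GLIBCXX_USE_CXX11_ABI=0` definition added above, and the `std::string` -> `char*` API changes later in this patch, both deal with libstdc++ ABI mismatches against a prebuilt libarrow. A minimal sketch of the constraint (assuming, as the flag suggests, that the system libarrow was built with the pre-C++11 string ABI):

    // Sketch only: every translation unit and libarrow itself must agree on
    // _GLIBCXX_USE_CXX11_ABI. Defining it before any standard include mirrors
    // the add_definitions() call above.
    #define _GLIBCXX_USE_CXX11_ABI 0
    #include <arrow/api.h>
    #include <iostream>

    int main() {
        // ToString() hands a std::string across the shared-library boundary.
        // If the two sides disagree on the ABI flag, basic_string mangles
        // differently (std::__cxx11::basic_string vs std::basic_string) and
        // the link fails with undefined references.
        std::shared_ptr<arrow::DataType> type = arrow::int64();
        std::cout << type->ToString() << std::endl;
        return 0;
    }

Returning `char*` instead of `std::string` from the exported helpers (see the common.cpp/common.hpp changes below) sidesteps the same issue, since C strings have a fixed ABI.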
diff --git a/cmake/modules/FindPyarrow.cmake b/cmake/modules/FindPyarrow.cmake
new file mode 100644
index 0000000..ca3fac4
--- /dev/null
+++ b/cmake/modules/FindPyarrow.cmake
@@ -0,0 +1,45 @@
+# Find the Pyarrow package
+# PYARROW_INCLUDE_DIR
+# PYARROW_LIB_DIR
+# PYARROW_LIBRARY
+# PYARROW_FOUND
+# will be set by this script
+
+cmake_minimum_required(VERSION 3.7.2)
+
+find_package( PythonInterp ${PYTHON_VERSION} EXACT REQUIRED )
+
+# Find out the include path
+execute_process(
+  COMMAND "${PYTHON_EXECUTABLE}" -c
+          "from __future__ import print_function;import pyarrow;print(pyarrow.get_include(), end='')"
+  OUTPUT_VARIABLE __pyarrow_path)
+
+# Find out the library path
+execute_process(
+  COMMAND "${PYTHON_EXECUTABLE}" -c
+          "from __future__ import print_function;import pyarrow;print(pyarrow.get_library_dirs()[0], end='')"
+  OUTPUT_VARIABLE __pyarrow_lib_path)
+
+# And the version
+execute_process(
+  COMMAND "${PYTHON_EXECUTABLE}" -c
+          "from __future__ import print_function;import pyarrow;print(pyarrow.__version__, end='')"
+  OUTPUT_VARIABLE __pyarrow_version)
+
+find_path(PYARROW_INCLUDE_DIR arrow/python/pyarrow.h
+  HINTS "${__pyarrow_path}" "${PYTHON_INCLUDE_PATH}" NO_DEFAULT_PATH)
+
+find_path(PYARROW_LIB_DIR libarrow_python.so
+  HINTS "${__pyarrow_lib_path}" "${PYTHON_LIBRARY_PATH}" NO_DEFAULT_PATH)
+
+find_file(PYARROW_LIBRARY libarrow_python.so
+  HINTS "${__pyarrow_lib_path}" "${PYTHON_LIBRARY_PATH}" NO_DEFAULT_PATH)
+
+if(PYARROW_INCLUDE_DIR AND PYARROW_LIB_DIR AND PYARROW_LIBRARY)
+  set(PYARROW_FOUND 1 CACHE INTERNAL "Python pyarrow found")
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(Pyarrow REQUIRED_VARS PYARROW_INCLUDE_DIR PYARROW_LIB_DIR PYARROW_LIBRARY
+                                  VERSION_VAR __pyarrow_version)

diff --git a/pyproject.toml b/pyproject.toml
index a849837..b0625b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -91,6 +91,7 @@ test-requires = [
 [tool.cibuildwheel.linux]
 archs = "x86_64 aarch64"
 skip = "*musllinux* *i686"
+before-all = "make develop && sudo apt-get install libarrow-dev"

 [tool.cibuildwheel.macos]
 archs = "x86_64 arm64"

diff --git a/setup.py b/setup.py
index 4af45e2..315f767 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     )
 )

-if os.path.exists(vcpkg_config_file):
+if os.path.exists(vcpkg_config_file) and os.name != "posix":
     cmake_args.append(
         f"-DCMAKE_TOOLCHAIN_FILE={vcpkg_config_file}"
     )
const_name("pyarrow::Schema")); /* Python->C++ */ bool load(handle src, bool) { - arrow::py::import_pyarrow(); PyObject* source = src.ptr(); if(!arrow::py::is_schema(source)) return false; @@ -45,7 +42,6 @@ namespace detail { /* C++ -> Python) */ static handle cast(std::shared_ptr src, return_value_policy /* policy */, handle /* parent */) { - arrow::py::import_pyarrow(); return arrow::py::wrap_schema(src); } }; @@ -56,7 +52,6 @@ namespace detail { PYBIND11_TYPE_CASTER(std::shared_ptr, const_name("pyarrow::Table")); /* Python->C++ */ bool load(handle src, bool) { - arrow::py::import_pyarrow(); PyObject* source = src.ptr(); if(!arrow::py::is_table(source)) return false; @@ -69,7 +64,6 @@ namespace detail { /* C++ -> Python) */ static handle cast(std::shared_ptr src, return_value_policy /* policy */, handle /* parent */) { - arrow::py::import_pyarrow(); return arrow::py::wrap_table(src); } }; diff --git a/src/apn-python/common.cpp b/src/apn-python/common.cpp index b135e2d..f4e6888 100644 --- a/src/apn-python/common.cpp +++ b/src/apn-python/common.cpp @@ -1,20 +1,19 @@ +#include #include #include #include -std::string array_info_py(std::shared_ptr array) { +char* array_info_py(std::shared_ptr array) { // ABI unstable! // return array_info(array); - char buffer[100]; + char* buffer = new char[100]; struct ArrowArray c_array; (void)ExportArray(*array, &c_array); array_info_cabi(&c_array, buffer, 100); - return std::string(buffer); + return buffer; } std::shared_ptr create_array_py() { - arrow::py::import_pyarrow(); - // ABI unstable! // std::shared_ptr arrow_array = create_array_cabi(); struct ArrowArray c_array; @@ -23,19 +22,17 @@ std::shared_ptr create_array_py() { return arrow_array; } -std::string schema_info_py(std::shared_ptr schema) { +char* schema_info_py(std::shared_ptr schema) { // ABI unstable! // return schema_info(schema); - char buffer[100]; + char* buffer = new char[100]; struct ArrowSchema c_schema; (void)arrow::ExportSchema(*schema, &c_schema); schema_info_cabi(&c_schema, buffer, 100); - return std::string(buffer); + return buffer; } std::shared_ptr create_schema_py() { - arrow::py::import_pyarrow(); - // ABI unstable! 
diff --git a/src/apn-python/common.cpp b/src/apn-python/common.cpp
index b135e2d..f4e6888 100644
--- a/src/apn-python/common.cpp
+++ b/src/apn-python/common.cpp
@@ -1,20 +1,19 @@
+#include
 #include
 #include
 #include

-std::string array_info_py(std::shared_ptr<arrow::Array> array) {
+char* array_info_py(std::shared_ptr<arrow::Array> array) {
     // ABI unstable!
     // return array_info(array);
-    char buffer[100];
+    char* buffer = new char[100];
     struct ArrowArray c_array;
     (void)ExportArray(*array, &c_array);
     array_info_cabi(&c_array, buffer, 100);
-    return std::string(buffer);
+    return buffer;
 }

 std::shared_ptr<arrow::Array> create_array_py() {
-    arrow::py::import_pyarrow();
-
     // ABI unstable!
     // std::shared_ptr<arrow::Array> arrow_array = create_array_cabi();
     struct ArrowArray c_array;
@@ -23,19 +22,17 @@ std::shared_ptr<arrow::Array> create_array_py() {
     return arrow_array;
 }

-std::string schema_info_py(std::shared_ptr<arrow::Schema> schema) {
+char* schema_info_py(std::shared_ptr<arrow::Schema> schema) {
     // ABI unstable!
     // return schema_info(schema);
-    char buffer[100];
+    char* buffer = new char[100];
     struct ArrowSchema c_schema;
     (void)arrow::ExportSchema(*schema, &c_schema);
     schema_info_cabi(&c_schema, buffer, 100);
-    return std::string(buffer);
+    return buffer;
 }

 std::shared_ptr<arrow::Schema> create_schema_py() {
-    arrow::py::import_pyarrow();
-
     // ABI unstable!
     // std::shared_ptr<arrow::Schema> arrow_schema = create_schema();
@@ -50,7 +47,6 @@ std::shared_ptr<arrow::Schema> create_schema_py() {
 // }

 // std::shared_ptr<arrow::Table> create_table_py() {
-//     arrow::py::import_pyarrow();
 //     std::shared_ptr<arrow::Table> arrow_table = create_table();
 //     return arrow_table;
 //     // PyObject* obj = arrow::py::wrap_table(arrow_table);

diff --git a/src/apn-python/common.hpp b/src/apn-python/common.hpp
index d9fac7c..0564b08 100644
--- a/src/apn-python/common.hpp
+++ b/src/apn-python/common.hpp
@@ -4,11 +4,11 @@
 #include
 #include

-LIB_EXPORT std::string array_info_py(std::shared_ptr<arrow::Array> array);
+LIB_EXPORT char* array_info_py(std::shared_ptr<arrow::Array> array);
 LIB_EXPORT std::shared_ptr<arrow::Array> create_array_py();

-LIB_EXPORT std::string schema_info_py(std::shared_ptr<arrow::Schema> schema);
+LIB_EXPORT char* schema_info_py(std::shared_ptr<arrow::Schema> schema);
 LIB_EXPORT std::shared_ptr<arrow::Schema> create_schema_py();

-LIB_EXPORT std::string table_info_py(std::shared_ptr<arrow::Table> table);
+LIB_EXPORT char* table_info_py(std::shared_ptr<arrow::Table> table);
 LIB_EXPORT std::shared_ptr<arrow::Table> create_table_py();

diff --git a/src/apn-python/cpython.cpp b/src/apn-python/cpython.cpp
index e7142fc..9fd6284 100644
--- a/src/apn-python/cpython.cpp
+++ b/src/apn-python/cpython.cpp
@@ -1,5 +1,4 @@
 #include
-#include

 static PyObject* _raise_error(PyObject* module) {
     PyErr_SetString(PyExc_TypeError, "Bad value provided");
@@ -8,7 +7,6 @@ static PyObject* _raise_error(PyObject* module) {

 PyObject* array_info_py_raw(PyObject* self, PyObject* args) {
     PyObject* source;
-    arrow::py::import_pyarrow();

     if(!PyArg_ParseTuple(args, "O", &source))
         return _raise_error(self);
@@ -21,8 +19,8 @@
     if(!result.ok())
         return _raise_error(self);

-    std::string ret_str = array_info_py(std::static_pointer_cast<arrow::Array>(result.ValueOrDie()));
-    return PyUnicode_FromStringAndSize(ret_str.c_str(), ret_str.length());
+    char* ret_str = array_info_py(std::static_pointer_cast<arrow::Array>(result.ValueOrDie()));
+    return PyUnicode_FromStringAndSize(ret_str, strlen(ret_str));
 }

 PyObject* create_array_py_raw(PyObject* self, PyObject* args) {
@@ -31,7 +29,6 @@

 PyObject* schema_info_py_raw(PyObject* self, PyObject* args) {
     PyObject* source;
-    arrow::py::import_pyarrow();

     // parse arguments
     if(!PyArg_ParseTuple(args, "O", &source))
@@ -45,8 +42,8 @@
     if(!result.ok())
         return _raise_error(self);

-    std::string ret_str = schema_info_py(std::static_pointer_cast<arrow::Schema>(result.ValueOrDie()));
-    return PyUnicode_FromStringAndSize(ret_str.c_str(), ret_str.length());
+    char* ret_str = schema_info_py(std::static_pointer_cast<arrow::Schema>(result.ValueOrDie()));
+    return PyUnicode_FromStringAndSize(ret_str, strlen(ret_str));
 }

 PyObject* create_schema_py_raw(PyObject* self, PyObject* Py_UNUSED(args)) {

diff --git a/src/apn-python/cpython.hpp b/src/apn-python/cpython.hpp
index a65bb8b..17ffbcb 100644
--- a/src/apn-python/cpython.hpp
+++ b/src/apn-python/cpython.hpp
@@ -5,7 +5,6 @@
 LIB_EXPORT PyObject* array_info_py_raw(PyObject*, PyObject*);
 LIB_EXPORT PyObject* create_array_py_raw(PyObject*, PyObject*);
-
 LIB_EXPORT PyObject* schema_info_py_raw(PyObject*, PyObject*);
 LIB_EXPORT PyObject* create_schema_py_raw(PyObject*, PyObject*);
@@ -24,6 +23,7 @@ static PyModuleDef cpythonextension_module = {
     PyModuleDef_HEAD_INIT, "cpythonextension", "cpython", -1, cpythonextension_methods};

 PyMODINIT_FUNC PyInit_cpythonextension(void) {
+    arrow::py::import_pyarrow();
     Py_Initialize();
     return PyModule_Create(&cpythonextension_module);
 }
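Moving `arrow::py::import_pyarrow()` into module init, instead of calling it on every entry point as before, is the usual pattern. One nuance: it returns 0 on success and -1 with a Python exception set on failure, so a hedged sketch of an init that propagates the error could look like this (module and method names illustrative only):

    #include <Python.h>
    #include <arrow/python/pyarrow.h>

    static PyMethodDef example_methods[] = {{nullptr, nullptr, 0, nullptr}};

    static PyModuleDef example_module = {
        PyModuleDef_HEAD_INIT, "example", "docs", -1, example_methods};

    PyMODINIT_FUNC PyInit_example(void) {
        // import_pyarrow() loads pyarrow.lib and its C++ API table once;
        // it returns 0 on success, -1 (with a Python error set) on failure.
        if (arrow::py::import_pyarrow() != 0) {
            return nullptr;  // propagate the ImportError to the interpreter
        }
        return PyModule_Create(&example_module);
    }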
diff --git a/src/apn-python/pybind11.cpp b/src/apn-python/pybind11.cpp
index 8e212b1..003774c 100644
--- a/src/apn-python/pybind11.cpp
+++ b/src/apn-python/pybind11.cpp
@@ -1 +1,2 @@
 #include
+

diff --git a/src/apn-python/pybind11.hpp b/src/apn-python/pybind11.hpp
index f072224..277b16b 100644
--- a/src/apn-python/pybind11.hpp
+++ b/src/apn-python/pybind11.hpp
@@ -1,13 +1,19 @@
 #pragma once
+#include
 #include
 #include
+#include
 #include
 #include

 namespace py = pybind11;

+// LIB_EXPORT PyObject* table_info_py_raw(PyObject*, PyObject*);
 PYBIND11_MODULE(pybind11extension, m) {
-    arrow::py::import_pyarrow();
+    py::module_::import("pyarrow");
+    // dlopen("arrow_python.so", RTLD_LAZY);
+    // dlopen("libarrow_python.so", RTLD_LAZY);
+    import_pyarrow__lib();
     m.doc() = "pybind11";
     m.def("array_info", &array_info_py);
     m.def("create_array", &create_array_py);

diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/CMakeLists.txt b/src/vendored/apache-arrow-12.0.1/arrow/python/CMakeLists.txt
new file mode 100644
index 0000000..ff355e4
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/CMakeLists.txt
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+arrow_install_all_headers("arrow/python")

diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.cc
new file mode 100644
index 0000000..91c7b8a
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.cc
@@ -0,0 +1,2575 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +// Functions for pandas conversion via NumPy + +#include "arrow/python/arrow_to_pandas.h" +#include "arrow/python/numpy_interop.h" // IWYU pragma: expand + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/buffer.h" +#include "arrow/datum.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/hashing.h" +#include "arrow/util/int_util.h" +#include "arrow/util/logging.h" +#include "arrow/util/macros.h" +#include "arrow/util/parallel.h" +#include "arrow/visit_type_inline.h" + +#include "arrow/compute/api.h" + +#include "arrow/python/arrow_to_python_internal.h" +#include "arrow/python/common.h" +#include "arrow/python/datetime.h" +#include "arrow/python/decimal.h" +#include "arrow/python/helpers.h" +#include "arrow/python/numpy_convert.h" +#include "arrow/python/numpy_internal.h" +#include "arrow/python/pyarrow.h" +#include "arrow/python/python_to_arrow.h" +#include "arrow/python/type_traits.h" + +namespace arrow { + +class MemoryPool; + +using internal::checked_cast; +using internal::CheckIndexBounds; +using internal::OptionalParallelFor; + +namespace py { +namespace { + +// Fix options for conversion of an inner (child) array. +PandasOptions MakeInnerOptions(PandasOptions options) { + // Make sure conversion of inner dictionary arrays always returns an array, + // not a dict {'indices': array, 'dictionary': array, 'ordered': bool} + options.decode_dictionaries = true; + options.categorical_columns.clear(); + options.strings_to_categorical = false; + + // In ARROW-7723, we found as a result of ARROW-3789 that second + // through microsecond resolution tz-aware timestamps were being promoted to + // use the DATETIME_NANO_TZ conversion path, yielding a datetime64[ns] NumPy + // array in this function. PyArray_GETITEM returns datetime.datetime for + // units second through microsecond but PyLong for nanosecond (because + // datetime.datetime does not support nanoseconds). + // We force the object conversion to preserve the value of the timezone. + // Nanoseconds are returned as integers. 
+ options.coerce_temporal_nanoseconds = false; + + return options; +} + +// ---------------------------------------------------------------------- +// PyCapsule code for setting ndarray base to reference C++ object + +struct ArrayCapsule { + std::shared_ptr array; +}; + +struct BufferCapsule { + std::shared_ptr buffer; +}; + +void ArrayCapsule_Destructor(PyObject* capsule) { + delete reinterpret_cast(PyCapsule_GetPointer(capsule, "arrow::Array")); +} + +void BufferCapsule_Destructor(PyObject* capsule) { + delete reinterpret_cast(PyCapsule_GetPointer(capsule, "arrow::Buffer")); +} + +// ---------------------------------------------------------------------- +// pandas 0.x DataFrame conversion internals + +using internal::arrow_traits; +using internal::npy_traits; + +template +struct WrapBytes {}; + +template <> +struct WrapBytes { + static inline PyObject* Wrap(const char* data, int64_t length) { + return PyUnicode_FromStringAndSize(data, length); + } +}; + +template <> +struct WrapBytes { + static inline PyObject* Wrap(const char* data, int64_t length) { + return PyUnicode_FromStringAndSize(data, length); + } +}; + +template <> +struct WrapBytes { + static inline PyObject* Wrap(const char* data, int64_t length) { + return PyBytes_FromStringAndSize(data, length); + } +}; + +template <> +struct WrapBytes { + static inline PyObject* Wrap(const char* data, int64_t length) { + return PyBytes_FromStringAndSize(data, length); + } +}; + +template <> +struct WrapBytes { + static inline PyObject* Wrap(const char* data, int64_t length) { + return PyBytes_FromStringAndSize(data, length); + } +}; + +static inline bool ListTypeSupported(const DataType& type) { + switch (type.id()) { + case Type::BOOL: + case Type::UINT8: + case Type::INT8: + case Type::UINT16: + case Type::INT16: + case Type::UINT32: + case Type::INT32: + case Type::INT64: + case Type::UINT64: + case Type::HALF_FLOAT: + case Type::FLOAT: + case Type::DOUBLE: + case Type::DECIMAL128: + case Type::DECIMAL256: + case Type::BINARY: + case Type::LARGE_BINARY: + case Type::STRING: + case Type::LARGE_STRING: + case Type::DATE32: + case Type::DATE64: + case Type::STRUCT: + case Type::MAP: + case Type::TIME32: + case Type::TIME64: + case Type::TIMESTAMP: + case Type::DURATION: + case Type::DICTIONARY: + case Type::INTERVAL_MONTH_DAY_NANO: + case Type::NA: // empty list + // The above types are all supported. 
+ return true; + case Type::FIXED_SIZE_LIST: + case Type::LIST: + case Type::LARGE_LIST: { + const auto& list_type = checked_cast(type); + return ListTypeSupported(*list_type.value_type()); + } + case Type::EXTENSION: { + const auto& ext = checked_cast(*type.GetSharedPtr()); + return ListTypeSupported(*(ext.storage_type())); + } + default: + break; + } + return false; +} + +Status CapsulizeArray(const std::shared_ptr& arr, PyObject** out) { + auto capsule = new ArrayCapsule{{arr}}; + *out = PyCapsule_New(reinterpret_cast(capsule), "arrow::Array", + &ArrayCapsule_Destructor); + if (*out == nullptr) { + delete capsule; + RETURN_IF_PYERROR(); + } + return Status::OK(); +} + +Status CapsulizeBuffer(const std::shared_ptr& buffer, PyObject** out) { + auto capsule = new BufferCapsule{{buffer}}; + *out = PyCapsule_New(reinterpret_cast(capsule), "arrow::Buffer", + &BufferCapsule_Destructor); + if (*out == nullptr) { + delete capsule; + RETURN_IF_PYERROR(); + } + return Status::OK(); +} + +Status SetNdarrayBase(PyArrayObject* arr, PyObject* base) { + if (PyArray_SetBaseObject(arr, base) == -1) { + // Error occurred, trust that SetBaseObject sets the error state + Py_XDECREF(base); + RETURN_IF_PYERROR(); + } + return Status::OK(); +} + +Status SetBufferBase(PyArrayObject* arr, const std::shared_ptr& buffer) { + PyObject* base; + RETURN_NOT_OK(CapsulizeBuffer(buffer, &base)); + return SetNdarrayBase(arr, base); +} + +inline void set_numpy_metadata(int type, const DataType* datatype, PyArray_Descr* out) { + auto metadata = reinterpret_cast(out->c_metadata); + if (type == NPY_DATETIME) { + if (datatype->id() == Type::TIMESTAMP) { + const auto& timestamp_type = checked_cast(*datatype); + metadata->meta.base = internal::NumPyFrequency(timestamp_type.unit()); + } else { + DCHECK(false) << "NPY_DATETIME views only supported for Arrow TIMESTAMP types"; + } + } else if (type == NPY_TIMEDELTA) { + DCHECK_EQ(datatype->id(), Type::DURATION); + const auto& duration_type = checked_cast(*datatype); + metadata->meta.base = internal::NumPyFrequency(duration_type.unit()); + } +} + +Status PyArray_NewFromPool(int nd, npy_intp* dims, PyArray_Descr* descr, MemoryPool* pool, + PyObject** out) { + // ARROW-6570: Allocate memory from MemoryPool for a couple reasons + // + // * Track allocations + // * Get better performance through custom allocators + int64_t total_size = descr->elsize; + for (int i = 0; i < nd; ++i) { + total_size *= dims[i]; + } + + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(total_size, pool)); + *out = PyArray_NewFromDescr(&PyArray_Type, descr, nd, dims, + /*strides=*/nullptr, + /*data=*/buffer->mutable_data(), + /*flags=*/NPY_ARRAY_CARRAY | NPY_ARRAY_WRITEABLE, + /*obj=*/nullptr); + if (*out == nullptr) { + RETURN_IF_PYERROR(); + // Trust that error set if NULL returned + } + return SetBufferBase(reinterpret_cast(*out), std::move(buffer)); +} + +template +inline const T* GetPrimitiveValues(const Array& arr) { + if (arr.length() == 0) { + return nullptr; + } + const int elsize = arr.type()->byte_width(); + const auto& prim_arr = checked_cast(arr); + return reinterpret_cast(prim_arr.values()->data() + arr.offset() * elsize); +} + +Status MakeNumPyView(std::shared_ptr arr, PyObject* py_ref, int npy_type, int ndim, + npy_intp* dims, PyObject** out) { + PyAcquireGIL lock; + + PyArray_Descr* descr = internal::GetSafeNumPyDtype(npy_type); + set_numpy_metadata(npy_type, arr->type().get(), descr); + PyObject* result = PyArray_NewFromDescr( + &PyArray_Type, descr, ndim, dims, /*strides=*/nullptr, + 
const_cast(GetPrimitiveValues(*arr)), /*flags=*/0, nullptr); + PyArrayObject* np_arr = reinterpret_cast(result); + if (np_arr == nullptr) { + // Error occurred, trust that error set + return Status::OK(); + } + + PyObject* base; + if (py_ref == nullptr) { + // Capsule will be owned by the ndarray, no incref necessary. See + // ARROW-1973 + RETURN_NOT_OK(CapsulizeArray(arr, &base)); + } else { + Py_INCREF(py_ref); + base = py_ref; + } + RETURN_NOT_OK(SetNdarrayBase(np_arr, base)); + + // Do not allow Arrow data to be mutated + PyArray_CLEARFLAGS(np_arr, NPY_ARRAY_WRITEABLE); + *out = result; + return Status::OK(); +} + +class PandasWriter { + public: + enum type { + OBJECT, + UINT8, + INT8, + UINT16, + INT16, + UINT32, + INT32, + UINT64, + INT64, + HALF_FLOAT, + FLOAT, + DOUBLE, + BOOL, + DATETIME_DAY, + DATETIME_SECOND, + DATETIME_MILLI, + DATETIME_MICRO, + DATETIME_NANO, + DATETIME_SECOND_TZ, + DATETIME_MILLI_TZ, + DATETIME_MICRO_TZ, + DATETIME_NANO_TZ, + TIMEDELTA_SECOND, + TIMEDELTA_MILLI, + TIMEDELTA_MICRO, + TIMEDELTA_NANO, + CATEGORICAL, + EXTENSION + }; + + PandasWriter(const PandasOptions& options, int64_t num_rows, int num_columns) + : options_(options), num_rows_(num_rows), num_columns_(num_columns) { + PyAcquireGIL lock; + internal::InitPandasStaticData(); + } + virtual ~PandasWriter() {} + + void SetBlockData(PyObject* arr) { + block_arr_.reset(arr); + block_data_ = + reinterpret_cast(PyArray_DATA(reinterpret_cast(arr))); + } + + /// \brief Either copy or wrap single array to create pandas-compatible array + /// for Series or DataFrame. num_columns_ can only be 1. Will try to zero + /// copy if possible (or error if not possible and zero_copy_only=True) + virtual Status TransferSingle(std::shared_ptr data, PyObject* py_ref) = 0; + + /// \brief Copy ChunkedArray into a multi-column block + virtual Status CopyInto(std::shared_ptr data, int64_t rel_placement) = 0; + + Status EnsurePlacementAllocated() { + std::lock_guard guard(allocation_lock_); + if (placement_data_ != nullptr) { + return Status::OK(); + } + PyAcquireGIL lock; + npy_intp placement_dims[1] = {num_columns_}; + PyObject* placement_arr = PyArray_SimpleNew(1, placement_dims, NPY_INT64); + RETURN_IF_PYERROR(); + placement_arr_.reset(placement_arr); + placement_data_ = reinterpret_cast( + PyArray_DATA(reinterpret_cast(placement_arr))); + return Status::OK(); + } + + Status EnsureAllocated() { + std::lock_guard guard(allocation_lock_); + if (block_data_ != nullptr) { + return Status::OK(); + } + RETURN_NOT_OK(Allocate()); + return Status::OK(); + } + + virtual bool CanZeroCopy(const ChunkedArray& data) const { return false; } + + virtual Status Write(std::shared_ptr data, int64_t abs_placement, + int64_t rel_placement) { + RETURN_NOT_OK(EnsurePlacementAllocated()); + if (num_columns_ == 1 && options_.allow_zero_copy_blocks) { + RETURN_NOT_OK(TransferSingle(data, /*py_ref=*/nullptr)); + } else { + RETURN_NOT_OK( + CheckNoZeroCopy("Cannot do zero copy conversion into " + "multi-column DataFrame block")); + RETURN_NOT_OK(EnsureAllocated()); + RETURN_NOT_OK(CopyInto(data, rel_placement)); + } + placement_data_[rel_placement] = abs_placement; + return Status::OK(); + } + + virtual Status GetDataFrameResult(PyObject** out) { + PyObject* result = PyDict_New(); + RETURN_IF_PYERROR(); + + PyObject* block; + RETURN_NOT_OK(GetResultBlock(&block)); + + PyDict_SetItemString(result, "block", block); + PyDict_SetItemString(result, "placement", placement_arr_.obj()); + + RETURN_NOT_OK(AddResultMetadata(result)); + *out = result; + 
return Status::OK(); + } + + // Caller steals the reference to this object + virtual Status GetSeriesResult(PyObject** out) { + RETURN_NOT_OK(MakeBlock1D()); + // Caller owns the object now + *out = block_arr_.detach(); + return Status::OK(); + } + + protected: + virtual Status AddResultMetadata(PyObject* result) { return Status::OK(); } + + Status MakeBlock1D() { + // For Series or for certain DataFrame block types, we need to shape to a + // 1D array when there is only one column + PyAcquireGIL lock; + + DCHECK_EQ(1, num_columns_); + + npy_intp new_dims[1] = {static_cast(num_rows_)}; + PyArray_Dims dims; + dims.ptr = new_dims; + dims.len = 1; + + PyObject* reshaped = PyArray_Newshape( + reinterpret_cast(block_arr_.obj()), &dims, NPY_ANYORDER); + RETURN_IF_PYERROR(); + + // ARROW-8801: Here a PyArrayObject is created that is not being managed by + // any OwnedRef object. This object is then put in the resulting object + // with PyDict_SetItemString, which increments the reference count, so a + // memory leak ensues. There are several ways to fix the memory leak but a + // simple one is to put the reshaped 1D block array in this OwnedRefNoGIL + // so it will be correctly decref'd when this class is destructed. + block_arr_.reset(reshaped); + return Status::OK(); + } + + virtual Status GetResultBlock(PyObject** out) { + *out = block_arr_.obj(); + return Status::OK(); + } + + Status CheckNoZeroCopy(const std::string& message) { + if (options_.zero_copy_only) { + return Status::Invalid(message); + } + return Status::OK(); + } + + Status CheckNotZeroCopyOnly(const ChunkedArray& data) { + if (options_.zero_copy_only) { + return Status::Invalid("Needed to copy ", data.num_chunks(), " chunks with ", + data.null_count(), " nulls, but zero_copy_only was True"); + } + return Status::OK(); + } + + virtual Status Allocate() { + return Status::NotImplemented("Override Allocate in subclasses"); + } + + Status AllocateNDArray(int npy_type, int ndim = 2) { + PyAcquireGIL lock; + + PyObject* block_arr = nullptr; + npy_intp block_dims[2] = {0, 0}; + + if (ndim == 2) { + block_dims[0] = num_columns_; + block_dims[1] = num_rows_; + } else { + block_dims[0] = num_rows_; + } + PyArray_Descr* descr = internal::GetSafeNumPyDtype(npy_type); + if (PyDataType_REFCHK(descr)) { + // ARROW-6876: if the array has refcounted items, let Numpy + // own the array memory so as to decref elements on array destruction + block_arr = PyArray_SimpleNewFromDescr(ndim, block_dims, descr); + RETURN_IF_PYERROR(); + } else { + RETURN_NOT_OK( + PyArray_NewFromPool(ndim, block_dims, descr, options_.pool, &block_arr)); + } + + SetBlockData(block_arr); + return Status::OK(); + } + + void SetDatetimeUnit(NPY_DATETIMEUNIT unit) { + PyAcquireGIL lock; + auto date_dtype = reinterpret_cast( + PyArray_DESCR(reinterpret_cast(block_arr_.obj()))->c_metadata); + date_dtype->meta.base = unit; + } + + PandasOptions options_; + + std::mutex allocation_lock_; + + int64_t num_rows_; + int num_columns_; + + OwnedRefNoGIL block_arr_; + uint8_t* block_data_ = nullptr; + + // ndarray + OwnedRefNoGIL placement_arr_; + int64_t* placement_data_ = nullptr; + + private: + ARROW_DISALLOW_COPY_AND_ASSIGN(PandasWriter); +}; + +template +inline void ConvertIntegerWithNulls(const PandasOptions& options, + const ChunkedArray& data, OutType* out_values) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = *data.chunk(c); + const InType* in_values = GetPrimitiveValues(arr); + // Upcast to double, set NaN as appropriate + + for (int i = 0; i < 
arr.length(); ++i) { + *out_values++ = + arr.IsNull(i) ? static_cast(NAN) : static_cast(in_values[i]); + } + } +} + +template +inline void ConvertIntegerNoNullsSameType(const PandasOptions& options, + const ChunkedArray& data, T* out_values) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = *data.chunk(c); + if (arr.length() > 0) { + const T* in_values = GetPrimitiveValues(arr); + memcpy(out_values, in_values, sizeof(T) * arr.length()); + out_values += arr.length(); + } + } +} + +template +inline void ConvertIntegerNoNullsCast(const PandasOptions& options, + const ChunkedArray& data, OutType* out_values) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = *data.chunk(c); + const InType* in_values = GetPrimitiveValues(arr); + for (int64_t i = 0; i < arr.length(); ++i) { + *out_values = in_values[i]; + } + } +} + +template +struct MemoizationTraits { + using Scalar = typename T::c_type; +}; + +template +struct MemoizationTraits> { + // For binary, we memoize string_view as a scalar value to avoid having to + // unnecessarily copy the memory into the memo table data structure + using Scalar = std::string_view; +}; + +// Generic Array -> PyObject** converter that handles object deduplication, if +// requested +template +inline Status ConvertAsPyObjects(const PandasOptions& options, const ChunkedArray& data, + WrapFunction&& wrap_func, PyObject** out_values) { + using ArrayType = typename TypeTraits::ArrayType; + using Scalar = typename MemoizationTraits::Scalar; + + ::arrow::internal::ScalarMemoTable memo_table(options.pool); + std::vector unique_values; + int32_t memo_size = 0; + + auto WrapMemoized = [&](const Scalar& value, PyObject** out_values) { + int32_t memo_index; + RETURN_NOT_OK(memo_table.GetOrInsert(value, &memo_index)); + if (memo_index == memo_size) { + // New entry + RETURN_NOT_OK(wrap_func(value, out_values)); + unique_values.push_back(*out_values); + ++memo_size; + } else { + // Duplicate entry + Py_INCREF(unique_values[memo_index]); + *out_values = unique_values[memo_index]; + } + return Status::OK(); + }; + + auto WrapUnmemoized = [&](const Scalar& value, PyObject** out_values) { + return wrap_func(value, out_values); + }; + + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = arrow::internal::checked_cast(*data.chunk(c)); + if (options.deduplicate_objects) { + RETURN_NOT_OK(internal::WriteArrayObjects(arr, WrapMemoized, out_values)); + } else { + RETURN_NOT_OK(internal::WriteArrayObjects(arr, WrapUnmemoized, out_values)); + } + out_values += arr.length(); + } + return Status::OK(); +} + +Status ConvertStruct(PandasOptions options, const ChunkedArray& data, + PyObject** out_values) { + if (data.num_chunks() == 0) { + return Status::OK(); + } + // ChunkedArray has at least one chunk + auto arr = checked_cast(data.chunk(0).get()); + // Use it to cache the struct type and number of fields for all chunks + int32_t num_fields = arr->num_fields(); + auto array_type = arr->type(); + std::vector fields_data(num_fields * data.num_chunks()); + OwnedRef dict_item; + + // See notes in MakeInnerOptions. + options = MakeInnerOptions(std::move(options)); + // Don't blindly convert because timestamps in lists are handled differently. 
+ options.timestamp_as_object = true; + + for (int c = 0; c < data.num_chunks(); c++) { + auto fields_data_offset = c * num_fields; + auto arr = checked_cast(data.chunk(c).get()); + // Convert the struct arrays first + for (int32_t i = 0; i < num_fields; i++) { + auto field = arr->field(static_cast(i)); + // In case the field is an extension array, use .storage() to convert to Pandas + if (field->type()->id() == Type::EXTENSION) { + const ExtensionArray& arr_ext = checked_cast(*field); + field = arr_ext.storage(); + } + RETURN_NOT_OK(ConvertArrayToPandas(options, field, nullptr, + fields_data[i + fields_data_offset].ref())); + DCHECK(PyArray_Check(fields_data[i + fields_data_offset].obj())); + } + + // Construct a dictionary for each row + const bool has_nulls = data.null_count() > 0; + for (int64_t i = 0; i < arr->length(); ++i) { + if (has_nulls && arr->IsNull(i)) { + Py_INCREF(Py_None); + *out_values = Py_None; + } else { + // Build the new dict object for the row + dict_item.reset(PyDict_New()); + RETURN_IF_PYERROR(); + for (int32_t field_idx = 0; field_idx < num_fields; ++field_idx) { + OwnedRef field_value; + auto name = array_type->field(static_cast(field_idx))->name(); + if (!arr->field(static_cast(field_idx))->IsNull(i)) { + // Value exists in child array, obtain it + auto array = reinterpret_cast( + fields_data[field_idx + fields_data_offset].obj()); + auto ptr = reinterpret_cast(PyArray_GETPTR1(array, i)); + field_value.reset(PyArray_GETITEM(array, ptr)); + RETURN_IF_PYERROR(); + } else { + // Translate the Null to a None + Py_INCREF(Py_None); + field_value.reset(Py_None); + } + // PyDict_SetItemString increments reference count + auto setitem_result = + PyDict_SetItemString(dict_item.obj(), name.c_str(), field_value.obj()); + RETURN_IF_PYERROR(); + DCHECK_EQ(setitem_result, 0); + } + *out_values = dict_item.obj(); + // Grant ownership to the resulting array + Py_INCREF(*out_values); + } + ++out_values; + } + } + return Status::OK(); +} + +Status DecodeDictionaries(MemoryPool* pool, const std::shared_ptr& dense_type, + ArrayVector* arrays) { + compute::ExecContext ctx(pool); + compute::CastOptions options; + for (size_t i = 0; i < arrays->size(); ++i) { + ARROW_ASSIGN_OR_RAISE((*arrays)[i], + compute::Cast(*(*arrays)[i], dense_type, options, &ctx)); + } + return Status::OK(); +} + +Status DecodeDictionaries(MemoryPool* pool, const std::shared_ptr& dense_type, + std::shared_ptr* array) { + auto chunks = (*array)->chunks(); + RETURN_NOT_OK(DecodeDictionaries(pool, dense_type, &chunks)); + *array = std::make_shared(std::move(chunks), dense_type); + return Status::OK(); +} + +template +Status ConvertListsLike(PandasOptions options, const ChunkedArray& data, + PyObject** out_values) { + // Get column of underlying value arrays + ArrayVector value_arrays; + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + // values() does not account for offsets, so we need to slice into it. + // We can't use Flatten(), because it removes the values behind a null list + // value, and that makes the offsets into original list values and our + // flattened_values array different. 
+ std::shared_ptr flattened_values = arr.values()->Slice( + arr.value_offset(0), arr.value_offset(arr.length()) - arr.value_offset(0)); + if (arr.value_type()->id() == Type::EXTENSION) { + const auto& arr_ext = checked_cast(*flattened_values); + value_arrays.emplace_back(arr_ext.storage()); + } else { + value_arrays.emplace_back(flattened_values); + } + } + + using ListArrayType = typename ListArrayT::TypeClass; + const auto& list_type = checked_cast(*data.type()); + auto value_type = list_type.value_type(); + if (value_type->id() == Type::EXTENSION) { + value_type = checked_cast(*value_type).storage_type(); + } + + auto flat_column = std::make_shared(value_arrays, value_type); + + options = MakeInnerOptions(std::move(options)); + + OwnedRefNoGIL owned_numpy_array; + RETURN_NOT_OK(ConvertChunkedArrayToPandas(options, flat_column, nullptr, + owned_numpy_array.ref())); + PyObject* numpy_array = owned_numpy_array.obj(); + DCHECK(PyArray_Check(numpy_array)); + + int64_t chunk_offset = 0; + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + const bool has_nulls = data.null_count() > 0; + for (int64_t i = 0; i < arr.length(); ++i) { + if (has_nulls && arr.IsNull(i)) { + Py_INCREF(Py_None); + *out_values = Py_None; + } else { + // Need to subtract value_offset(0) since the original chunk might be a slice + // into another array. + OwnedRef start(PyLong_FromLongLong(arr.value_offset(i) + chunk_offset - + arr.value_offset(0))); + OwnedRef end(PyLong_FromLongLong(arr.value_offset(i + 1) + chunk_offset - + arr.value_offset(0))); + OwnedRef slice(PySlice_New(start.obj(), end.obj(), nullptr)); + + if (ARROW_PREDICT_FALSE(slice.obj() == nullptr)) { + // Fall out of loop, will return from RETURN_IF_PYERROR + break; + } + *out_values = PyObject_GetItem(numpy_array, slice.obj()); + + if (*out_values == nullptr) { + // Fall out of loop, will return from RETURN_IF_PYERROR + break; + } + } + ++out_values; + } + RETURN_IF_PYERROR(); + + chunk_offset += arr.value_offset(arr.length()) - arr.value_offset(0); + } + + return Status::OK(); +} + +template +Status ConvertMapHelper(F1 resetRow, F2 addPairToRow, F3 stealRow, + const ChunkedArray& data, PyArrayObject* py_keys, + PyArrayObject* py_items, + // needed for null checks in items + const std::vector> item_arrays, + PyObject** out_values) { + OwnedRef key_value; + OwnedRef item_value; + + int64_t chunk_offset = 0; + for (int c = 0; c < data.num_chunks(); ++c) { + const auto& arr = checked_cast(*data.chunk(c)); + const bool has_nulls = data.null_count() > 0; + + // Make a list of key/item pairs for each row in array + for (int64_t i = 0; i < arr.length(); ++i) { + if (has_nulls && arr.IsNull(i)) { + Py_INCREF(Py_None); + *out_values = Py_None; + } else { + int64_t entry_offset = arr.value_offset(i); + int64_t num_pairs = arr.value_offset(i + 1) - entry_offset; + + // Build the new list object for the row of Python pairs + RETURN_NOT_OK(resetRow(num_pairs)); + + // Add each key/item pair in the row + for (int64_t j = 0; j < num_pairs; ++j) { + // Get key value, key is non-nullable for a valid row + auto ptr_key = reinterpret_cast( + PyArray_GETPTR1(py_keys, chunk_offset + entry_offset + j)); + key_value.reset(PyArray_GETITEM(py_keys, ptr_key)); + RETURN_IF_PYERROR(); + + if (item_arrays[c]->IsNull(entry_offset + j)) { + // Translate the Null to a None + Py_INCREF(Py_None); + item_value.reset(Py_None); + } else { + // Get valid value from item array + auto ptr_item = reinterpret_cast( + PyArray_GETPTR1(py_items, 
chunk_offset + entry_offset + j)); + item_value.reset(PyArray_GETITEM(py_items, ptr_item)); + RETURN_IF_PYERROR(); + } + + // Add the key/item pair to the row + RETURN_NOT_OK(addPairToRow(j, key_value, item_value)); + } + + // Pass ownership to the resulting array + *out_values = stealRow(); + } + ++out_values; + } + RETURN_IF_PYERROR(); + + chunk_offset += arr.values()->length(); + } + + return Status::OK(); +} + +// A more helpful error message around TypeErrors that may stem from unhashable keys +Status CheckMapAsPydictsTypeError() { + if (ARROW_PREDICT_TRUE(!PyErr_Occurred())) { + return Status::OK(); + } + if (PyErr_ExceptionMatches(PyExc_TypeError)) { + // Modify the error string directly, so it is re-raised + // with our additional info. + // + // There are not many interesting things happening when this + // is hit. This is intended to only be called directly after + // PyDict_SetItem, where a finite set of errors could occur. + PyObject *type, *value, *traceback; + PyErr_Fetch(&type, &value, &traceback); + std::string message; + RETURN_NOT_OK(internal::PyObject_StdStringStr(value, &message)); + message += + ". If keys are not hashable, then you must use the option " + "[maps_as_pydicts=None (default)]"; + + // resets the error + PyErr_SetString(PyExc_TypeError, message.c_str()); + } + return ConvertPyError(); +} + +Status CheckForDuplicateKeys(bool error_on_duplicate_keys, Py_ssize_t total_dict_len, + Py_ssize_t total_raw_len) { + if (total_dict_len < total_raw_len) { + const char* message = + "[maps_as_pydicts] " + "After conversion of Arrow maps to pydicts, " + "detected data loss due to duplicate keys. " + "Original input length is [%lld], total converted pydict length is [%lld]."; + std::array buf; + std::snprintf(buf.data(), buf.size(), message, total_raw_len, total_dict_len); + + if (error_on_duplicate_keys) { + return Status::UnknownError(buf.data()); + } else { + ARROW_LOG(WARNING) << buf.data(); + } + } + return Status::OK(); +} + +Status ConvertMap(PandasOptions options, const ChunkedArray& data, + PyObject** out_values) { + // Get columns of underlying key/item arrays + std::vector> key_arrays; + std::vector> item_arrays; + for (int c = 0; c < data.num_chunks(); ++c) { + const auto& map_arr = checked_cast(*data.chunk(c)); + key_arrays.emplace_back(map_arr.keys()); + item_arrays.emplace_back(map_arr.items()); + } + + const auto& map_type = checked_cast(*data.type()); + auto key_type = map_type.key_type(); + auto item_type = map_type.item_type(); + + // ARROW-6899: Convert dictionary-encoded children to dense instead of + // failing below. A more efficient conversion than this could be done later + if (key_type->id() == Type::DICTIONARY) { + auto dense_type = checked_cast(*key_type).value_type(); + RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &key_arrays)); + key_type = dense_type; + } + if (item_type->id() == Type::DICTIONARY) { + auto dense_type = checked_cast(*item_type).value_type(); + RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &item_arrays)); + item_type = dense_type; + } + + // See notes in MakeInnerOptions. + options = MakeInnerOptions(std::move(options)); + // Don't blindly convert because timestamps in lists are handled differently. 
+ options.timestamp_as_object = true; + + auto flat_keys = std::make_shared(key_arrays, key_type); + auto flat_items = std::make_shared(item_arrays, item_type); + OwnedRefNoGIL owned_numpy_keys; + RETURN_NOT_OK( + ConvertChunkedArrayToPandas(options, flat_keys, nullptr, owned_numpy_keys.ref())); + OwnedRefNoGIL owned_numpy_items; + RETURN_NOT_OK( + ConvertChunkedArrayToPandas(options, flat_items, nullptr, owned_numpy_items.ref())); + PyArrayObject* py_keys = reinterpret_cast(owned_numpy_keys.obj()); + PyArrayObject* py_items = reinterpret_cast(owned_numpy_items.obj()); + + if (options.maps_as_pydicts == MapConversionType::DEFAULT) { + // The default behavior to express an Arrow MAP as a list of [(key, value), ...] pairs + OwnedRef list_item; + return ConvertMapHelper( + [&list_item](int64_t num_pairs) { + list_item.reset(PyList_New(num_pairs)); + return CheckPyError(); + }, + [&list_item](int64_t idx, OwnedRef& key_value, OwnedRef& item_value) { + PyList_SET_ITEM(list_item.obj(), idx, + PyTuple_Pack(2, key_value.obj(), item_value.obj())); + return CheckPyError(); + }, + [&list_item] { return list_item.detach(); }, data, py_keys, py_items, item_arrays, + out_values); + } else { + // Use a native pydict + OwnedRef dict_item; + Py_ssize_t total_dict_len{0}; + Py_ssize_t total_raw_len{0}; + + bool error_on_duplicate_keys; + if (options.maps_as_pydicts == MapConversionType::LOSSY) { + error_on_duplicate_keys = false; + } else if (options.maps_as_pydicts == MapConversionType::STRICT_) { + error_on_duplicate_keys = true; + } else { + auto val = std::underlying_type_t(options.maps_as_pydicts); + return Status::UnknownError("Received unknown option for maps_as_pydicts: " + + std::to_string(val)); + } + + auto status = ConvertMapHelper( + [&dict_item, &total_raw_len](int64_t num_pairs) { + total_raw_len += num_pairs; + dict_item.reset(PyDict_New()); + return CheckPyError(); + }, + [&dict_item]([[maybe_unused]] int64_t idx, OwnedRef& key_value, + OwnedRef& item_value) { + auto setitem_result = + PyDict_SetItem(dict_item.obj(), key_value.obj(), item_value.obj()); + ARROW_RETURN_NOT_OK(CheckMapAsPydictsTypeError()); + // returns -1 if there are internal errors around hashing/resizing + return setitem_result == 0 ? Status::OK() + : Status::UnknownError( + "[maps_as_pydicts] " + "Unexpected failure inserting Arrow (key, " + "value) pair into Python dict"); + }, + [&dict_item, &total_dict_len] { + total_dict_len += PyDict_Size(dict_item.obj()); + return dict_item.detach(); + }, + data, py_keys, py_items, item_arrays, out_values); + + ARROW_RETURN_NOT_OK(status); + // If there were no errors generating the pydicts, + // then check if we detected any data loss from duplicate keys. + return CheckForDuplicateKeys(error_on_duplicate_keys, total_dict_len, total_raw_len); + } +} + +template +inline void ConvertNumericNullable(const ChunkedArray& data, InType na_value, + OutType* out_values) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = *data.chunk(c); + const InType* in_values = GetPrimitiveValues(arr); + + if (arr.null_count() > 0) { + for (int64_t i = 0; i < arr.length(); ++i) { + *out_values++ = arr.IsNull(i) ? 
na_value : in_values[i]; + } + } else { + memcpy(out_values, in_values, sizeof(InType) * arr.length()); + out_values += arr.length(); + } + } +} + +template +inline void ConvertNumericNullableCast(const ChunkedArray& data, InType na_value, + OutType* out_values) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = *data.chunk(c); + const InType* in_values = GetPrimitiveValues(arr); + + for (int64_t i = 0; i < arr.length(); ++i) { + *out_values++ = arr.IsNull(i) ? static_cast(na_value) + : static_cast(in_values[i]); + } + } +} + +template +class TypedPandasWriter : public PandasWriter { + public: + using T = typename npy_traits::value_type; + + using PandasWriter::PandasWriter; + + Status TransferSingle(std::shared_ptr data, PyObject* py_ref) override { + if (CanZeroCopy(*data)) { + PyObject* wrapped; + npy_intp dims[2] = {static_cast(num_columns_), + static_cast(num_rows_)}; + RETURN_NOT_OK( + MakeNumPyView(data->chunk(0), py_ref, NPY_TYPE, /*ndim=*/2, dims, &wrapped)); + SetBlockData(wrapped); + return Status::OK(); + } else { + RETURN_NOT_OK(CheckNotZeroCopyOnly(*data)); + RETURN_NOT_OK(EnsureAllocated()); + return CopyInto(data, /*rel_placement=*/0); + } + } + + Status CheckTypeExact(const DataType& type, Type::type expected) { + if (type.id() != expected) { + // TODO(wesm): stringify NumPy / pandas type + return Status::NotImplemented("Cannot write Arrow data of type ", type.ToString()); + } + return Status::OK(); + } + + T* GetBlockColumnStart(int64_t rel_placement) { + return reinterpret_cast(block_data_) + rel_placement * num_rows_; + } + + protected: + Status Allocate() override { return AllocateNDArray(NPY_TYPE); } +}; + +struct ObjectWriterVisitor { + const PandasOptions& options; + const ChunkedArray& data; + PyObject** out_values; + + Status Visit(const NullType& type) { + for (int c = 0; c < data.num_chunks(); c++) { + std::shared_ptr arr = data.chunk(c); + + for (int64_t i = 0; i < arr->length(); ++i) { + // All values are null + Py_INCREF(Py_None); + *out_values = Py_None; + ++out_values; + } + } + return Status::OK(); + } + + Status Visit(const BooleanType& type) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + + for (int64_t i = 0; i < arr.length(); ++i) { + if (arr.IsNull(i)) { + Py_INCREF(Py_None); + *out_values++ = Py_None; + } else if (arr.Value(i)) { + // True + Py_INCREF(Py_True); + *out_values++ = Py_True; + } else { + // False + Py_INCREF(Py_False); + *out_values++ = Py_False; + } + } + } + return Status::OK(); + } + + template + enable_if_integer Visit(const Type& type) { + using T = typename Type::c_type; + auto WrapValue = [](T value, PyObject** out) { + *out = std::is_signed::value ? 
PyLong_FromLongLong(value) + : PyLong_FromUnsignedLongLong(value); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); + } + + template + enable_if_t::value || is_fixed_size_binary_type::value, + Status> + Visit(const Type& type) { + auto WrapValue = [](const std::string_view& view, PyObject** out) { + *out = WrapBytes::Wrap(view.data(), view.length()); + if (*out == nullptr) { + PyErr_Clear(); + return Status::UnknownError("Wrapping ", view, " failed"); + } + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); + } + + template + enable_if_date Visit(const Type& type) { + auto WrapValue = [](typename Type::c_type value, PyObject** out) { + RETURN_NOT_OK(internal::PyDate_from_int(value, Type::UNIT, out)); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); + } + + template + enable_if_time Visit(const Type& type) { + const TimeUnit::type unit = type.unit(); + auto WrapValue = [unit](typename Type::c_type value, PyObject** out) { + RETURN_NOT_OK(internal::PyTime_from_int(value, unit, out)); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, WrapValue, out_values); + } + + template + enable_if_timestamp Visit(const Type& type) { + const TimeUnit::type unit = type.unit(); + OwnedRef tzinfo; + + auto ConvertTimezoneNaive = [&](typename Type::c_type value, PyObject** out) { + RETURN_NOT_OK(internal::PyDateTime_from_int(value, unit, out)); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + auto ConvertTimezoneAware = [&](typename Type::c_type value, PyObject** out) { + PyObject* naive_datetime; + RETURN_NOT_OK(ConvertTimezoneNaive(value, &naive_datetime)); + + // convert the timezone naive datetime object to timezone aware + // two step conversion of the datetime mimics Python's code: + // dt.replace(tzinfo=datetime.timezone.utc).astimezone(tzinfo) + // first step: replacing timezone with timezone.utc (replace method) + OwnedRef args(PyTuple_New(0)); + OwnedRef keywords(PyDict_New()); + PyDict_SetItemString(keywords.obj(), "tzinfo", PyDateTime_TimeZone_UTC); + OwnedRef naive_datetime_replace(PyObject_GetAttrString(naive_datetime, "replace")); + OwnedRef datetime_utc( + PyObject_Call(naive_datetime_replace.obj(), args.obj(), keywords.obj())); + // second step: adjust the datetime to tzinfo timezone (astimezone method) + *out = PyObject_CallMethod(datetime_utc.obj(), "astimezone", "O", tzinfo.obj()); + + // the timezone naive object is no longer required + Py_DECREF(naive_datetime); + RETURN_IF_PYERROR(); + + return Status::OK(); + }; + + if (!type.timezone().empty() && !options.ignore_timezone) { + // convert timezone aware + PyObject* tzobj; + ARROW_ASSIGN_OR_RAISE(tzobj, internal::StringToTzinfo(type.timezone())); + tzinfo.reset(tzobj); + RETURN_IF_PYERROR(); + RETURN_NOT_OK( + ConvertAsPyObjects(options, data, ConvertTimezoneAware, out_values)); + } else { + // convert timezone naive + RETURN_NOT_OK( + ConvertAsPyObjects(options, data, ConvertTimezoneNaive, out_values)); + } + + return Status::OK(); + } + + template + enable_if_t::value, Status> Visit( + const Type& type) { + OwnedRef args(PyTuple_New(0)); + OwnedRef kwargs(PyDict_New()); + RETURN_IF_PYERROR(); + auto to_date_offset = [&](const MonthDayNanoIntervalType::MonthDayNanos& interval, + PyObject** out) { + DCHECK(internal::BorrowPandasDataOffsetType() != nullptr); + // DateOffset objects do not add nanoseconds component 
to pd.Timestamp. + // as of Pandas 1.3.3 + // (https://github.com/pandas-dev/pandas/issues/43892). + // So convert microseconds and remainder to preserve data + // but give users more expected results. + int64_t microseconds = interval.nanoseconds / 1000; + int64_t nanoseconds; + if (interval.nanoseconds >= 0) { + nanoseconds = interval.nanoseconds % 1000; + } else { + nanoseconds = -((-interval.nanoseconds) % 1000); + } + + PyDict_SetItemString(kwargs.obj(), "months", PyLong_FromLong(interval.months)); + PyDict_SetItemString(kwargs.obj(), "days", PyLong_FromLong(interval.days)); + PyDict_SetItemString(kwargs.obj(), "microseconds", + PyLong_FromLongLong(microseconds)); + PyDict_SetItemString(kwargs.obj(), "nanoseconds", PyLong_FromLongLong(nanoseconds)); + *out = + PyObject_Call(internal::BorrowPandasDataOffsetType(), args.obj(), kwargs.obj()); + RETURN_IF_PYERROR(); + return Status::OK(); + }; + return ConvertAsPyObjects(options, data, to_date_offset, + out_values); + } + + Status Visit(const Decimal128Type& type) { + OwnedRef decimal; + OwnedRef Decimal; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); + RETURN_NOT_OK(internal::ImportFromModule(decimal.obj(), "Decimal", &Decimal)); + PyObject* decimal_constructor = Decimal.obj(); + + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + + for (int64_t i = 0; i < arr.length(); ++i) { + if (arr.IsNull(i)) { + Py_INCREF(Py_None); + *out_values++ = Py_None; + } else { + *out_values++ = + internal::DecimalFromString(decimal_constructor, arr.FormatValue(i)); + RETURN_IF_PYERROR(); + } + } + } + + return Status::OK(); + } + + Status Visit(const Decimal256Type& type) { + OwnedRef decimal; + OwnedRef Decimal; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal)); + RETURN_NOT_OK(internal::ImportFromModule(decimal.obj(), "Decimal", &Decimal)); + PyObject* decimal_constructor = Decimal.obj(); + + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + + for (int64_t i = 0; i < arr.length(); ++i) { + if (arr.IsNull(i)) { + Py_INCREF(Py_None); + *out_values++ = Py_None; + } else { + *out_values++ = + internal::DecimalFromString(decimal_constructor, arr.FormatValue(i)); + RETURN_IF_PYERROR(); + } + } + } + + return Status::OK(); + } + + template + enable_if_t::value || is_var_length_list_type::value, + Status> + Visit(const T& type) { + using ArrayType = typename TypeTraits::ArrayType; + if (!ListTypeSupported(*type.value_type())) { + return Status::NotImplemented( + "Not implemented type for conversion from List to Pandas: ", + type.value_type()->ToString()); + } + return ConvertListsLike(options, data, out_values); + } + + Status Visit(const MapType& type) { return ConvertMap(options, data, out_values); } + + Status Visit(const StructType& type) { + return ConvertStruct(options, data, out_values); + } + + template + enable_if_t::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value || + (std::is_base_of::value && + !std::is_same::value) || + std::is_base_of::value, + Status> + Visit(const Type& type) { + return Status::NotImplemented("No implemented conversion to object dtype: ", + type.ToString()); + } +}; + +class ObjectWriter : public TypedPandasWriter { + public: + using TypedPandasWriter::TypedPandasWriter; + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + PyAcquireGIL lock; + ObjectWriterVisitor visitor{this->options_, *data, + 
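+    // The final member aims out_values at this column's slice of the 2-D
+    // object block: block_data_ + rel_placement * num_rows_ (see
+    // GetBlockColumnStart above).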
this->GetBlockColumnStart(rel_placement)}; + return VisitTypeInline(*data->type(), &visitor); + } +}; + +static inline bool IsNonNullContiguous(const ChunkedArray& data) { + return data.num_chunks() == 1 && data.null_count() == 0; +} + +template +class IntWriter : public TypedPandasWriter { + public: + using ArrowType = typename npy_traits::TypeClass; + using TypedPandasWriter::TypedPandasWriter; + + bool CanZeroCopy(const ChunkedArray& data) const override { + return IsNonNullContiguous(data); + } + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + RETURN_NOT_OK(this->CheckTypeExact(*data->type(), ArrowType::type_id)); + ConvertIntegerNoNullsSameType( + this->options_, *data, this->GetBlockColumnStart(rel_placement)); + return Status::OK(); + } +}; + +template +class FloatWriter : public TypedPandasWriter { + public: + using ArrowType = typename npy_traits::TypeClass; + using TypedPandasWriter::TypedPandasWriter; + using T = typename ArrowType::c_type; + + bool CanZeroCopy(const ChunkedArray& data) const override { + return IsNonNullContiguous(data) && data.type()->id() == ArrowType::type_id; + } + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + Type::type in_type = data->type()->id(); + auto out_values = this->GetBlockColumnStart(rel_placement); + +#define INTEGER_CASE(IN_TYPE) \ + ConvertIntegerWithNulls(this->options_, *data, out_values); \ + break; + + switch (in_type) { + case Type::UINT8: + INTEGER_CASE(uint8_t); + case Type::INT8: + INTEGER_CASE(int8_t); + case Type::UINT16: + INTEGER_CASE(uint16_t); + case Type::INT16: + INTEGER_CASE(int16_t); + case Type::UINT32: + INTEGER_CASE(uint32_t); + case Type::INT32: + INTEGER_CASE(int32_t); + case Type::UINT64: + INTEGER_CASE(uint64_t); + case Type::INT64: + INTEGER_CASE(int64_t); + case Type::HALF_FLOAT: + ConvertNumericNullableCast(*data, npy_traits::na_sentinel, out_values); + case Type::FLOAT: + ConvertNumericNullableCast(*data, npy_traits::na_sentinel, out_values); + break; + case Type::DOUBLE: + ConvertNumericNullableCast(*data, npy_traits::na_sentinel, out_values); + break; + default: + return Status::NotImplemented("Cannot write Arrow data of type ", + data->type()->ToString(), + " to a Pandas floating point block"); + } + +#undef INTEGER_CASE + + return Status::OK(); + } +}; + +using UInt8Writer = IntWriter; +using Int8Writer = IntWriter; +using UInt16Writer = IntWriter; +using Int16Writer = IntWriter; +using UInt32Writer = IntWriter; +using Int32Writer = IntWriter; +using UInt64Writer = IntWriter; +using Int64Writer = IntWriter; +using Float16Writer = FloatWriter; +using Float32Writer = FloatWriter; +using Float64Writer = FloatWriter; + +class BoolWriter : public TypedPandasWriter { + public: + using TypedPandasWriter::TypedPandasWriter; + + Status TransferSingle(std::shared_ptr data, PyObject* py_ref) override { + RETURN_NOT_OK( + CheckNoZeroCopy("Zero copy conversions not possible with " + "boolean types")); + RETURN_NOT_OK(EnsureAllocated()); + return CopyInto(data, /*rel_placement=*/0); + } + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + RETURN_NOT_OK(this->CheckTypeExact(*data->type(), Type::BOOL)); + auto out_values = this->GetBlockColumnStart(rel_placement); + for (int c = 0; c < data->num_chunks(); c++) { + const auto& arr = checked_cast(*data->chunk(c)); + for (int64_t i = 0; i < arr.length(); ++i) { + *out_values++ = static_cast(arr.Value(i)); + } + } + return Status::OK(); + } +}; + +// 
---------------------------------------------------------------------- +// Date / timestamp types + +template +inline void ConvertDatetime(const ChunkedArray& data, int64_t* out_values) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = *data.chunk(c); + const T* in_values = GetPrimitiveValues(arr); + + for (int64_t i = 0; i < arr.length(); ++i) { + *out_values++ = arr.IsNull(i) ? kPandasTimestampNull + : (static_cast(in_values[i]) * SHIFT); + } + } +} + +template +void ConvertDatesShift(const ChunkedArray& data, int64_t* out_values) { + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = *data.chunk(c); + const T* in_values = GetPrimitiveValues(arr); + for (int64_t i = 0; i < arr.length(); ++i) { + *out_values++ = arr.IsNull(i) ? kPandasTimestampNull + : static_cast(in_values[i]) / SHIFT; + } + } +} + +class DatetimeDayWriter : public TypedPandasWriter { + public: + using TypedPandasWriter::TypedPandasWriter; + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + int64_t* out_values = this->GetBlockColumnStart(rel_placement); + const auto& type = checked_cast(*data->type()); + switch (type.unit()) { + case DateUnit::DAY: + ConvertDatesShift(*data, out_values); + break; + case DateUnit::MILLI: + ConvertDatesShift(*data, out_values); + break; + } + return Status::OK(); + } + + protected: + Status Allocate() override { + RETURN_NOT_OK(this->AllocateNDArray(NPY_DATETIME)); + SetDatetimeUnit(NPY_FR_D); + return Status::OK(); + } +}; + +template +class DatetimeWriter : public TypedPandasWriter { + public: + using TypedPandasWriter::TypedPandasWriter; + + bool CanZeroCopy(const ChunkedArray& data) const override { + if (data.type()->id() == Type::TIMESTAMP) { + const auto& type = checked_cast(*data.type()); + return IsNonNullContiguous(data) && type.unit() == UNIT; + } else { + return false; + } + } + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + const auto& ts_type = checked_cast(*data->type()); + DCHECK_EQ(UNIT, ts_type.unit()) << "Should only call instances of this writer " + << "with arrays of the correct unit"; + ConvertNumericNullable(*data, kPandasTimestampNull, + this->GetBlockColumnStart(rel_placement)); + return Status::OK(); + } + + protected: + Status Allocate() override { + RETURN_NOT_OK(this->AllocateNDArray(NPY_DATETIME)); + SetDatetimeUnit(internal::NumPyFrequency(UNIT)); + return Status::OK(); + } +}; + +using DatetimeSecondWriter = DatetimeWriter; + +class DatetimeMilliWriter : public DatetimeWriter { + public: + using DatetimeWriter::DatetimeWriter; + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + Type::type type = data->type()->id(); + int64_t* out_values = this->GetBlockColumnStart(rel_placement); + if (type == Type::DATE32) { + // Convert from days since epoch to datetime64[ms] + ConvertDatetime(*data, out_values); + } else if (type == Type::DATE64) { + ConvertNumericNullable(*data, kPandasTimestampNull, out_values); + } else { + const auto& ts_type = checked_cast(*data->type()); + DCHECK_EQ(TimeUnit::MILLI, ts_type.unit()) + << "Should only call instances of this writer " + << "with arrays of the correct unit"; + ConvertNumericNullable(*data, kPandasTimestampNull, out_values); + } + return Status::OK(); + } +}; + +using DatetimeMicroWriter = DatetimeWriter; + +class DatetimeNanoWriter : public DatetimeWriter { + public: + using DatetimeWriter::DatetimeWriter; + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + Type::type type = 
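+    // This writer normalizes every input to datetime64[ns]: DATE32/DATE64
+    // values are shifted by a compile-time multiplier, while second/milli/
+    // microsecond timestamps go through compute::Cast (CastOptions::Safe by
+    // default, Unsafe when options_.safe_cast is false).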
data->type()->id(); + int64_t* out_values = this->GetBlockColumnStart(rel_placement); + compute::ExecContext ctx(options_.pool); + compute::CastOptions options; + if (options_.safe_cast) { + options = compute::CastOptions::Safe(); + } else { + options = compute::CastOptions::Unsafe(); + } + Datum out; + auto target_type = timestamp(TimeUnit::NANO); + + if (type == Type::DATE32) { + // Convert from days since epoch to datetime64[ns] + ConvertDatetime(*data, out_values); + } else if (type == Type::DATE64) { + // Date64Type is millisecond timestamp stored as int64_t + // TODO(wesm): Do we want to make sure to zero out the milliseconds? + ConvertDatetime(*data, out_values); + } else if (type == Type::TIMESTAMP) { + const auto& ts_type = checked_cast(*data->type()); + + if (ts_type.unit() == TimeUnit::NANO) { + ConvertNumericNullable(*data, kPandasTimestampNull, out_values); + } else if (ts_type.unit() == TimeUnit::MICRO || ts_type.unit() == TimeUnit::MILLI || + ts_type.unit() == TimeUnit::SECOND) { + ARROW_ASSIGN_OR_RAISE(out, compute::Cast(data, target_type, options, &ctx)); + ConvertNumericNullable(*out.chunked_array(), kPandasTimestampNull, + out_values); + } else { + return Status::NotImplemented("Unsupported time unit"); + } + } else { + return Status::NotImplemented("Cannot write Arrow data of type ", + data->type()->ToString(), + " to a Pandas datetime block."); + } + return Status::OK(); + } +}; + +template +class DatetimeTZWriter : public BASE { + public: + DatetimeTZWriter(const PandasOptions& options, const std::string& timezone, + int64_t num_rows) + : BASE(options, num_rows, 1), timezone_(timezone) {} + + protected: + Status GetResultBlock(PyObject** out) override { + RETURN_NOT_OK(this->MakeBlock1D()); + *out = this->block_arr_.obj(); + return Status::OK(); + } + + Status AddResultMetadata(PyObject* result) override { + PyObject* py_tz = PyUnicode_FromStringAndSize( + timezone_.c_str(), static_cast(timezone_.size())); + RETURN_IF_PYERROR(); + PyDict_SetItemString(result, "timezone", py_tz); + Py_DECREF(py_tz); + return Status::OK(); + } + + private: + std::string timezone_; +}; + +using DatetimeSecondTZWriter = DatetimeTZWriter; +using DatetimeMilliTZWriter = DatetimeTZWriter; +using DatetimeMicroTZWriter = DatetimeTZWriter; +using DatetimeNanoTZWriter = DatetimeTZWriter; + +template +class TimedeltaWriter : public TypedPandasWriter { + public: + using TypedPandasWriter::TypedPandasWriter; + + Status AllocateTimedelta(int ndim) { + RETURN_NOT_OK(this->AllocateNDArray(NPY_TIMEDELTA, ndim)); + SetDatetimeUnit(internal::NumPyFrequency(UNIT)); + return Status::OK(); + } + + bool CanZeroCopy(const ChunkedArray& data) const override { + const auto& type = checked_cast(*data.type()); + return IsNonNullContiguous(data) && type.unit() == UNIT; + } + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + const auto& type = checked_cast(*data->type()); + DCHECK_EQ(UNIT, type.unit()) << "Should only call instances of this writer " + << "with arrays of the correct unit"; + ConvertNumericNullable(*data, kPandasTimestampNull, + this->GetBlockColumnStart(rel_placement)); + return Status::OK(); + } + + protected: + Status Allocate() override { return AllocateTimedelta(2); } +}; + +using TimedeltaSecondWriter = TimedeltaWriter; +using TimedeltaMilliWriter = TimedeltaWriter; +using TimedeltaMicroWriter = TimedeltaWriter; + +class TimedeltaNanoWriter : public TimedeltaWriter { + public: + using TimedeltaWriter::TimedeltaWriter; + + Status CopyInto(std::shared_ptr data, int64_t 
rel_placement) override { + Type::type type = data->type()->id(); + int64_t* out_values = this->GetBlockColumnStart(rel_placement); + if (type == Type::DURATION) { + const auto& ts_type = checked_cast(*data->type()); + if (ts_type.unit() == TimeUnit::NANO) { + ConvertNumericNullable(*data, kPandasTimestampNull, out_values); + } else if (ts_type.unit() == TimeUnit::MICRO) { + ConvertDatetime(*data, out_values); + } else if (ts_type.unit() == TimeUnit::MILLI) { + ConvertDatetime(*data, out_values); + } else if (ts_type.unit() == TimeUnit::SECOND) { + ConvertDatetime(*data, out_values); + } else { + return Status::NotImplemented("Unsupported time unit"); + } + } else { + return Status::NotImplemented("Cannot write Arrow data of type ", + data->type()->ToString(), + " to a Pandas timedelta block."); + } + return Status::OK(); + } +}; + +Status MakeZeroLengthArray(const std::shared_ptr& type, + std::shared_ptr* out) { + std::unique_ptr builder; + RETURN_NOT_OK(MakeBuilder(default_memory_pool(), type, &builder)); + RETURN_NOT_OK(builder->Resize(0)); + return builder->Finish(out); +} + +bool NeedDictionaryUnification(const ChunkedArray& data) { + if (data.num_chunks() < 2) { + return false; + } + const auto& arr_first = checked_cast(*data.chunk(0)); + for (int c = 1; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + if (!(arr_first.dictionary()->Equals(arr.dictionary()))) { + return true; + } + } + return false; +} + +template +class CategoricalWriter + : public TypedPandasWriter::npy_type> { + public: + using TRAITS = arrow_traits; + using ArrayType = typename TypeTraits::ArrayType; + using T = typename TRAITS::T; + + explicit CategoricalWriter(const PandasOptions& options, int64_t num_rows) + : TypedPandasWriter(options, num_rows, 1), + ordered_(false), + needs_copy_(false) {} + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) override { + return Status::NotImplemented("categorical type"); + } + + Status TransferSingle(std::shared_ptr data, PyObject* py_ref) override { + const auto& dict_type = checked_cast(*data->type()); + std::shared_ptr dict; + if (data->num_chunks() == 0) { + // no dictionary values => create empty array + RETURN_NOT_OK(this->AllocateNDArray(TRAITS::npy_type, 1)); + RETURN_NOT_OK(MakeZeroLengthArray(dict_type.value_type(), &dict)); + } else { + DCHECK_EQ(IndexType::type_id, dict_type.index_type()->id()); + RETURN_NOT_OK(WriteIndices(*data, &dict)); + } + + PyObject* pydict; + RETURN_NOT_OK(ConvertArrayToPandas(this->options_, dict, nullptr, &pydict)); + dictionary_.reset(pydict); + ordered_ = dict_type.ordered(); + return Status::OK(); + } + + Status Write(std::shared_ptr data, int64_t abs_placement, + int64_t rel_placement) override { + RETURN_NOT_OK(this->EnsurePlacementAllocated()); + RETURN_NOT_OK(TransferSingle(data, /*py_ref=*/nullptr)); + this->placement_data_[rel_placement] = abs_placement; + return Status::OK(); + } + + Status GetSeriesResult(PyObject** out) override { + PyAcquireGIL lock; + + PyObject* result = PyDict_New(); + RETURN_IF_PYERROR(); + + // Expected single array dictionary layout + PyDict_SetItemString(result, "indices", this->block_arr_.obj()); + RETURN_IF_PYERROR(); + RETURN_NOT_OK(AddResultMetadata(result)); + + *out = result; + return Status::OK(); + } + + protected: + Status AddResultMetadata(PyObject* result) override { + PyDict_SetItemString(result, "dictionary", dictionary_.obj()); + PyObject* py_ordered = ordered_ ? 
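+    // Py_True and Py_False are shared singletons; they are INCREF'd below
+    // before being stored under the "ordered" key of the result dict.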
Py_True : Py_False; + Py_INCREF(py_ordered); + PyDict_SetItemString(result, "ordered", py_ordered); + return Status::OK(); + } + + Status WriteIndicesUniform(const ChunkedArray& data) { + RETURN_NOT_OK(this->AllocateNDArray(TRAITS::npy_type, 1)); + T* out_values = reinterpret_cast(this->block_data_); + + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + const auto& indices = checked_cast(*arr.indices()); + auto values = reinterpret_cast(indices.raw_values()); + + RETURN_NOT_OK(CheckIndexBounds(*indices.data(), arr.dictionary()->length())); + // Null is -1 in CategoricalBlock + for (int i = 0; i < arr.length(); ++i) { + if (indices.IsValid(i)) { + *out_values++ = values[i]; + } else { + *out_values++ = -1; + } + } + } + return Status::OK(); + } + + Status WriteIndicesVarying(const ChunkedArray& data, std::shared_ptr* out_dict) { + // Yield int32 indices to allow for dictionary outgrowing the current index + // type + RETURN_NOT_OK(this->AllocateNDArray(NPY_INT32, 1)); + auto out_values = reinterpret_cast(this->block_data_); + + const auto& dict_type = checked_cast(*data.type()); + + ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(dict_type.value_type(), + this->options_.pool)); + for (int c = 0; c < data.num_chunks(); c++) { + const auto& arr = checked_cast(*data.chunk(c)); + const auto& indices = checked_cast(*arr.indices()); + auto values = reinterpret_cast(indices.raw_values()); + + std::shared_ptr transpose_buffer; + RETURN_NOT_OK(unifier->Unify(*arr.dictionary(), &transpose_buffer)); + + auto transpose = reinterpret_cast(transpose_buffer->data()); + int64_t dict_length = arr.dictionary()->length(); + + RETURN_NOT_OK(CheckIndexBounds(*indices.data(), dict_length)); + + // Null is -1 in CategoricalBlock + for (int i = 0; i < arr.length(); ++i) { + if (indices.IsValid(i)) { + *out_values++ = transpose[values[i]]; + } else { + *out_values++ = -1; + } + } + } + + std::shared_ptr unused_type; + return unifier->GetResult(&unused_type, out_dict); + } + + Status WriteIndices(const ChunkedArray& data, std::shared_ptr* out_dict) { + DCHECK_GT(data.num_chunks(), 0); + + // Sniff the first chunk + const auto& arr_first = checked_cast(*data.chunk(0)); + const auto indices_first = std::static_pointer_cast(arr_first.indices()); + + if (data.num_chunks() == 1 && indices_first->null_count() == 0) { + RETURN_NOT_OK( + CheckIndexBounds(*indices_first->data(), arr_first.dictionary()->length())); + + PyObject* wrapped; + npy_intp dims[1] = {static_cast(this->num_rows_)}; + RETURN_NOT_OK(MakeNumPyView(indices_first, /*py_ref=*/nullptr, TRAITS::npy_type, + /*ndim=*/1, dims, &wrapped)); + this->SetBlockData(wrapped); + *out_dict = arr_first.dictionary(); + } else { + RETURN_NOT_OK(this->CheckNotZeroCopyOnly(data)); + if (NeedDictionaryUnification(data)) { + RETURN_NOT_OK(WriteIndicesVarying(data, out_dict)); + } else { + RETURN_NOT_OK(WriteIndicesUniform(data)); + *out_dict = arr_first.dictionary(); + } + } + return Status::OK(); + } + + OwnedRefNoGIL dictionary_; + bool ordered_; + bool needs_copy_; +}; + +class ExtensionWriter : public PandasWriter { + public: + using PandasWriter::PandasWriter; + + Status Allocate() override { + // no-op + return Status::OK(); + } + + Status TransferSingle(std::shared_ptr data, PyObject* py_ref) override { + PyAcquireGIL lock; + PyObject* py_array; + py_array = wrap_chunked_array(data); + py_array_.reset(py_array); + + return Status::OK(); + } + + Status CopyInto(std::shared_ptr data, int64_t rel_placement) 
override { + return TransferSingle(data, nullptr); + } + + Status GetDataFrameResult(PyObject** out) override { + PyAcquireGIL lock; + PyObject* result = PyDict_New(); + RETURN_IF_PYERROR(); + + PyDict_SetItemString(result, "py_array", py_array_.obj()); + PyDict_SetItemString(result, "placement", placement_arr_.obj()); + *out = result; + return Status::OK(); + } + + Status GetSeriesResult(PyObject** out) override { + *out = py_array_.detach(); + return Status::OK(); + } + + protected: + OwnedRefNoGIL py_array_; +}; + +Status MakeWriter(const PandasOptions& options, PandasWriter::type writer_type, + const DataType& type, int64_t num_rows, int num_columns, + std::shared_ptr* writer) { +#define BLOCK_CASE(NAME, TYPE) \ + case PandasWriter::NAME: \ + *writer = std::make_shared(options, num_rows, num_columns); \ + break; + +#define CATEGORICAL_CASE(TYPE) \ + case TYPE::type_id: \ + *writer = std::make_shared>(options, num_rows); \ + break; + +#define TZ_CASE(NAME, TYPE) \ + case PandasWriter::NAME: { \ + const auto& ts_type = checked_cast(type); \ + *writer = std::make_shared(options, ts_type.timezone(), num_rows); \ + } break; + + switch (writer_type) { + case PandasWriter::CATEGORICAL: { + const auto& index_type = *checked_cast(type).index_type(); + switch (index_type.id()) { + CATEGORICAL_CASE(Int8Type); + CATEGORICAL_CASE(Int16Type); + CATEGORICAL_CASE(Int32Type); + CATEGORICAL_CASE(Int64Type); + case Type::UINT8: + case Type::UINT16: + case Type::UINT32: + case Type::UINT64: + return Status::TypeError( + "Converting unsigned dictionary indices to pandas", + " not yet supported, index type: ", index_type.ToString()); + default: + // Unreachable + DCHECK(false); + break; + } + } break; + case PandasWriter::EXTENSION: + *writer = std::make_shared(options, num_rows, num_columns); + break; + BLOCK_CASE(OBJECT, ObjectWriter); + BLOCK_CASE(UINT8, UInt8Writer); + BLOCK_CASE(INT8, Int8Writer); + BLOCK_CASE(UINT16, UInt16Writer); + BLOCK_CASE(INT16, Int16Writer); + BLOCK_CASE(UINT32, UInt32Writer); + BLOCK_CASE(INT32, Int32Writer); + BLOCK_CASE(UINT64, UInt64Writer); + BLOCK_CASE(INT64, Int64Writer); + BLOCK_CASE(HALF_FLOAT, Float16Writer); + BLOCK_CASE(FLOAT, Float32Writer); + BLOCK_CASE(DOUBLE, Float64Writer); + BLOCK_CASE(BOOL, BoolWriter); + BLOCK_CASE(DATETIME_DAY, DatetimeDayWriter); + BLOCK_CASE(DATETIME_SECOND, DatetimeSecondWriter); + BLOCK_CASE(DATETIME_MILLI, DatetimeMilliWriter); + BLOCK_CASE(DATETIME_MICRO, DatetimeMicroWriter); + BLOCK_CASE(DATETIME_NANO, DatetimeNanoWriter); + BLOCK_CASE(TIMEDELTA_SECOND, TimedeltaSecondWriter); + BLOCK_CASE(TIMEDELTA_MILLI, TimedeltaMilliWriter); + BLOCK_CASE(TIMEDELTA_MICRO, TimedeltaMicroWriter); + BLOCK_CASE(TIMEDELTA_NANO, TimedeltaNanoWriter); + TZ_CASE(DATETIME_SECOND_TZ, DatetimeSecondTZWriter); + TZ_CASE(DATETIME_MILLI_TZ, DatetimeMilliTZWriter); + TZ_CASE(DATETIME_MICRO_TZ, DatetimeMicroTZWriter); + TZ_CASE(DATETIME_NANO_TZ, DatetimeNanoTZWriter); + default: + return Status::NotImplemented("Unsupported block type"); + } + +#undef BLOCK_CASE +#undef CATEGORICAL_CASE + + return Status::OK(); +} + +static Status GetPandasWriterType(const ChunkedArray& data, const PandasOptions& options, + PandasWriter::type* output_type) { +#define INTEGER_CASE(NAME) \ + *output_type = \ + data.null_count() > 0 \ + ? options.integer_object_nulls ? PandasWriter::OBJECT : PandasWriter::DOUBLE \ + : PandasWriter::NAME; \ + break; + + switch (data.type()->id()) { + case Type::BOOL: + *output_type = data.null_count() > 0 ? 
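+      // A NumPy bool block cannot represent missing values, so any null
+      // forces the object path (None/True/False) instead.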
PandasWriter::OBJECT : PandasWriter::BOOL; + break; + case Type::UINT8: + INTEGER_CASE(UINT8); + case Type::INT8: + INTEGER_CASE(INT8); + case Type::UINT16: + INTEGER_CASE(UINT16); + case Type::INT16: + INTEGER_CASE(INT16); + case Type::UINT32: + INTEGER_CASE(UINT32); + case Type::INT32: + INTEGER_CASE(INT32); + case Type::UINT64: + INTEGER_CASE(UINT64); + case Type::INT64: + INTEGER_CASE(INT64); + case Type::HALF_FLOAT: + *output_type = PandasWriter::HALF_FLOAT; + break; + case Type::FLOAT: + *output_type = PandasWriter::FLOAT; + break; + case Type::DOUBLE: + *output_type = PandasWriter::DOUBLE; + break; + case Type::STRING: // fall through + case Type::LARGE_STRING: // fall through + case Type::BINARY: // fall through + case Type::LARGE_BINARY: + case Type::NA: // fall through + case Type::FIXED_SIZE_BINARY: // fall through + case Type::STRUCT: // fall through + case Type::TIME32: // fall through + case Type::TIME64: // fall through + case Type::DECIMAL128: // fall through + case Type::DECIMAL256: // fall through + case Type::INTERVAL_MONTH_DAY_NANO: // fall through + *output_type = PandasWriter::OBJECT; + break; + case Type::DATE32: + if (options.date_as_object) { + *output_type = PandasWriter::OBJECT; + } else if (options.coerce_temporal_nanoseconds) { + *output_type = PandasWriter::DATETIME_NANO; + } else if (options.to_numpy) { + // Numpy supports Day, but Pandas does not + *output_type = PandasWriter::DATETIME_DAY; + } else { + *output_type = PandasWriter::DATETIME_MILLI; + } + break; + case Type::DATE64: + if (options.date_as_object) { + *output_type = PandasWriter::OBJECT; + } else if (options.coerce_temporal_nanoseconds) { + *output_type = PandasWriter::DATETIME_NANO; + } else { + *output_type = PandasWriter::DATETIME_MILLI; + } + break; + case Type::TIMESTAMP: { + const auto& ts_type = checked_cast(*data.type()); + if (options.timestamp_as_object && ts_type.unit() != TimeUnit::NANO) { + // Nanoseconds are never out of bounds for pandas, so in that case + // we don't convert to object + *output_type = PandasWriter::OBJECT; + } else if (options.coerce_temporal_nanoseconds) { + if (!ts_type.timezone().empty()) { + *output_type = PandasWriter::DATETIME_NANO_TZ; + } else { + *output_type = PandasWriter::DATETIME_NANO; + } + } else { + if (!ts_type.timezone().empty()) { + switch (ts_type.unit()) { + case TimeUnit::SECOND: + *output_type = PandasWriter::DATETIME_SECOND_TZ; + break; + case TimeUnit::MILLI: + *output_type = PandasWriter::DATETIME_MILLI_TZ; + break; + case TimeUnit::MICRO: + *output_type = PandasWriter::DATETIME_MICRO_TZ; + break; + case TimeUnit::NANO: + *output_type = PandasWriter::DATETIME_NANO_TZ; + break; + } + } else { + switch (ts_type.unit()) { + case TimeUnit::SECOND: + *output_type = PandasWriter::DATETIME_SECOND; + break; + case TimeUnit::MILLI: + *output_type = PandasWriter::DATETIME_MILLI; + break; + case TimeUnit::MICRO: + *output_type = PandasWriter::DATETIME_MICRO; + break; + case TimeUnit::NANO: + *output_type = PandasWriter::DATETIME_NANO; + break; + } + } + } + } break; + case Type::DURATION: { + const auto& dur_type = checked_cast(*data.type()); + if (options.coerce_temporal_nanoseconds) { + *output_type = PandasWriter::TIMEDELTA_NANO; + } else { + switch (dur_type.unit()) { + case TimeUnit::SECOND: + *output_type = PandasWriter::TIMEDELTA_SECOND; + break; + case TimeUnit::MILLI: + *output_type = PandasWriter::TIMEDELTA_MILLI; + break; + case TimeUnit::MICRO: + *output_type = PandasWriter::TIMEDELTA_MICRO; + break; + case TimeUnit::NANO: + *output_type 
= PandasWriter::TIMEDELTA_NANO; + break; + } + } + } break; + case Type::FIXED_SIZE_LIST: + case Type::LIST: + case Type::LARGE_LIST: + case Type::MAP: { + auto list_type = std::static_pointer_cast(data.type()); + if (!ListTypeSupported(*list_type->value_type())) { + return Status::NotImplemented("Not implemented type for Arrow list to pandas: ", + list_type->value_type()->ToString()); + } + *output_type = PandasWriter::OBJECT; + } break; + case Type::DICTIONARY: + *output_type = PandasWriter::CATEGORICAL; + break; + case Type::EXTENSION: + *output_type = PandasWriter::EXTENSION; + break; + default: + return Status::NotImplemented( + "No known equivalent Pandas block for Arrow data of type ", + data.type()->ToString(), " is known."); + } + return Status::OK(); +} + +// Construct the exact pandas "BlockManager" memory layout +// +// * For each column determine the correct output pandas type +// * Allocate 2D blocks (ncols x nrows) for each distinct data type in output +// * Allocate block placement arrays +// * Write Arrow columns out into each slice of memory; populate block +// * placement arrays as we go +class PandasBlockCreator { + public: + using WriterMap = std::unordered_map>; + + explicit PandasBlockCreator(const PandasOptions& options, FieldVector fields, + ChunkedArrayVector arrays) + : options_(options), fields_(std::move(fields)), arrays_(std::move(arrays)) { + num_columns_ = static_cast(arrays_.size()); + if (num_columns_ > 0) { + num_rows_ = arrays_[0]->length(); + } + column_block_placement_.resize(num_columns_); + } + virtual ~PandasBlockCreator() = default; + + virtual Status Convert(PyObject** out) = 0; + + Status AppendBlocks(const WriterMap& blocks, PyObject* list) { + for (const auto& it : blocks) { + PyObject* item; + RETURN_NOT_OK(it.second->GetDataFrameResult(&item)); + if (PyList_Append(list, item) < 0) { + RETURN_IF_PYERROR(); + } + + // ARROW-1017; PyList_Append increments object refcount + Py_DECREF(item); + } + return Status::OK(); + } + + protected: + PandasOptions options_; + + FieldVector fields_; + ChunkedArrayVector arrays_; + int num_columns_; + int64_t num_rows_; + + // column num -> relative placement within internal block + std::vector column_block_placement_; +}; + +// Helper function for extension chunked arrays +// Constructing a storage chunked array of an extension chunked array +std::shared_ptr GetStorageChunkedArray(std::shared_ptr arr) { + auto value_type = checked_cast(*arr->type()).storage_type(); + ArrayVector storage_arrays; + for (int c = 0; c < arr->num_chunks(); c++) { + const auto& arr_ext = checked_cast(*arr->chunk(c)); + storage_arrays.emplace_back(arr_ext.storage()); + } + return std::make_shared(std::move(storage_arrays), value_type); +}; + +class ConsolidatedBlockCreator : public PandasBlockCreator { + public: + using PandasBlockCreator::PandasBlockCreator; + + Status Convert(PyObject** out) override { + column_types_.resize(num_columns_); + RETURN_NOT_OK(CreateBlocks()); + RETURN_NOT_OK(WriteTableToBlocks()); + PyAcquireGIL lock; + + PyObject* result = PyList_New(0); + RETURN_IF_PYERROR(); + + RETURN_NOT_OK(AppendBlocks(blocks_, result)); + RETURN_NOT_OK(AppendBlocks(singleton_blocks_, result)); + + *out = result; + return Status::OK(); + } + + Status GetBlockType(int column_index, PandasWriter::type* out) { + if (options_.extension_columns.count(fields_[column_index]->name())) { + *out = PandasWriter::EXTENSION; + return Status::OK(); + } else { + // In case of an extension array default to the storage type + if 
(arrays_[column_index]->type()->id() == Type::EXTENSION) { + arrays_[column_index] = GetStorageChunkedArray(arrays_[column_index]); + } + return GetPandasWriterType(*arrays_[column_index], options_, out); + } + } + + Status CreateBlocks() { + for (int i = 0; i < num_columns_; ++i) { + const DataType& type = *arrays_[i]->type(); + PandasWriter::type output_type; + RETURN_NOT_OK(GetBlockType(i, &output_type)); + + int block_placement = 0; + std::shared_ptr writer; + if (output_type == PandasWriter::CATEGORICAL || + output_type == PandasWriter::DATETIME_SECOND_TZ || + output_type == PandasWriter::DATETIME_MILLI_TZ || + output_type == PandasWriter::DATETIME_MICRO_TZ || + output_type == PandasWriter::DATETIME_NANO_TZ || + output_type == PandasWriter::EXTENSION) { + RETURN_NOT_OK(MakeWriter(options_, output_type, type, num_rows_, + /*num_columns=*/1, &writer)); + singleton_blocks_[i] = writer; + } else { + auto it = block_sizes_.find(output_type); + if (it != block_sizes_.end()) { + block_placement = it->second; + // Increment count + ++it->second; + } else { + // Add key to map + block_sizes_[output_type] = 1; + } + } + column_types_[i] = output_type; + column_block_placement_[i] = block_placement; + } + + // Create normal non-categorical blocks + for (const auto& it : this->block_sizes_) { + PandasWriter::type output_type = static_cast(it.first); + std::shared_ptr block; + RETURN_NOT_OK(MakeWriter(this->options_, output_type, /*unused*/ *null(), num_rows_, + it.second, &block)); + this->blocks_[output_type] = block; + } + return Status::OK(); + } + + Status GetWriter(int i, std::shared_ptr* block) { + PandasWriter::type output_type = this->column_types_[i]; + switch (output_type) { + case PandasWriter::CATEGORICAL: + case PandasWriter::DATETIME_SECOND_TZ: + case PandasWriter::DATETIME_MILLI_TZ: + case PandasWriter::DATETIME_MICRO_TZ: + case PandasWriter::DATETIME_NANO_TZ: + case PandasWriter::EXTENSION: { + auto it = this->singleton_blocks_.find(i); + if (it == this->singleton_blocks_.end()) { + return Status::KeyError("No block allocated"); + } + *block = it->second; + } break; + default: + auto it = this->blocks_.find(output_type); + if (it == this->blocks_.end()) { + return Status::KeyError("No block allocated"); + } + *block = it->second; + break; + } + return Status::OK(); + } + + Status WriteTableToBlocks() { + auto WriteColumn = [this](int i) { + std::shared_ptr block; + RETURN_NOT_OK(this->GetWriter(i, &block)); + // ARROW-3789 Use std::move on the array to permit self-destructing + return block->Write(std::move(arrays_[i]), i, this->column_block_placement_[i]); + }; + + return OptionalParallelFor(options_.use_threads, num_columns_, WriteColumn); + } + + private: + // column num -> block type id + std::vector column_types_; + + // block type -> type count + std::unordered_map block_sizes_; + std::unordered_map block_types_; + + // block type -> block + WriterMap blocks_; + + WriterMap singleton_blocks_; +}; + +/// \brief Create blocks for pandas.DataFrame block manager using one block per +/// column strategy. This permits some zero-copy optimizations as well as the +/// ability for the table to "self-destruct" if selected by the user. 
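+/// As an illustration (assuming the usual pyarrow-level entry point), this
+/// strategy is what Table.to_pandas(split_blocks=True) selects, typically
+/// combined with self_destruct=True:
+///
+///   df = table.to_pandas(split_blocks=True, self_destruct=True)
+///   del table  # buffers are released column-by-column during conversion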
+class SplitBlockCreator : public PandasBlockCreator { + public: + using PandasBlockCreator::PandasBlockCreator; + + Status GetWriter(int i, std::shared_ptr* writer) { + PandasWriter::type output_type = PandasWriter::OBJECT; + const DataType& type = *arrays_[i]->type(); + if (options_.extension_columns.count(fields_[i]->name())) { + output_type = PandasWriter::EXTENSION; + } else { + // Null count needed to determine output type + RETURN_NOT_OK(GetPandasWriterType(*arrays_[i], options_, &output_type)); + } + return MakeWriter(this->options_, output_type, type, num_rows_, 1, writer); + } + + Status Convert(PyObject** out) override { + PyAcquireGIL lock; + + PyObject* result = PyList_New(0); + RETURN_IF_PYERROR(); + + for (int i = 0; i < num_columns_; ++i) { + std::shared_ptr writer; + RETURN_NOT_OK(GetWriter(i, &writer)); + // ARROW-3789 Use std::move on the array to permit self-destructing + RETURN_NOT_OK(writer->Write(std::move(arrays_[i]), i, /*rel_placement=*/0)); + + PyObject* item; + RETURN_NOT_OK(writer->GetDataFrameResult(&item)); + if (PyList_Append(result, item) < 0) { + RETURN_IF_PYERROR(); + } + // PyList_Append increments object refcount + Py_DECREF(item); + } + + *out = result; + return Status::OK(); + } + + private: + std::vector> writers_; +}; + +Status ConvertCategoricals(const PandasOptions& options, ChunkedArrayVector* arrays, + FieldVector* fields) { + std::vector columns_to_encode; + + // For Categorical conversions + auto EncodeColumn = [&](int j) { + int i = columns_to_encode[j]; + if (options.zero_copy_only) { + return Status::Invalid("Need to dictionary encode a column, but ", + "only zero-copy conversions allowed"); + } + compute::ExecContext ctx(options.pool); + ARROW_ASSIGN_OR_RAISE( + Datum out, DictionaryEncode((*arrays)[i], + compute::DictionaryEncodeOptions::Defaults(), &ctx)); + (*arrays)[i] = out.chunked_array(); + (*fields)[i] = (*fields)[i]->WithType((*arrays)[i]->type()); + return Status::OK(); + }; + + if (!options.categorical_columns.empty()) { + for (int i = 0; i < static_cast(arrays->size()); i++) { + if ((*arrays)[i]->type()->id() != Type::DICTIONARY && + options.categorical_columns.count((*fields)[i]->name())) { + columns_to_encode.push_back(i); + } + } + } + if (options.strings_to_categorical) { + for (int i = 0; i < static_cast(arrays->size()); i++) { + if (is_base_binary_like((*arrays)[i]->type()->id())) { + columns_to_encode.push_back(i); + } + } + } + return OptionalParallelFor(options.use_threads, + static_cast(columns_to_encode.size()), EncodeColumn); +} + +} // namespace + +Status ConvertArrayToPandas(const PandasOptions& options, std::shared_ptr arr, + PyObject* py_ref, PyObject** out) { + return ConvertChunkedArrayToPandas( + options, std::make_shared(std::move(arr)), py_ref, out); +} + +Status ConvertChunkedArrayToPandas(const PandasOptions& options, + std::shared_ptr arr, PyObject* py_ref, + PyObject** out) { + if (options.decode_dictionaries && arr->type()->id() == Type::DICTIONARY) { + const auto& dense_type = + checked_cast(*arr->type()).value_type(); + RETURN_NOT_OK(DecodeDictionaries(options.pool, dense_type, &arr)); + DCHECK_NE(arr->type()->id(), Type::DICTIONARY); + + // The original Python DictionaryArray won't own the memory anymore + // as we actually built a new array when we decoded the DictionaryArray + // thus let the final resulting numpy array own the memory through a Capsule + py_ref = nullptr; + } + + if (options.strings_to_categorical && is_base_binary_like(arr->type()->id())) { + if (options.zero_copy_only) { + 
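+      // Dictionary-encoding the strings allocates new index and dictionary
+      // arrays, which a zero_copy_only request forbids.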
+      return Status::Invalid("Need to dictionary encode a column, but ",
+                             "only zero-copy conversions allowed");
+    }
+    compute::ExecContext ctx(options.pool);
+    ARROW_ASSIGN_OR_RAISE(
+        Datum out,
+        DictionaryEncode(arr, compute::DictionaryEncodeOptions::Defaults(), &ctx));
+    arr = out.chunked_array();
+  }
+
+  PandasOptions modified_options = options;
+  modified_options.strings_to_categorical = false;
+
+  // ARROW-7596: We permit the hybrid Series/DataFrame code path to do zero copy
+  // optimizations that we do not allow in the default case when converting
+  // Table->DataFrame
+  modified_options.allow_zero_copy_blocks = true;
+
+  // In case of an extension array default to the storage type
+  if (arr->type()->id() == Type::EXTENSION) {
+    arr = GetStorageChunkedArray(arr);
+  }
+
+  PandasWriter::type output_type;
+  RETURN_NOT_OK(GetPandasWriterType(*arr, modified_options, &output_type));
+  if (options.decode_dictionaries) {
+    DCHECK_NE(output_type, PandasWriter::CATEGORICAL);
+  }
+
+  std::shared_ptr<PandasWriter> writer;
+  RETURN_NOT_OK(MakeWriter(modified_options, output_type, *arr->type(), arr->length(),
+                           /*num_columns=*/1, &writer));
+  RETURN_NOT_OK(writer->TransferSingle(std::move(arr), py_ref));
+  return writer->GetSeriesResult(out);
+}
+
+Status ConvertTableToPandas(const PandasOptions& options, std::shared_ptr<Table> table,
+                            PyObject** out) {
+  ChunkedArrayVector arrays = table->columns();
+  FieldVector fields = table->fields();
+
+  // ARROW-3789: allow "self-destructing" by releasing references to columns as
+  // we convert them to pandas
+  table = nullptr;
+
+  RETURN_NOT_OK(ConvertCategoricals(options, &arrays, &fields));
+
+  PandasOptions modified_options = options;
+  modified_options.strings_to_categorical = false;
+  modified_options.categorical_columns.clear();
+
+  if (options.split_blocks) {
+    modified_options.allow_zero_copy_blocks = true;
+    SplitBlockCreator helper(modified_options, std::move(fields), std::move(arrays));
+    return helper.Convert(out);
+  } else {
+    ConsolidatedBlockCreator helper(modified_options, std::move(fields),
+                                    std::move(arrays));
+    return helper.Convert(out);
+  }
+}
+
+}  // namespace py
+}  // namespace arrow
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.h b/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.h
index 6570364..82e0a60 100644
--- a/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.h
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_pandas.h
@@ -41,6 +41,13 @@ class Table;
 
 namespace py {
 
+enum class MapConversionType {
+  DEFAULT,  // convert Arrow maps to assoc lists (lists of key-value tuples) in Pandas
+  LOSSY,    // report warnings when lossiness is encountered due to duplicate keys
+  STRICT_,  // raise a Python exception when lossiness is encountered due to duplicate
+            // keys
+};
+
 struct PandasOptions {
   /// arrow::MemoryPool to use for memory allocations
   MemoryPool* pool = default_memory_pool();
@@ -90,6 +97,17 @@ struct PandasOptions {
   /// conversions
   bool self_destruct = false;
 
+  /// \brief The default behavior (DEFAULT) is to convert Arrow Map arrays to
+  /// Python association lists (list-of-tuples) in the same order as the Arrow
+  /// Map, as in [(key1, value1), (key2, value2), ...].
+  /// If LOSSY or STRICT, convert Arrow Map arrays to native Python dicts.
+  /// This can change the ordering of (key, value) pairs, and will deduplicate
+  /// multiple keys, resulting in a possible loss of data.
+  /// If 'lossy', this key deduplication results in a warning printed
+  /// when detected. If 'strict', this instead results in an exception
+  /// being raised when detected.
+  MapConversionType maps_as_pydicts = MapConversionType::DEFAULT;
+
   // Used internally for nested arrays.
   bool decode_dictionaries = false;
 
@@ -99,6 +117,10 @@ struct PandasOptions {
   // Columns that should be passed through to be converted to
   // ExtensionArray/Block
   std::unordered_set<std::string> extension_columns;
+
+  // Used internally to distinguish between to_numpy() and to_pandas() when
+  // the expected output differs
+  bool to_numpy = false;
 };
 
 ARROW_PYTHON_EXPORT
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_python_internal.h b/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_python_internal.h
new file mode 100644
index 0000000..514cda3
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/arrow_to_python_internal.h
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/array.h"
+#include "arrow/python/platform.h"
+
+namespace arrow {
+namespace py {
+namespace internal {
+// TODO(ARROW-12976): See if we can refactor Pandas ObjectWriter logic
+// to the .cc file and move this there as well if we can.
+
+// Converts array to a sequence of Python objects.
+template <typename ArrayType, typename WriteValue, typename Assigner>
+inline Status WriteArrayObjects(const ArrayType& arr, WriteValue&& write_func,
+                                Assigner out_values) {
+  // TODO(ARROW-12976): Use visitor here?
+  const bool has_nulls = arr.null_count() > 0;
+  for (int64_t i = 0; i < arr.length(); ++i) {
+    if (has_nulls && arr.IsNull(i)) {
+      Py_INCREF(Py_None);
+      *out_values = Py_None;
+    } else {
+      RETURN_NOT_OK(write_func(arr.GetView(i), out_values));
+    }
+    ++out_values;
+  }
+  return Status::OK();
+}
+
+}  // namespace internal
+}  // namespace py
+}  // namespace arrow
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/benchmark.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/benchmark.cc
new file mode 100644
index 0000000..6dcc959
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/benchmark.cc
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/python/benchmark.h" +#include "arrow/python/helpers.h" + +namespace arrow { +namespace py { +namespace benchmark { + +void Benchmark_PandasObjectIsNull(PyObject* list) { + if (!PyList_CheckExact(list)) { + PyErr_SetString(PyExc_TypeError, "expected a list"); + return; + } + Py_ssize_t i, n = PyList_GET_SIZE(list); + for (i = 0; i < n; i++) { + internal::PandasObjectIsNull(PyList_GET_ITEM(list, i)); + } +} + +} // namespace benchmark +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/common.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/common.cc new file mode 100644 index 0000000..6fe2ed4 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/common.cc @@ -0,0 +1,203 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/python/common.h" + +#include +#include +#include + +#include "arrow/memory_pool.h" +#include "arrow/status.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" + +#include "arrow/python/helpers.h" + +namespace arrow { + +using internal::checked_cast; + +namespace py { + +static std::mutex memory_pool_mutex; +static MemoryPool* default_python_pool = nullptr; + +void set_default_memory_pool(MemoryPool* pool) { + std::lock_guard guard(memory_pool_mutex); + default_python_pool = pool; +} + +MemoryPool* get_memory_pool() { + std::lock_guard guard(memory_pool_mutex); + if (default_python_pool) { + return default_python_pool; + } else { + return default_memory_pool(); + } +} + +// ---------------------------------------------------------------------- +// PythonErrorDetail + +namespace { + +const char kErrorDetailTypeId[] = "arrow::py::PythonErrorDetail"; + +// Try to match the Python exception type with an appropriate Status code +StatusCode MapPyError(PyObject* exc_type) { + StatusCode code; + + if (PyErr_GivenExceptionMatches(exc_type, PyExc_MemoryError)) { + code = StatusCode::OutOfMemory; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_IndexError)) { + code = StatusCode::IndexError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_KeyError)) { + code = StatusCode::KeyError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_TypeError)) { + code = StatusCode::TypeError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_ValueError) || + PyErr_GivenExceptionMatches(exc_type, PyExc_OverflowError)) { + code = StatusCode::Invalid; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_EnvironmentError)) { + code = StatusCode::IOError; + } else if (PyErr_GivenExceptionMatches(exc_type, PyExc_NotImplementedError)) { + code = StatusCode::NotImplemented; + } else { + code = 
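+    // Unrecognized (including user-defined) exception types fall back to
+    // UnknownError; ConvertPyError below re-maps through this table only
+    // when it was invoked with StatusCode::UnknownError.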
StatusCode::UnknownError; + } + return code; +} + +// PythonErrorDetail indicates a Python exception was raised. +class PythonErrorDetail : public StatusDetail { + public: + const char* type_id() const override { return kErrorDetailTypeId; } + + std::string ToString() const override { + // This is simple enough not to need the GIL + const auto ty = reinterpret_cast(exc_type_.obj()); + // XXX Should we also print traceback? + return std::string("Python exception: ") + ty->tp_name; + } + + void RestorePyError() const { + Py_INCREF(exc_type_.obj()); + Py_INCREF(exc_value_.obj()); + Py_INCREF(exc_traceback_.obj()); + PyErr_Restore(exc_type_.obj(), exc_value_.obj(), exc_traceback_.obj()); + } + + PyObject* exc_type() const { return exc_type_.obj(); } + + PyObject* exc_value() const { return exc_value_.obj(); } + + static std::shared_ptr FromPyError() { + PyObject* exc_type = nullptr; + PyObject* exc_value = nullptr; + PyObject* exc_traceback = nullptr; + + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + PyErr_NormalizeException(&exc_type, &exc_value, &exc_traceback); + ARROW_CHECK(exc_type) + << "PythonErrorDetail::FromPyError called without a Python error set"; + DCHECK(PyType_Check(exc_type)); + DCHECK(exc_value); // Ensured by PyErr_NormalizeException, double-check + if (exc_traceback == nullptr) { + // Needed by PyErr_Restore() + Py_INCREF(Py_None); + exc_traceback = Py_None; + } + + std::shared_ptr detail(new PythonErrorDetail); + detail->exc_type_.reset(exc_type); + detail->exc_value_.reset(exc_value); + detail->exc_traceback_.reset(exc_traceback); + return detail; + } + + protected: + PythonErrorDetail() = default; + + OwnedRefNoGIL exc_type_, exc_value_, exc_traceback_; +}; + +} // namespace + +// ---------------------------------------------------------------------- +// Python exception <-> Status + +Status ConvertPyError(StatusCode code) { + auto detail = PythonErrorDetail::FromPyError(); + if (code == StatusCode::UnknownError) { + code = MapPyError(detail->exc_type()); + } + + std::string message; + RETURN_NOT_OK(internal::PyObject_StdStringStr(detail->exc_value(), &message)); + return Status(code, message, detail); +} + +bool IsPyError(const Status& status) { + if (status.ok()) { + return false; + } + auto detail = status.detail(); + bool result = detail != nullptr && detail->type_id() == kErrorDetailTypeId; + return result; +} + +void RestorePyError(const Status& status) { + ARROW_CHECK(IsPyError(status)); + const auto& detail = checked_cast(*status.detail()); + detail.RestorePyError(); +} + +// ---------------------------------------------------------------------- +// PyBuffer + +PyBuffer::PyBuffer() : Buffer(nullptr, 0) {} + +Status PyBuffer::Init(PyObject* obj) { + if (!PyObject_GetBuffer(obj, &py_buf_, PyBUF_ANY_CONTIGUOUS)) { + data_ = reinterpret_cast(py_buf_.buf); + ARROW_CHECK_NE(data_, nullptr) << "Null pointer in Py_buffer"; + size_ = py_buf_.len; + capacity_ = py_buf_.len; + is_mutable_ = !py_buf_.readonly; + return Status::OK(); + } else { + return ConvertPyError(StatusCode::Invalid); + } +} + +Result> PyBuffer::FromPyObject(PyObject* obj) { + PyBuffer* buf = new PyBuffer(); + std::shared_ptr res(buf); + RETURN_NOT_OK(buf->Init(obj)); + return res; +} + +PyBuffer::~PyBuffer() { + if (data_ != nullptr) { + PyAcquireGIL lock; + PyBuffer_Release(&py_buf_); + } +} + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/csv.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/csv.cc new file mode 100644 index 0000000..1df3a94 
--- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/csv.cc @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "csv.h" + +#include + +#include "arrow/python/common.h" + +namespace arrow { + +using csv::InvalidRow; +using csv::InvalidRowHandler; +using csv::InvalidRowResult; + +namespace py { +namespace csv { + +InvalidRowHandler MakeInvalidRowHandler(PyInvalidRowCallback cb, PyObject* py_handler) { + if (cb == nullptr) { + return InvalidRowHandler{}; + } + + struct Handler { + PyInvalidRowCallback cb; + std::shared_ptr handler_ref; + + InvalidRowResult operator()(const InvalidRow& invalid_row) { + InvalidRowResult result; + auto st = SafeCallIntoPython([&]() -> Status { + result = cb(handler_ref->obj(), invalid_row); + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(handler_ref->obj()); + } + return Status::OK(); + }); + ARROW_UNUSED(st); + return result; + } + }; + + Py_INCREF(py_handler); + return Handler{cb, std::make_shared(py_handler)}; +} + +} // namespace csv +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/csv.h b/src/vendored/apache-arrow-12.0.1/arrow/python/csv.h index 2295c49..34302e9 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/csv.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/csv.h @@ -23,8 +23,8 @@ #include #include "arrow/csv/options.h" -#include "arrow/util/macros.h" #include "arrow/python/common.h" +#include "arrow/util/macros.h" namespace arrow { namespace py { diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/datetime.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/datetime.cc new file mode 100644 index 0000000..0e817dd --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/datetime.cc @@ -0,0 +1,663 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
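+
+// This file provides the datetime glue between Arrow and CPython: importing
+// the CPython datetime C API, recognizing fixed "[+-]HH:MM" timezone
+// offsets, exposing the MonthDayNano struct sequence, and epoch-based
+// calendar arithmetic adapted from NumPy (noted below).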
+#include "datetime.h" + +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/python/arrow_to_python_internal.h" +#include "arrow/python/common.h" +#include "arrow/python/helpers.h" +#include "arrow/python/platform.h" +#include "arrow/scalar.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/util/logging.h" +#include "arrow/util/regex.h" +#include "arrow/util/value_parsing.h" + +namespace arrow { + +using internal::RegexMatch; + +namespace py { +namespace internal { + +namespace { + +bool MatchFixedOffset(const std::string& tz, std::string_view* sign, + std::string_view* hour, std::string_view* minute) { + static const std::regex regex("^([+-])(0[0-9]|1[0-9]|2[0-3]):([0-5][0-9])$"); + if (tz.size() < 5) { + return false; + } + return RegexMatch(regex, tz, {sign, hour, minute}); +} + +constexpr char* NonConst(const char* st) { + // Hack for python versions < 3.7 where members of PyStruct members + // where non-const (C++ doesn't like assigning string literals to these types) + return const_cast(st); +} + +static PyTypeObject MonthDayNanoTupleType = {}; + +static PyStructSequence_Field MonthDayNanoField[] = { + {NonConst("months"), NonConst("The number of months in the interval")}, + {NonConst("days"), NonConst("The number days in the interval")}, + {NonConst("nanoseconds"), NonConst("The number of nanoseconds in the interval")}, + {nullptr, nullptr}}; + +static PyStructSequence_Desc MonthDayNanoTupleDesc = { + NonConst("MonthDayNano"), + NonConst("A calendar interval consisting of months, days and nanoseconds."), + MonthDayNanoField, + /*n_in_sequence=*/3}; + +} // namespace + +#ifndef PYPY_VERSION +PyDateTime_CAPI* datetime_api = nullptr; + +void InitDatetime() { + PyAcquireGIL lock; + datetime_api = + reinterpret_cast(PyCapsule_Import(PyDateTime_CAPSULE_NAME, 0)); + if (datetime_api == nullptr) { + Py_FatalError("Could not import datetime C API"); + } +} +#endif + +// The following code is adapted from +// https://github.com/numpy/numpy/blob/main/numpy/core/src/multiarray/datetime.c + +// Days per month, regular year and leap year +static int64_t _days_per_month_table[2][12] = { + {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, + {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; + +static bool is_leapyear(int64_t year) { + return (year & 0x3) == 0 && // year % 4 == 0 + ((year % 100) != 0 || (year % 400) == 0); +} + +// Calculates the days offset from the 1970 epoch. +static int64_t get_days_from_date(int64_t date_year, int64_t date_month, + int64_t date_day) { + int64_t i, month; + int64_t year, days = 0; + int64_t* month_lengths; + + year = date_year - 1970; + days = year * 365; + + // Adjust for leap years + if (days >= 0) { + // 1968 is the closest leap year before 1970. + // Exclude the current year, so add 1. + year += 1; + // Add one day for each 4 years + days += year / 4; + // 1900 is the closest previous year divisible by 100 + year += 68; + // Subtract one day for each 100 years + days -= year / 100; + // 1600 is the closest previous year divisible by 400 + year += 300; + // Add one day for each 400 years + days += year / 400; + } else { + // 1972 is the closest later year after 1970. + // Include the current year, so subtract 2. 
+ year -= 2; + // Subtract one day for each 4 years + days += year / 4; + // 2000 is the closest later year divisible by 100 + year -= 28; + // Add one day for each 100 years + days -= year / 100; + // 2000 is also the closest later year divisible by 400 + // Subtract one day for each 400 years + days += year / 400; + } + + month_lengths = _days_per_month_table[is_leapyear(date_year)]; + month = date_month - 1; + + // Add the months + for (i = 0; i < month; ++i) { + days += month_lengths[i]; + } + + // Add the days + days += date_day - 1; + + return days; +} + +// Modifies '*days_' to be the day offset within the year, +// and returns the year. +static int64_t days_to_yearsdays(int64_t* days_) { + const int64_t days_per_400years = (400 * 365 + 100 - 4 + 1); + // Adjust so it's relative to the year 2000 (divisible by 400) + int64_t days = (*days_) - (365 * 30 + 7); + int64_t year; + + // Break down the 400 year cycle to get the year and day within the year + if (days >= 0) { + year = 400 * (days / days_per_400years); + days = days % days_per_400years; + } else { + year = 400 * ((days - (days_per_400years - 1)) / days_per_400years); + days = days % days_per_400years; + if (days < 0) { + days += days_per_400years; + } + } + + // Work out the year/day within the 400 year cycle + if (days >= 366) { + year += 100 * ((days - 1) / (100 * 365 + 25 - 1)); + days = (days - 1) % (100 * 365 + 25 - 1); + if (days >= 365) { + year += 4 * ((days + 1) / (4 * 365 + 1)); + days = (days + 1) % (4 * 365 + 1); + if (days >= 366) { + year += (days - 1) / 365; + days = (days - 1) % 365; + } + } + } + + *days_ = days; + return year + 2000; +} + +// Extracts the month and year and day number from a number of days +static void get_date_from_days(int64_t days, int64_t* date_year, int64_t* date_month, + int64_t* date_day) { + int64_t *month_lengths, i; + + *date_year = days_to_yearsdays(&days); + month_lengths = _days_per_month_table[is_leapyear(*date_year)]; + + for (i = 0; i < 12; ++i) { + if (days < month_lengths[i]) { + *date_month = i + 1; + *date_day = days + 1; + return; + } else { + days -= month_lengths[i]; + } + } + + // Should never get here + return; +} + +// Splitting time quantities, for example splitting total seconds into +// minutes and remaining seconds. After we run +// int64_t remaining = split_time(total, quotient, &next) +// we have +// total = next * quotient + remaining. Handles negative values by propagating +// them: If total is negative, next will be negative and remaining will +// always be non-negative. 
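+// Editorial note: a worked example of the invariant above, assuming C++
+// truncating division. split_time(-65, 60, &next) computes r = -65 % 60 = -5;
+// since r < 0 it sets next = -65 / 60 - 1 = -2 and returns -5 + 60 = 55.
+// Check: next * quotient + remaining = -2 * 60 + 55 = -65, and the remainder
+// is non-negative as promised.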
+static inline int64_t split_time(int64_t total, int64_t quotient, int64_t* next) {
+  int64_t r = total % quotient;
+  if (r < 0) {
+    *next = total / quotient - 1;
+    return r + quotient;
+  } else {
+    *next = total / quotient;
+    return r;
+  }
+}
+
+static inline Status PyTime_convert_int(int64_t val, const TimeUnit::type unit,
+                                        int64_t* hour, int64_t* minute, int64_t* second,
+                                        int64_t* microsecond) {
+  switch (unit) {
+    case TimeUnit::NANO:
+      if (val % 1000 != 0) {
+        return Status::Invalid("Value ", val, " has non-zero nanoseconds");
+      }
+      val /= 1000;
+      // fall through
+    case TimeUnit::MICRO:
+      *microsecond = split_time(val, 1000000LL, &val);
+      *second = split_time(val, 60, &val);
+      *minute = split_time(val, 60, hour);
+      break;
+    case TimeUnit::MILLI:
+      *microsecond = split_time(val, 1000, &val) * 1000;
+      // fall through
+    case TimeUnit::SECOND:
+      *second = split_time(val, 60, &val);
+      *minute = split_time(val, 60, hour);
+      break;
+    default:
+      break;
+  }
+  return Status::OK();
+}
+
+static inline Status PyDate_convert_int(int64_t val, const DateUnit unit, int64_t* year,
+                                        int64_t* month, int64_t* day) {
+  switch (unit) {
+    case DateUnit::MILLI:
+      val /= 86400000LL;  // fall through
+    case DateUnit::DAY:
+      get_date_from_days(val, year, month, day);
+    default:
+      break;
+  }
+  return Status::OK();
+}
+
+PyObject* NewMonthDayNanoTupleType() {
+  if (MonthDayNanoTupleType.tp_name == nullptr) {
+    if (PyStructSequence_InitType2(&MonthDayNanoTupleType, &MonthDayNanoTupleDesc) != 0) {
+      Py_FatalError("Could not initialize MonthDayNanoTuple");
+    }
+  }
+  Py_INCREF(&MonthDayNanoTupleType);
+  return (PyObject*)&MonthDayNanoTupleType;
+}
+
+Status PyTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out) {
+  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
+  RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
+  *out = PyTime_FromTime(static_cast<int32_t>(hour), static_cast<int32_t>(minute),
+                         static_cast<int32_t>(second), static_cast<int32_t>(microsecond));
+  return Status::OK();
+}
+
+Status PyDate_from_int(int64_t val, const DateUnit unit, PyObject** out) {
+  int64_t year = 0, month = 0, day = 0;
+  RETURN_NOT_OK(PyDate_convert_int(val, unit, &year, &month, &day));
+  *out = PyDate_FromDate(static_cast<int32_t>(year), static_cast<int32_t>(month),
+                         static_cast<int32_t>(day));
+  return Status::OK();
+}
+
+Status PyDateTime_from_int(int64_t val, const TimeUnit::type unit, PyObject** out) {
+  int64_t hour = 0, minute = 0, second = 0, microsecond = 0;
+  RETURN_NOT_OK(PyTime_convert_int(val, unit, &hour, &minute, &second, &microsecond));
+  int64_t total_days = 0;
+  hour = split_time(hour, 24, &total_days);
+  int64_t year = 0, month = 0, day = 0;
+  get_date_from_days(total_days, &year, &month, &day);
+  *out = PyDateTime_FromDateAndTime(
+      static_cast<int32_t>(year), static_cast<int32_t>(month), static_cast<int32_t>(day),
+      static_cast<int32_t>(hour), static_cast<int32_t>(minute),
+      static_cast<int32_t>(second), static_cast<int32_t>(microsecond));
+  return Status::OK();
+}
+
+int64_t PyDate_to_days(PyDateTime_Date* pydate) {
+  return get_days_from_date(PyDateTime_GET_YEAR(pydate), PyDateTime_GET_MONTH(pydate),
+                            PyDateTime_GET_DAY(pydate));
+}
+
+Result<int64_t> PyDateTime_utcoffset_s(PyObject* obj) {
+  // calculate offset from UTC timezone in seconds
+  // supports only PyDateTime_DateTime and PyDateTime_Time objects
+  OwnedRef pyoffset(PyObject_CallMethod(obj, "utcoffset", NULL));
+  RETURN_IF_PYERROR();
+  if (pyoffset.obj() != nullptr && pyoffset.obj() != Py_None) {
+    auto delta = reinterpret_cast<PyDateTime_Delta*>(pyoffset.obj());
+    return internal::PyDelta_to_s(delta);
+  } else {
+    return 0;
+  }
+}
+
+Result<std::string>
PyTZInfo_utcoffset_hhmm(PyObject* pytzinfo) { + // attempt to convert timezone offset objects to "+/-{hh}:{mm}" format + OwnedRef pydelta_object(PyObject_CallMethod(pytzinfo, "utcoffset", "O", Py_None)); + RETURN_IF_PYERROR(); + + if (!PyDelta_Check(pydelta_object.obj())) { + return Status::Invalid( + "Object returned by tzinfo.utcoffset(None) is not an instance of " + "datetime.timedelta"); + } + auto pydelta = reinterpret_cast(pydelta_object.obj()); + + // retrieve the offset as seconds + auto total_seconds = internal::PyDelta_to_s(pydelta); + + // determine whether the offset is positive or negative + auto sign = (total_seconds < 0) ? "-" : "+"; + total_seconds = abs(total_seconds); + + // calculate offset components + int64_t hours, minutes, seconds; + seconds = split_time(total_seconds, 60, &minutes); + minutes = split_time(minutes, 60, &hours); + if (seconds > 0) { + // check there are no remaining seconds + return Status::Invalid("Offset must represent whole number of minutes"); + } + + // construct the timezone string + std::stringstream stream; + stream << sign << std::setfill('0') << std::setw(2) << hours << ":" << std::setfill('0') + << std::setw(2) << minutes; + return stream.str(); +} + +// Converted from python. See https://github.com/apache/arrow/pull/7604 +// for details. +Result StringToTzinfo(const std::string& tz) { + std::string_view sign_str, hour_str, minute_str; + OwnedRef pytz; + OwnedRef zoneinfo; + OwnedRef datetime; + + if (internal::ImportModule("pytz", &pytz).ok()) { + if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { + int sign = -1; + if (sign_str == "+") { + sign = 1; + } + OwnedRef fixed_offset; + RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "FixedOffset", &fixed_offset)); + uint32_t minutes, hours; + if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(), &hours) || + !::arrow::internal::ParseUnsigned(minute_str.data(), minute_str.size(), + &minutes)) { + return Status::Invalid("Invalid timezone: ", tz); + } + OwnedRef total_minutes(PyLong_FromLong( + sign * ((static_cast(hours) * 60) + static_cast(minutes)))); + RETURN_IF_PYERROR(); + auto tzinfo = + PyObject_CallFunctionObjArgs(fixed_offset.obj(), total_minutes.obj(), NULL); + RETURN_IF_PYERROR(); + return tzinfo; + } + + OwnedRef timezone; + RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone", &timezone)); + OwnedRef py_tz_string( + PyUnicode_FromStringAndSize(tz.c_str(), static_cast(tz.size()))); + auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(), py_tz_string.obj(), NULL); + RETURN_IF_PYERROR(); + return tzinfo; + } + + // catch fixed offset if pytz is not present + if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) { + RETURN_NOT_OK(internal::ImportModule("datetime", &datetime)); + int sign = -1; + if (sign_str == "+") { + sign = 1; + } + + // import timezone and timedelta module to create a tzinfo object + OwnedRef class_timezone; + OwnedRef class_timedelta; + RETURN_NOT_OK( + internal::ImportFromModule(datetime.obj(), "timezone", &class_timezone)); + RETURN_NOT_OK( + internal::ImportFromModule(datetime.obj(), "timedelta", &class_timedelta)); + + // check input + uint32_t minutes, hours; + if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(), &hours) || + !::arrow::internal::ParseUnsigned(minute_str.data(), minute_str.size(), + &minutes)) { + return Status::Invalid("Invalid timezone: ", tz); + } + + // save offset as a signed integer + OwnedRef total_minutes(PyLong_FromLong( + sign * ((static_cast(hours) * 60) + 
static_cast(minutes)))); + // create zero integers for empty arguments in datetime.timedelta + OwnedRef zero(PyLong_FromLong(static_cast(0))); + + // call datetime.timedelta to get correct offset object for datetime.timezone + auto offset = + PyObject_CallFunctionObjArgs(class_timedelta.obj(), zero.obj(), zero.obj(), + zero.obj(), zero.obj(), total_minutes.obj(), NULL); + RETURN_IF_PYERROR(); + // call datetime.timezone + auto tzinfo = PyObject_CallFunctionObjArgs(class_timezone.obj(), offset, NULL); + RETURN_IF_PYERROR(); + return tzinfo; + } + + // fallback on zoneinfo if tz is string and pytz is not present + if (internal::ImportModule("zoneinfo", &zoneinfo).ok()) { + OwnedRef class_zoneinfo; + RETURN_NOT_OK( + internal::ImportFromModule(zoneinfo.obj(), "ZoneInfo", &class_zoneinfo)); + OwnedRef py_tz_string( + PyUnicode_FromStringAndSize(tz.c_str(), static_cast(tz.size()))); + auto tzinfo = + PyObject_CallFunctionObjArgs(class_zoneinfo.obj(), py_tz_string.obj(), NULL); + RETURN_IF_PYERROR(); + return tzinfo; + } + + return Status::Invalid( + "Pytz package or Python>=3.8 for zoneinfo module must be installed."); +} + +Result TzinfoToString(PyObject* tzinfo) { + OwnedRef module_pytz; // import pytz + OwnedRef module_datetime; // import datetime + OwnedRef module_zoneinfo; // import zoneinfo + OwnedRef module_dateutil; // import dateutil + OwnedRef class_timezone; // from datetime import timezone + OwnedRef class_fixedoffset; // from pytz import _FixedOffset + OwnedRef class_basetzinfo; // from pytz import BaseTzInfo + OwnedRef class_zoneinfo; // from zoneinfo import ZoneInfo + OwnedRef class_tzfile; // from zoneinfo import tzfile + + // import necessary modules + RETURN_NOT_OK(internal::ImportModule("datetime", &module_datetime)); + // import necessary classes + RETURN_NOT_OK( + internal::ImportFromModule(module_datetime.obj(), "timezone", &class_timezone)); + + // check that it's a valid tzinfo object + if (!PyTZInfo_Check(tzinfo)) { + return Status::TypeError("Not an instance of datetime.tzinfo"); + } + + // if tzinfo is an instance of datetime.timezone return the + // HH:MM offset string representation + if (PyObject_IsInstance(tzinfo, class_timezone.obj())) { + // still recognize datetime.timezone.utc as UTC (instead of +00:00) + OwnedRef tzname_object(PyObject_CallMethod(tzinfo, "tzname", "O", Py_None)); + RETURN_IF_PYERROR(); + if (PyUnicode_Check(tzname_object.obj())) { + std::string result; + RETURN_NOT_OK(internal::PyUnicode_AsStdString(tzname_object.obj(), &result)); + if (result == "UTC") { + return result; + } + } + return PyTZInfo_utcoffset_hhmm(tzinfo); + } + + // Try to import pytz if it is available + if (internal::ImportModule("pytz", &module_pytz).ok()) { + RETURN_NOT_OK(internal::ImportFromModule(module_pytz.obj(), "_FixedOffset", + &class_fixedoffset)); + RETURN_NOT_OK( + internal::ImportFromModule(module_pytz.obj(), "BaseTzInfo", &class_basetzinfo)); + } + + // if tzinfo is an instance of pytz._FixedOffset return the + // HH:MM offset string representation + if (module_pytz.obj() != nullptr && + PyObject_IsInstance(tzinfo, class_fixedoffset.obj())) { + OwnedRef tzname_object(PyObject_CallMethod(tzinfo, "tzname", "O", Py_None)); + RETURN_IF_PYERROR(); + return PyTZInfo_utcoffset_hhmm(tzinfo); + } + + // if pytz is installed and tzinfo is and instance of pytz.BaseTzInfo + if (module_pytz.obj() != nullptr && + PyObject_IsInstance(tzinfo, class_basetzinfo.obj())) { + OwnedRef zone(PyObject_GetAttrString(tzinfo, "zone")); + RETURN_IF_PYERROR(); + std::string result; + 
RETURN_NOT_OK(internal::PyUnicode_AsStdString(zone.obj(), &result)); + return result; + } + + // Try to import zoneinfo if it is available + if (internal::ImportModule("zoneinfo", &module_zoneinfo).ok()) { + RETURN_NOT_OK( + internal::ImportFromModule(module_zoneinfo.obj(), "ZoneInfo", &class_zoneinfo)); + } + + // if zoneinfo is installed and tzinfo is an instance of zoneinfo.ZoneInfo + if (module_zoneinfo.obj() != nullptr && + PyObject_IsInstance(tzinfo, class_zoneinfo.obj())) { + OwnedRef key(PyObject_GetAttrString(tzinfo, "key")); + RETURN_IF_PYERROR(); + std::string result; + RETURN_NOT_OK(internal::PyUnicode_AsStdString(key.obj(), &result)); + return result; + } + + // Try to import dateutil if it is available + if (internal::ImportModule("dateutil.tz", &module_dateutil).ok()) { + RETURN_NOT_OK( + internal::ImportFromModule(module_dateutil.obj(), "tzfile", &class_tzfile)); + } + + // if dateutil is installed and tzinfo is an instance of dateutil.tz.tzfile + if (module_dateutil.obj() != nullptr && + PyObject_IsInstance(tzinfo, class_tzfile.obj())) { + OwnedRef _filename(PyObject_GetAttrString(tzinfo, "_filename")); + RETURN_IF_PYERROR(); + std::string result; + RETURN_NOT_OK(internal::PyUnicode_AsStdString(_filename.obj(), &result)); + // _filename returns a full path in general ('/usr/share/zoneinfo/Europe/Paris') + // or POSIX name on Windows ('Europe/Paris') - we need a substring in first case + std::size_t pos = result.find("zoneinfo/"); + if (pos != std::string::npos) { + return result.substr(pos + 9); + } + return result; + } + + // attempt to call tzinfo.tzname(None) + OwnedRef tzname_object(PyObject_CallMethod(tzinfo, "tzname", "O", Py_None)); + RETURN_IF_PYERROR(); + if (PyUnicode_Check(tzname_object.obj())) { + std::string result; + RETURN_NOT_OK(internal::PyUnicode_AsStdString(tzname_object.obj(), &result)); + return result; + } + + // fall back to HH:MM offset string representation based on tzinfo.utcoffset(None) + return PyTZInfo_utcoffset_hhmm(tzinfo); +} + +PyObject* MonthDayNanoIntervalToNamedTuple( + const MonthDayNanoIntervalType::MonthDayNanos& interval) { + OwnedRef tuple(PyStructSequence_New(&MonthDayNanoTupleType)); + if (ARROW_PREDICT_FALSE(tuple.obj() == nullptr)) { + return nullptr; + } + PyStructSequence_SetItem(tuple.obj(), /*pos=*/0, PyLong_FromLong(interval.months)); + PyStructSequence_SetItem(tuple.obj(), /*pos=*/1, PyLong_FromLong(interval.days)); + PyStructSequence_SetItem(tuple.obj(), /*pos=*/2, + PyLong_FromLongLong(interval.nanoseconds)); + return tuple.detach(); +} + +namespace { + +// Wrapper around a Python list object that mimics dereference and assignment +// operations. 
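+//
+// Editorial note: the struct below is effectively a minimal output iterator
+// over a preallocated list: *it yields the assigner itself, assignment
+// forwards to PyList_SetItem (which steals the reference it is given), and
+// ++it advances the target index. A hedged usage sketch, assuming `list` is
+// a PyList already sized to hold two items:
+//
+//   PyListAssigner out(list);
+//   *out = PyLong_FromLong(1);  // fills slot 0, reference stolen
+//   ++out;
+//   *out = PyLong_FromLong(2);  // fills slot 1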
+struct PyListAssigner { + public: + explicit PyListAssigner(PyObject* list) : list_(list) { DCHECK(PyList_Check(list_)); } + + PyListAssigner& operator*() { return *this; } + + void operator=(PyObject* obj) { + if (ARROW_PREDICT_FALSE(PyList_SetItem(list_, current_index_, obj) == -1)) { + Py_FatalError("list did not have the correct preallocated size."); + } + } + + PyListAssigner& operator++() { + current_index_++; + return *this; + } + + PyListAssigner& operator+=(int64_t offset) { + current_index_ += offset; + return *this; + } + + private: + PyObject* list_; + int64_t current_index_ = 0; +}; + +} // namespace + +Result MonthDayNanoIntervalArrayToPyList( + const MonthDayNanoIntervalArray& array) { + OwnedRef out_list(PyList_New(array.length())); + RETURN_IF_PYERROR(); + PyListAssigner out_objects(out_list.obj()); + auto& interval_array = + arrow::internal::checked_cast(array); + RETURN_NOT_OK(internal::WriteArrayObjects( + interval_array, + [&](const MonthDayNanoIntervalType::MonthDayNanos& interval, PyListAssigner& out) { + PyObject* tuple = internal::MonthDayNanoIntervalToNamedTuple(interval); + if (ARROW_PREDICT_FALSE(tuple == nullptr)) { + RETURN_IF_PYERROR(); + } + + *out = tuple; + return Status::OK(); + }, + out_objects)); + return out_list.detach(); +} + +Result MonthDayNanoIntervalScalarToPyObject( + const MonthDayNanoIntervalScalar& scalar) { + if (scalar.is_valid) { + return internal::MonthDayNanoIntervalToNamedTuple(scalar.value); + } else { + Py_INCREF(Py_None); + return Py_None; + } +} + +} // namespace internal +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/datetime.h b/src/vendored/apache-arrow-12.0.1/arrow/python/datetime.h index a5cca55..327a61f 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/datetime.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/datetime.h @@ -20,14 +20,14 @@ #include #include +#include "arrow/python/platform.h" +#include "arrow/python/visibility.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_fwd.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" -#include "arrow/python/platform.h" -#include "arrow/python/visibility.h" // By default, PyDateTimeAPI is a *static* variable. This forces // PyDateTime_IMPORT to be called in every C/C++ module using the diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/decimal.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/decimal.cc new file mode 100644 index 0000000..0c00fcf --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/decimal.cc @@ -0,0 +1,246 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
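+// Editorial note (not part of the vendored sources): the inference performed
+// by InferDecimalPrecisionAndScale() below follows decimal.Decimal.as_tuple().
+// Worked example for the leading-zeros branch: Decimal("0.01234").as_tuple()
+// gives digits = (1, 2, 3, 4) and exponent = -5, so precision =
+// max(4, 5) = 5 and scale = 5, i.e. a decimal128(5, 5) type.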
+
+#include <algorithm>
+#include <limits>
+
+#include "arrow/python/common.h"
+#include "arrow/python/decimal.h"
+#include "arrow/python/helpers.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/decimal.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+namespace py {
+namespace internal {
+
+Status ImportDecimalType(OwnedRef* decimal_type) {
+  OwnedRef decimal_module;
+  RETURN_NOT_OK(ImportModule("decimal", &decimal_module));
+  RETURN_NOT_OK(ImportFromModule(decimal_module.obj(), "Decimal", decimal_type));
+  return Status::OK();
+}
+
+Status PythonDecimalToString(PyObject* python_decimal, std::string* out) {
+  // Call Python's str(decimal_object)
+  return PyObject_StdStringStr(python_decimal, out);
+}
+
+// \brief Infer the precision and scale of a Python decimal.Decimal instance
+// \param python_decimal[in] An instance of decimal.Decimal
+// \param precision[out] The value of the inferred precision
+// \param scale[out] The value of the inferred scale
+// \return The status of the operation
+static Status InferDecimalPrecisionAndScale(PyObject* python_decimal, int32_t* precision,
+                                            int32_t* scale) {
+  DCHECK_NE(python_decimal, NULLPTR);
+  DCHECK_NE(precision, NULLPTR);
+  DCHECK_NE(scale, NULLPTR);
+
+  // TODO(phillipc): Make sure we perform PyDecimal_Check(python_decimal) as a DCHECK
+  OwnedRef as_tuple(PyObject_CallMethod(python_decimal, const_cast<char*>("as_tuple"),
+                                        const_cast<char*>("")));
+  RETURN_IF_PYERROR();
+  DCHECK(PyTuple_Check(as_tuple.obj()));
+
+  OwnedRef digits(PyObject_GetAttrString(as_tuple.obj(), "digits"));
+  RETURN_IF_PYERROR();
+  DCHECK(PyTuple_Check(digits.obj()));
+
+  const auto num_digits = static_cast<int32_t>(PyTuple_Size(digits.obj()));
+  RETURN_IF_PYERROR();
+
+  OwnedRef py_exponent(PyObject_GetAttrString(as_tuple.obj(), "exponent"));
+  RETURN_IF_PYERROR();
+  DCHECK(IsPyInteger(py_exponent.obj()));
+
+  const auto exponent = static_cast<int32_t>(PyLong_AsLong(py_exponent.obj()));
+  RETURN_IF_PYERROR();
+
+  if (exponent < 0) {
+    // If abs(exponent) > num_digits, we have a number with leading zeros
+    // such as 0.01234. Ensure we have enough precision for leading zeros
+    // (which are not included in num_digits).
+    *precision = std::max(num_digits, -exponent);
+    *scale = -exponent;
+  } else {
+    // Trailing zeros are not included in num_digits, need to add to precision.
+    // Note we don't generate negative scales as they are poorly supported
+    // in non-Arrow systems.
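+    // Editorial worked example: Decimal("1.23E+4") is 12300; as_tuple() gives
+    // digits = (1, 2, 3) and exponent = 2, so precision = 3 + 2 = 5 and
+    // scale = 0.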
+ *precision = num_digits + exponent; + *scale = 0; + } + return Status::OK(); +} + +PyObject* DecimalFromString(PyObject* decimal_constructor, + const std::string& decimal_string) { + DCHECK_NE(decimal_constructor, nullptr); + + auto string_size = decimal_string.size(); + DCHECK_GT(string_size, 0); + + auto string_bytes = decimal_string.c_str(); + DCHECK_NE(string_bytes, nullptr); + + return PyObject_CallFunction(decimal_constructor, const_cast("s#"), string_bytes, + static_cast(string_size)); +} + +namespace { + +template +Status DecimalFromStdString(const std::string& decimal_string, + const DecimalType& arrow_type, ArrowDecimal* out) { + int32_t inferred_precision; + int32_t inferred_scale; + + RETURN_NOT_OK(ArrowDecimal::FromString(decimal_string, out, &inferred_precision, + &inferred_scale)); + + const int32_t precision = arrow_type.precision(); + const int32_t scale = arrow_type.scale(); + + if (scale != inferred_scale) { + DCHECK_NE(out, NULLPTR); + ARROW_ASSIGN_OR_RAISE(*out, out->Rescale(inferred_scale, scale)); + } + + auto inferred_scale_delta = inferred_scale - scale; + if (ARROW_PREDICT_FALSE((inferred_precision - inferred_scale_delta) > precision)) { + return Status::Invalid( + "Decimal type with precision ", inferred_precision, + " does not fit into precision inferred from first array element: ", precision); + } + + return Status::OK(); +} + +template +Status InternalDecimalFromPythonDecimal(PyObject* python_decimal, + const DecimalType& arrow_type, + ArrowDecimal* out) { + DCHECK_NE(python_decimal, NULLPTR); + DCHECK_NE(out, NULLPTR); + + std::string string; + RETURN_NOT_OK(PythonDecimalToString(python_decimal, &string)); + return DecimalFromStdString(string, arrow_type, out); +} + +template +Status InternalDecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, + ArrowDecimal* out) { + DCHECK_NE(obj, NULLPTR); + DCHECK_NE(out, NULLPTR); + + if (IsPyInteger(obj)) { + // TODO: add a fast path for small-ish ints + std::string string; + RETURN_NOT_OK(PyObject_StdStringStr(obj, &string)); + return DecimalFromStdString(string, arrow_type, out); + } else if (PyDecimal_Check(obj)) { + return InternalDecimalFromPythonDecimal(obj, arrow_type, out); + } else { + return Status::TypeError("int or Decimal object expected, got ", + Py_TYPE(obj)->tp_name); + } +} + +} // namespace + +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal128* out) { + return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); +} + +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, + Decimal128* out) { + return InternalDecimalFromPyObject(obj, arrow_type, out); +} + +Status DecimalFromPythonDecimal(PyObject* python_decimal, const DecimalType& arrow_type, + Decimal256* out) { + return InternalDecimalFromPythonDecimal(python_decimal, arrow_type, out); +} + +Status DecimalFromPyObject(PyObject* obj, const DecimalType& arrow_type, + Decimal256* out) { + return InternalDecimalFromPyObject(obj, arrow_type, out); +} + +bool PyDecimal_Check(PyObject* obj) { + static OwnedRef decimal_type; + if (!decimal_type.obj()) { + ARROW_CHECK_OK(ImportDecimalType(&decimal_type)); + DCHECK(PyType_Check(decimal_type.obj())); + } + // PyObject_IsInstance() is slower as it has to check for virtual subclasses + const int result = + PyType_IsSubtype(Py_TYPE(obj), reinterpret_cast(decimal_type.obj())); + ARROW_CHECK_NE(result, -1) << " error during PyType_IsSubtype check"; + return result == 1; +} + +bool PyDecimal_ISNAN(PyObject* obj) { + 
DCHECK(PyDecimal_Check(obj)) << "obj is not an instance of decimal.Decimal"; + OwnedRef is_nan( + PyObject_CallMethod(obj, const_cast("is_nan"), const_cast(""))); + return PyObject_IsTrue(is_nan.obj()) == 1; +} + +DecimalMetadata::DecimalMetadata() + : DecimalMetadata(std::numeric_limits::min(), + std::numeric_limits::min()) {} + +DecimalMetadata::DecimalMetadata(int32_t precision, int32_t scale) + : precision_(precision), scale_(scale) {} + +Status DecimalMetadata::Update(int32_t suggested_precision, int32_t suggested_scale) { + const int32_t current_scale = scale_; + scale_ = std::max(current_scale, suggested_scale); + + const int32_t current_precision = precision_; + + if (current_precision == std::numeric_limits::min()) { + precision_ = suggested_precision; + } else { + auto num_digits = std::max(current_precision - current_scale, + suggested_precision - suggested_scale); + precision_ = std::max(num_digits + scale_, current_precision); + } + + return Status::OK(); +} + +Status DecimalMetadata::Update(PyObject* object) { + bool is_decimal = PyDecimal_Check(object); + + if (ARROW_PREDICT_FALSE(!is_decimal || PyDecimal_ISNAN(object))) { + return Status::OK(); + } + + int32_t precision = 0; + int32_t scale = 0; + RETURN_NOT_OK(InferDecimalPrecisionAndScale(object, &precision, &scale)); + return Update(precision, scale); +} + +} // namespace internal +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.cc new file mode 100644 index 0000000..961a168 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.cc @@ -0,0 +1,495 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
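+// Editorial note (not part of the vendored sources) on DecimalMetadata::Update()
+// in decimal.cc above: starting from the sentinel state, Update(3, 2) yields
+// (precision = 3, scale = 2); a later Update(5, 1) widens to
+// scale = max(2, 1) = 2 and precision = max(max(3 - 2, 5 - 1) + 2, 3) = 6,
+// so (6, 2) can hold both observed shapes exactly.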
+ +#include "arrow/python/deserialize.h" + +#include "arrow/python/numpy_interop.h" + +#include +#include +#include +#include +#include + +#include +#include + +#include "arrow/array.h" +#include "arrow/io/interfaces.h" +#include "arrow/io/memory.h" +#include "arrow/ipc/options.h" +#include "arrow/ipc/reader.h" +#include "arrow/ipc/util.h" +#include "arrow/ipc/writer.h" +#include "arrow/table.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" +#include "arrow/util/value_parsing.h" + +#include "arrow/python/common.h" +#include "arrow/python/datetime.h" +#include "arrow/python/helpers.h" +#include "arrow/python/numpy_convert.h" +#include "arrow/python/pyarrow.h" +#include "arrow/python/serialize.h" + +namespace arrow { + +using internal::checked_cast; +using internal::ParseValue; + +namespace py { + +Status CallDeserializeCallback(PyObject* context, PyObject* value, + PyObject** deserialized_object); + +Status DeserializeTuple(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out); + +Status DeserializeList(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out); + +Status DeserializeSet(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out); + +Status DeserializeDict(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + const auto& data = checked_cast(array); + OwnedRef keys, vals; + OwnedRef result(PyDict_New()); + RETURN_IF_PYERROR(); + + DCHECK_EQ(2, data.num_fields()); + + RETURN_NOT_OK(DeserializeList(context, *data.field(0), start_idx, stop_idx, base, blobs, + keys.ref())); + RETURN_NOT_OK(DeserializeList(context, *data.field(1), start_idx, stop_idx, base, blobs, + vals.ref())); + for (int64_t i = start_idx; i < stop_idx; ++i) { + // PyDict_SetItem behaves differently from PyList_SetItem and PyTuple_SetItem. + // The latter two steal references whereas PyDict_SetItem does not. So we need + // to make sure the reference count is decremented by letting the OwnedRef + // go out of scope at the end. 
+ int ret = PyDict_SetItem(result.obj(), PyList_GET_ITEM(keys.obj(), i - start_idx), + PyList_GET_ITEM(vals.obj(), i - start_idx)); + if (ret != 0) { + return ConvertPyError(); + } + } + static PyObject* py_type = PyUnicode_FromString("_pytype_"); + if (PyDict_Contains(result.obj(), py_type)) { + RETURN_NOT_OK(CallDeserializeCallback(context, result.obj(), out)); + } else { + *out = result.detach(); + } + return Status::OK(); +} + +Status DeserializeArray(int32_t index, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + RETURN_NOT_OK(py::TensorToNdarray(blobs.ndarrays[index], base, out)); + // Mark the array as immutable + OwnedRef flags(PyObject_GetAttrString(*out, "flags")); + if (flags.obj() == NULL) { + return ConvertPyError(); + } + if (PyObject_SetAttrString(flags.obj(), "writeable", Py_False) < 0) { + return ConvertPyError(); + } + return Status::OK(); +} + +Status GetValue(PyObject* context, const Array& arr, int64_t index, int8_t type, + PyObject* base, const SerializedPyObject& blobs, PyObject** result) { + switch (type) { + case PythonType::NONE: + Py_INCREF(Py_None); + *result = Py_None; + return Status::OK(); + case PythonType::BOOL: + *result = PyBool_FromLong(checked_cast(arr).Value(index)); + return Status::OK(); + case PythonType::PY2INT: + case PythonType::INT: { + *result = PyLong_FromSsize_t(checked_cast(arr).Value(index)); + return Status::OK(); + } + case PythonType::BYTES: { + auto view = checked_cast(arr).GetView(index); + *result = PyBytes_FromStringAndSize(view.data(), view.length()); + return CheckPyError(); + } + case PythonType::STRING: { + auto view = checked_cast(arr).GetView(index); + *result = PyUnicode_FromStringAndSize(view.data(), view.length()); + return CheckPyError(); + } + case PythonType::HALF_FLOAT: { + *result = PyHalf_FromHalf(checked_cast(arr).Value(index)); + RETURN_IF_PYERROR(); + return Status::OK(); + } + case PythonType::FLOAT: + *result = PyFloat_FromDouble(checked_cast(arr).Value(index)); + return Status::OK(); + case PythonType::DOUBLE: + *result = PyFloat_FromDouble(checked_cast(arr).Value(index)); + return Status::OK(); + case PythonType::DATE64: { + RETURN_NOT_OK(internal::PyDateTime_from_int( + checked_cast(arr).Value(index), TimeUnit::MICRO, result)); + RETURN_IF_PYERROR(); + return Status::OK(); + } + case PythonType::LIST: { + const auto& l = checked_cast(arr); + return DeserializeList(context, *l.values(), l.value_offset(index), + l.value_offset(index + 1), base, blobs, result); + } + case PythonType::DICT: { + const auto& l = checked_cast(arr); + return DeserializeDict(context, *l.values(), l.value_offset(index), + l.value_offset(index + 1), base, blobs, result); + } + case PythonType::TUPLE: { + const auto& l = checked_cast(arr); + return DeserializeTuple(context, *l.values(), l.value_offset(index), + l.value_offset(index + 1), base, blobs, result); + } + case PythonType::SET: { + const auto& l = checked_cast(arr); + return DeserializeSet(context, *l.values(), l.value_offset(index), + l.value_offset(index + 1), base, blobs, result); + } + case PythonType::TENSOR: { + int32_t ref = checked_cast(arr).Value(index); + *result = wrap_tensor(blobs.tensors[ref]); + return Status::OK(); + } + case PythonType::SPARSECOOTENSOR: { + int32_t ref = checked_cast(arr).Value(index); + const std::shared_ptr& sparse_coo_tensor = + arrow::internal::checked_pointer_cast( + blobs.sparse_tensors[ref]); + *result = wrap_sparse_coo_tensor(sparse_coo_tensor); + return Status::OK(); + } + case PythonType::SPARSECSRMATRIX: { + int32_t 
ref = checked_cast(arr).Value(index); + const std::shared_ptr& sparse_csr_matrix = + arrow::internal::checked_pointer_cast( + blobs.sparse_tensors[ref]); + *result = wrap_sparse_csr_matrix(sparse_csr_matrix); + return Status::OK(); + } + case PythonType::SPARSECSCMATRIX: { + int32_t ref = checked_cast(arr).Value(index); + const std::shared_ptr& sparse_csc_matrix = + arrow::internal::checked_pointer_cast( + blobs.sparse_tensors[ref]); + *result = wrap_sparse_csc_matrix(sparse_csc_matrix); + return Status::OK(); + } + case PythonType::SPARSECSFTENSOR: { + int32_t ref = checked_cast(arr).Value(index); + const std::shared_ptr& sparse_csf_tensor = + arrow::internal::checked_pointer_cast( + blobs.sparse_tensors[ref]); + *result = wrap_sparse_csf_tensor(sparse_csf_tensor); + return Status::OK(); + } + case PythonType::NDARRAY: { + int32_t ref = checked_cast(arr).Value(index); + return DeserializeArray(ref, base, blobs, result); + } + case PythonType::BUFFER: { + int32_t ref = checked_cast(arr).Value(index); + *result = wrap_buffer(blobs.buffers[ref]); + return Status::OK(); + } + default: { + ARROW_CHECK(false) << "union tag " << type << "' not recognized"; + } + } + return Status::OK(); +} + +Status GetPythonTypes(const UnionArray& data, std::vector* result) { + ARROW_CHECK(result != nullptr); + auto type = data.type(); + for (int i = 0; i < type->num_fields(); ++i) { + int8_t tag = 0; + const std::string& data = type->field(i)->name(); + if (!ParseValue(data.c_str(), data.size(), &tag)) { + return Status::SerializationError("Cannot convert string: \"", + type->field(i)->name(), "\" to int8_t"); + } + result->push_back(tag); + } + return Status::OK(); +} + +template +Status DeserializeSequence(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, + const SerializedPyObject& blobs, + CreateSequenceFn&& create_sequence, SetItemFn&& set_item, + PyObject** out) { + const auto& data = checked_cast(array); + OwnedRef result(create_sequence(stop_idx - start_idx)); + RETURN_IF_PYERROR(); + const int8_t* type_codes = data.raw_type_codes(); + const int32_t* value_offsets = data.raw_value_offsets(); + std::vector python_types; + RETURN_NOT_OK(GetPythonTypes(data, &python_types)); + for (int64_t i = start_idx; i < stop_idx; ++i) { + const int64_t offset = value_offsets[i]; + const uint8_t type = type_codes[i]; + PyObject* value; + RETURN_NOT_OK(GetValue(context, *data.field(type), offset, python_types[type], base, + blobs, &value)); + RETURN_NOT_OK(set_item(result.obj(), i - start_idx, value)); + } + *out = result.detach(); + return Status::OK(); +} + +Status DeserializeList(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + return DeserializeSequence( + context, array, start_idx, stop_idx, base, blobs, + [](int64_t size) { return PyList_New(size); }, + [](PyObject* seq, int64_t index, PyObject* item) { + PyList_SET_ITEM(seq, index, item); + return Status::OK(); + }, + out); +} + +Status DeserializeTuple(PyObject* context, const Array& array, int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + return DeserializeSequence( + context, array, start_idx, stop_idx, base, blobs, + [](int64_t size) { return PyTuple_New(size); }, + [](PyObject* seq, int64_t index, PyObject* item) { + PyTuple_SET_ITEM(seq, index, item); + return Status::OK(); + }, + out); +} + +Status DeserializeSet(PyObject* context, const Array& array, 
int64_t start_idx, + int64_t stop_idx, PyObject* base, const SerializedPyObject& blobs, + PyObject** out) { + return DeserializeSequence( + context, array, start_idx, stop_idx, base, blobs, + [](int64_t size) { return PySet_New(nullptr); }, + [](PyObject* seq, int64_t index, PyObject* item) { + int err = PySet_Add(seq, item); + Py_DECREF(item); + if (err < 0) { + RETURN_IF_PYERROR(); + } + return Status::OK(); + }, + out); +} + +Status ReadSerializedObject(io::RandomAccessFile* src, SerializedPyObject* out) { + int32_t num_tensors; + int32_t num_sparse_tensors; + int32_t num_ndarrays; + int32_t num_buffers; + + // Read number of tensors + RETURN_NOT_OK(src->Read(sizeof(int32_t), reinterpret_cast(&num_tensors))); + RETURN_NOT_OK( + src->Read(sizeof(int32_t), reinterpret_cast(&num_sparse_tensors))); + RETURN_NOT_OK(src->Read(sizeof(int32_t), reinterpret_cast(&num_ndarrays))); + RETURN_NOT_OK(src->Read(sizeof(int32_t), reinterpret_cast(&num_buffers))); + + // Align stream to 8-byte offset + RETURN_NOT_OK(ipc::AlignStream(src, ipc::kArrowIpcAlignment)); + std::shared_ptr reader; + ARROW_ASSIGN_OR_RAISE(reader, ipc::RecordBatchStreamReader::Open(src)); + RETURN_NOT_OK(reader->ReadNext(&out->batch)); + + /// Skip EOS marker + RETURN_NOT_OK(src->Advance(4)); + + /// Align stream so tensor bodies are 64-byte aligned + RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment)); + + for (int i = 0; i < num_tensors; ++i) { + std::shared_ptr tensor; + ARROW_ASSIGN_OR_RAISE(tensor, ipc::ReadTensor(src)); + RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment)); + out->tensors.push_back(tensor); + } + + for (int i = 0; i < num_sparse_tensors; ++i) { + std::shared_ptr sparse_tensor; + ARROW_ASSIGN_OR_RAISE(sparse_tensor, ipc::ReadSparseTensor(src)); + RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment)); + out->sparse_tensors.push_back(sparse_tensor); + } + + for (int i = 0; i < num_ndarrays; ++i) { + std::shared_ptr ndarray; + ARROW_ASSIGN_OR_RAISE(ndarray, ipc::ReadTensor(src)); + RETURN_NOT_OK(ipc::AlignStream(src, ipc::kTensorAlignment)); + out->ndarrays.push_back(ndarray); + } + + ARROW_ASSIGN_OR_RAISE(int64_t offset, src->Tell()); + for (int i = 0; i < num_buffers; ++i) { + int64_t size; + RETURN_NOT_OK(src->ReadAt(offset, sizeof(int64_t), &size)); + offset += sizeof(int64_t); + ARROW_ASSIGN_OR_RAISE(auto buffer, src->ReadAt(offset, size)); + out->buffers.push_back(buffer); + offset += size; + } + + return Status::OK(); +} + +Status DeserializeObject(PyObject* context, const SerializedPyObject& obj, PyObject* base, + PyObject** out) { + PyAcquireGIL lock; + return DeserializeList(context, *obj.batch->column(0), 0, obj.batch->num_rows(), base, + obj, out); +} + +Status GetSerializedFromComponents(int num_tensors, + const SparseTensorCounts& num_sparse_tensors, + int num_ndarrays, int num_buffers, PyObject* data, + SerializedPyObject* out) { + PyAcquireGIL gil; + const Py_ssize_t data_length = PyList_Size(data); + RETURN_IF_PYERROR(); + + const Py_ssize_t expected_data_length = 1 + num_tensors * 2 + + num_sparse_tensors.num_total_buffers() + + num_ndarrays * 2 + num_buffers; + if (data_length != expected_data_length) { + return Status::Invalid("Invalid number of buffers in data"); + } + + auto GetBuffer = [&data](Py_ssize_t index, std::shared_ptr* out) { + ARROW_CHECK_LE(index, PyList_Size(data)); + PyObject* py_buf = PyList_GET_ITEM(data, index); + return unwrap_buffer(py_buf).Value(out); + }; + + Py_ssize_t buffer_index = 0; + + // Read the union batch describing object structure + { + 
std::shared_ptr data_buffer; + RETURN_NOT_OK(GetBuffer(buffer_index++, &data_buffer)); + gil.release(); + io::BufferReader buf_reader(data_buffer); + std::shared_ptr reader; + ARROW_ASSIGN_OR_RAISE(reader, ipc::RecordBatchStreamReader::Open(&buf_reader)); + RETURN_NOT_OK(reader->ReadNext(&out->batch)); + gil.acquire(); + } + + // Zero-copy reconstruct tensors + for (int i = 0; i < num_tensors; ++i) { + std::shared_ptr metadata; + std::shared_ptr body; + std::shared_ptr tensor; + RETURN_NOT_OK(GetBuffer(buffer_index++, &metadata)); + RETURN_NOT_OK(GetBuffer(buffer_index++, &body)); + + ipc::Message message(metadata, body); + + ARROW_ASSIGN_OR_RAISE(tensor, ipc::ReadTensor(message)); + out->tensors.emplace_back(std::move(tensor)); + } + + // Zero-copy reconstruct sparse tensors + for (int i = 0, n = num_sparse_tensors.num_total_tensors(); i < n; ++i) { + ipc::IpcPayload payload; + RETURN_NOT_OK(GetBuffer(buffer_index++, &payload.metadata)); + + ARROW_ASSIGN_OR_RAISE( + size_t num_bodies, + ipc::internal::ReadSparseTensorBodyBufferCount(*payload.metadata)); + + payload.body_buffers.reserve(num_bodies); + for (size_t i = 0; i < num_bodies; ++i) { + std::shared_ptr body; + RETURN_NOT_OK(GetBuffer(buffer_index++, &body)); + payload.body_buffers.emplace_back(body); + } + + std::shared_ptr sparse_tensor; + ARROW_ASSIGN_OR_RAISE(sparse_tensor, ipc::internal::ReadSparseTensorPayload(payload)); + out->sparse_tensors.emplace_back(std::move(sparse_tensor)); + } + + // Zero-copy reconstruct tensors for numpy ndarrays + for (int i = 0; i < num_ndarrays; ++i) { + std::shared_ptr metadata; + std::shared_ptr body; + std::shared_ptr tensor; + RETURN_NOT_OK(GetBuffer(buffer_index++, &metadata)); + RETURN_NOT_OK(GetBuffer(buffer_index++, &body)); + + ipc::Message message(metadata, body); + + ARROW_ASSIGN_OR_RAISE(tensor, ipc::ReadTensor(message)); + out->ndarrays.emplace_back(std::move(tensor)); + } + + // Unwrap and append buffers + for (int i = 0; i < num_buffers; ++i) { + std::shared_ptr buffer; + RETURN_NOT_OK(GetBuffer(buffer_index++, &buffer)); + out->buffers.emplace_back(std::move(buffer)); + } + + return Status::OK(); +} + +Status DeserializeNdarray(const SerializedPyObject& object, + std::shared_ptr* out) { + if (object.ndarrays.size() != 1) { + return Status::Invalid("Object is not an Ndarray"); + } + *out = object.ndarrays[0]; + return Status::OK(); +} + +Status NdarrayFromBuffer(std::shared_ptr src, std::shared_ptr* out) { + io::BufferReader reader(src); + SerializedPyObject object; + RETURN_NOT_OK(ReadSerializedObject(&reader, &object)); + return DeserializeNdarray(object, out); +} + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.h b/src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.h index ed82942..41b6a13 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/deserialize.h @@ -21,9 +21,9 @@ #include #include -#include "arrow/status.h" #include "arrow/python/serialize.h" #include "arrow/python/visibility.h" +#include "arrow/status.h" namespace arrow { diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.cc new file mode 100644 index 0000000..3ccc171 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.cc @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "arrow/python/extension_type.h" +#include "arrow/python/helpers.h" +#include "arrow/python/pyarrow.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/logging.h" + +namespace arrow { + +using internal::checked_cast; + +namespace py { + +namespace { + +// Serialize a Python ExtensionType instance +Status SerializeExtInstance(PyObject* type_instance, std::string* out) { + OwnedRef res( + cpp_PyObject_CallMethod(type_instance, "__arrow_ext_serialize__", nullptr)); + if (!res) { + return ConvertPyError(); + } + if (!PyBytes_Check(res.obj())) { + return Status::TypeError( + "__arrow_ext_serialize__ should return bytes object, " + "got ", + internal::PyObject_StdStringRepr(res.obj())); + } + *out = internal::PyBytes_AsStdString(res.obj()); + return Status::OK(); +} + +// Deserialize a Python ExtensionType instance +PyObject* DeserializeExtInstance(PyObject* type_class, + std::shared_ptr storage_type, + const std::string& serialized_data) { + OwnedRef storage_ref(wrap_data_type(storage_type)); + if (!storage_ref) { + return nullptr; + } + OwnedRef data_ref(PyBytes_FromStringAndSize( + serialized_data.data(), static_cast(serialized_data.size()))); + if (!data_ref) { + return nullptr; + } + + return cpp_PyObject_CallMethod(type_class, "__arrow_ext_deserialize__", "OO", + storage_ref.obj(), data_ref.obj()); +} + +} // namespace + +static const char* kExtensionName = "arrow.py_extension_type"; + +std::string PyExtensionType::ToString() const { + PyAcquireGIL lock; + + std::stringstream ss; + OwnedRef instance(GetInstance()); + ss << "extension<" << this->extension_name() << "<" << Py_TYPE(instance.obj())->tp_name + << ">>"; + return ss.str(); +} + +PyExtensionType::PyExtensionType(std::shared_ptr storage_type, PyObject* typ, + PyObject* inst) + : ExtensionType(storage_type), + extension_name_(kExtensionName), + type_class_(typ), + type_instance_(inst) {} + +PyExtensionType::PyExtensionType(std::shared_ptr storage_type, + std::string extension_name, PyObject* typ, + PyObject* inst) + : ExtensionType(storage_type), + extension_name_(std::move(extension_name)), + type_class_(typ), + type_instance_(inst) {} + +bool PyExtensionType::ExtensionEquals(const ExtensionType& other) const { + PyAcquireGIL lock; + + if (other.extension_name() != extension_name()) { + return false; + } + const auto& other_ext = checked_cast(other); + int res = -1; + if (!type_instance_) { + if (other_ext.type_instance_) { + return false; + } + // Compare Python types + res = PyObject_RichCompareBool(type_class_.obj(), other_ext.type_class_.obj(), Py_EQ); + } else { + if (!other_ext.type_instance_) { + return false; + } + // Compare Python instances + OwnedRef left(GetInstance()); + OwnedRef right(other_ext.GetInstance()); + if (!left || !right) { + goto error; + } + res = 
PyObject_RichCompareBool(left.obj(), right.obj(), Py_EQ); + } + if (res == -1) { + goto error; + } + return res == 1; + +error: + // Cannot propagate error + PyErr_WriteUnraisable(nullptr); + return false; +} + +std::shared_ptr PyExtensionType::MakeArray(std::shared_ptr data) const { + DCHECK_EQ(data->type->id(), Type::EXTENSION); + return std::make_shared(data); +} + +std::string PyExtensionType::Serialize() const { + DCHECK(type_instance_); + return serialized_; +} + +Result> PyExtensionType::Deserialize( + std::shared_ptr storage_type, const std::string& serialized_data) const { + PyAcquireGIL lock; + + if (import_pyarrow()) { + return ConvertPyError(); + } + OwnedRef res(DeserializeExtInstance(type_class_.obj(), storage_type, serialized_data)); + if (!res) { + return ConvertPyError(); + } + return unwrap_data_type(res.obj()); +} + +PyObject* PyExtensionType::GetInstance() const { + if (!type_instance_) { + PyErr_SetString(PyExc_TypeError, "Not an instance"); + return nullptr; + } + DCHECK(PyWeakref_CheckRef(type_instance_.obj())); + PyObject* inst = PyWeakref_GET_OBJECT(type_instance_.obj()); + if (inst != Py_None) { + // Cached instance still alive + Py_INCREF(inst); + return inst; + } else { + // Must reconstruct from serialized form + // XXX cache again? + return DeserializeExtInstance(type_class_.obj(), storage_type_, serialized_); + } +} + +Status PyExtensionType::SetInstance(PyObject* inst) const { + // Check we have the right type + PyObject* typ = reinterpret_cast(Py_TYPE(inst)); + if (typ != type_class_.obj()) { + return Status::TypeError("Unexpected Python ExtensionType class ", + internal::PyObject_StdStringRepr(typ), " expected ", + internal::PyObject_StdStringRepr(type_class_.obj())); + } + + PyObject* wr = PyWeakref_NewRef(inst, nullptr); + if (wr == NULL) { + return ConvertPyError(); + } + type_instance_.reset(wr); + return SerializeExtInstance(inst, &serialized_); +} + +Status PyExtensionType::FromClass(const std::shared_ptr storage_type, + const std::string extension_name, PyObject* typ, + std::shared_ptr* out) { + Py_INCREF(typ); + out->reset(new PyExtensionType(storage_type, std::move(extension_name), typ)); + return Status::OK(); +} + +Status RegisterPyExtensionType(const std::shared_ptr& type) { + DCHECK_EQ(type->id(), Type::EXTENSION); + auto ext_type = std::dynamic_pointer_cast(type); + return RegisterExtensionType(ext_type); +} + +Status UnregisterPyExtensionType(const std::string& type_name) { + return UnregisterExtensionType(type_name); +} + +std::string PyExtensionName() { return kExtensionName; } + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.h b/src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.h index 7fc86b9..e433d9a 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/extension_type.h @@ -21,9 +21,9 @@ #include #include "arrow/extension_type.h" -#include "arrow/util/macros.h" #include "arrow/python/common.h" #include "arrow/python/visibility.h" +#include "arrow/util/macros.h" namespace arrow { namespace py { diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.cc new file mode 100644 index 0000000..5e9b500 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.cc @@ -0,0 +1,206 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/python/filesystem.h" +#include "arrow/util/logging.h" + +namespace arrow { + +using fs::FileInfo; +using fs::FileSelector; + +namespace py { +namespace fs { + +PyFileSystem::PyFileSystem(PyObject* handler, PyFileSystemVtable vtable) + : handler_(handler), vtable_(std::move(vtable)) { + Py_INCREF(handler); +} + +PyFileSystem::~PyFileSystem() {} + +std::shared_ptr PyFileSystem::Make(PyObject* handler, + PyFileSystemVtable vtable) { + return std::make_shared(handler, std::move(vtable)); +} + +std::string PyFileSystem::type_name() const { + std::string result; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.get_type_name(handler_.obj(), &result); + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(handler_.obj()); + } + return Status::OK(); + }); + ARROW_UNUSED(st); + return result; +} + +bool PyFileSystem::Equals(const FileSystem& other) const { + bool result; + auto st = SafeCallIntoPython([&]() -> Status { + result = vtable_.equals(handler_.obj(), other); + if (PyErr_Occurred()) { + PyErr_WriteUnraisable(handler_.obj()); + } + return Status::OK(); + }); + ARROW_UNUSED(st); + return result; +} + +Result PyFileSystem::GetFileInfo(const std::string& path) { + FileInfo info; + + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.get_file_info(handler_.obj(), path, &info); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return info; +} + +Result> PyFileSystem::GetFileInfo( + const std::vector& paths) { + std::vector infos; + + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.get_file_info_vector(handler_.obj(), paths, &infos); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return infos; +} + +Result> PyFileSystem::GetFileInfo(const FileSelector& select) { + std::vector infos; + + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.get_file_info_selector(handler_.obj(), select, &infos); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return infos; +} + +Status PyFileSystem::CreateDir(const std::string& path, bool recursive) { + return SafeCallIntoPython([&]() -> Status { + vtable_.create_dir(handler_.obj(), path, recursive); + return CheckPyError(); + }); +} + +Status PyFileSystem::DeleteDir(const std::string& path) { + return SafeCallIntoPython([&]() -> Status { + vtable_.delete_dir(handler_.obj(), path); + return CheckPyError(); + }); +} + +Status PyFileSystem::DeleteDirContents(const std::string& path, bool missing_dir_ok) { + return SafeCallIntoPython([&]() -> Status { + vtable_.delete_dir_contents(handler_.obj(), path, missing_dir_ok); + return CheckPyError(); + }); +} + +Status PyFileSystem::DeleteRootDirContents() { + return SafeCallIntoPython([&]() -> Status { + vtable_.delete_root_dir_contents(handler_.obj()); + return CheckPyError(); + }); +} + +Status PyFileSystem::DeleteFile(const std::string& path) { + return 
SafeCallIntoPython([&]() -> Status { + vtable_.delete_file(handler_.obj(), path); + return CheckPyError(); + }); +} + +Status PyFileSystem::Move(const std::string& src, const std::string& dest) { + return SafeCallIntoPython([&]() -> Status { + vtable_.move(handler_.obj(), src, dest); + return CheckPyError(); + }); +} + +Status PyFileSystem::CopyFile(const std::string& src, const std::string& dest) { + return SafeCallIntoPython([&]() -> Status { + vtable_.copy_file(handler_.obj(), src, dest); + return CheckPyError(); + }); +} + +Result> PyFileSystem::OpenInputStream( + const std::string& path) { + std::shared_ptr stream; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.open_input_stream(handler_.obj(), path, &stream); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return stream; +} + +Result> PyFileSystem::OpenInputFile( + const std::string& path) { + std::shared_ptr stream; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.open_input_file(handler_.obj(), path, &stream); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return stream; +} + +Result> PyFileSystem::OpenOutputStream( + const std::string& path, const std::shared_ptr& metadata) { + std::shared_ptr stream; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.open_output_stream(handler_.obj(), path, metadata, &stream); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return stream; +} + +Result> PyFileSystem::OpenAppendStream( + const std::string& path, const std::shared_ptr& metadata) { + std::shared_ptr stream; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.open_append_stream(handler_.obj(), path, metadata, &stream); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return stream; +} + +Result PyFileSystem::NormalizePath(std::string path) { + std::string normalized; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.normalize_path(handler_.obj(), path, &normalized); + return CheckPyError(); + }); + RETURN_NOT_OK(st); + return normalized; +} + +} // namespace fs +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.h b/src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.h index 2e5b223..003fd5c 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/filesystem.h @@ -22,9 +22,9 @@ #include #include "arrow/filesystem/filesystem.h" -#include "arrow/util/macros.h" #include "arrow/python/common.h" #include "arrow/python/visibility.h" +#include "arrow/util/macros.h" namespace arrow { namespace py { diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/flight.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/flight.cc new file mode 100644 index 0000000..bf7af27 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/flight.cc @@ -0,0 +1,388 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "arrow/python/flight.h" +#include "arrow/util/io_util.h" +#include "arrow/util/logging.h" + +using arrow::flight::FlightPayload; + +namespace arrow { +namespace py { +namespace flight { + +const char* kPyServerMiddlewareName = "arrow.py_server_middleware"; + +PyServerAuthHandler::PyServerAuthHandler(PyObject* handler, + const PyServerAuthHandlerVtable& vtable) + : vtable_(vtable) { + Py_INCREF(handler); + handler_.reset(handler); +} + +Status PyServerAuthHandler::Authenticate(arrow::flight::ServerAuthSender* outgoing, + arrow::flight::ServerAuthReader* incoming) { + return SafeCallIntoPython([=] { + const Status status = vtable_.authenticate(handler_.obj(), outgoing, incoming); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyServerAuthHandler::IsValid(const std::string& token, + std::string* peer_identity) { + return SafeCallIntoPython([=] { + const Status status = vtable_.is_valid(handler_.obj(), token, peer_identity); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +PyClientAuthHandler::PyClientAuthHandler(PyObject* handler, + const PyClientAuthHandlerVtable& vtable) + : vtable_(vtable) { + Py_INCREF(handler); + handler_.reset(handler); +} + +Status PyClientAuthHandler::Authenticate(arrow::flight::ClientAuthSender* outgoing, + arrow::flight::ClientAuthReader* incoming) { + return SafeCallIntoPython([=] { + const Status status = vtable_.authenticate(handler_.obj(), outgoing, incoming); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyClientAuthHandler::GetToken(std::string* token) { + return SafeCallIntoPython([=] { + const Status status = vtable_.get_token(handler_.obj(), token); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +PyFlightServer::PyFlightServer(PyObject* server, const PyFlightServerVtable& vtable) + : vtable_(vtable) { + Py_INCREF(server); + server_.reset(server); +} + +Status PyFlightServer::ListFlights( + const arrow::flight::ServerCallContext& context, + const arrow::flight::Criteria* criteria, + std::unique_ptr* listings) { + return SafeCallIntoPython([&] { + const Status status = + vtable_.list_flights(server_.obj(), context, criteria, listings); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::GetFlightInfo(const arrow::flight::ServerCallContext& context, + const arrow::flight::FlightDescriptor& request, + std::unique_ptr* info) { + return SafeCallIntoPython([&] { + const Status status = vtable_.get_flight_info(server_.obj(), context, request, info); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::GetSchema(const arrow::flight::ServerCallContext& context, + const arrow::flight::FlightDescriptor& request, + std::unique_ptr* result) { + return SafeCallIntoPython([&] { + const Status status = vtable_.get_schema(server_.obj(), context, request, result); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::DoGet(const arrow::flight::ServerCallContext& context, + const arrow::flight::Ticket& request, + std::unique_ptr* stream) { + return 
SafeCallIntoPython([&] { + const Status status = vtable_.do_get(server_.obj(), context, request, stream); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::DoPut( + const arrow::flight::ServerCallContext& context, + std::unique_ptr reader, + std::unique_ptr writer) { + return SafeCallIntoPython([&] { + const Status status = + vtable_.do_put(server_.obj(), context, std::move(reader), std::move(writer)); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::DoExchange( + const arrow::flight::ServerCallContext& context, + std::unique_ptr reader, + std::unique_ptr writer) { + return SafeCallIntoPython([&] { + const Status status = + vtable_.do_exchange(server_.obj(), context, std::move(reader), std::move(writer)); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::DoAction(const arrow::flight::ServerCallContext& context, + const arrow::flight::Action& action, + std::unique_ptr* result) { + return SafeCallIntoPython([&] { + const Status status = vtable_.do_action(server_.obj(), context, action, result); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::ListActions(const arrow::flight::ServerCallContext& context, + std::vector* actions) { + return SafeCallIntoPython([&] { + const Status status = vtable_.list_actions(server_.obj(), context, actions); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +Status PyFlightServer::ServeWithSignals() { + // Respect the current Python settings, i.e. only interrupt the server if there is + // an active signal handler for SIGINT and SIGTERM. + std::vector signals; + for (const int signum : {SIGINT, SIGTERM}) { + ARROW_ASSIGN_OR_RAISE(auto handler, ::arrow::internal::GetSignalHandler(signum)); + auto cb = handler.callback(); + if (cb != SIG_DFL && cb != SIG_IGN) { + signals.push_back(signum); + } + } + RETURN_NOT_OK(SetShutdownOnSignals(signals)); + + // Serve until we got told to shutdown or a signal interrupted us + RETURN_NOT_OK(Serve()); + int signum = GotSignal(); + if (signum != 0) { + // Issue the signal again with Python's signal handlers restored + PyAcquireGIL lock; + raise(signum); + // XXX Ideally we would loop and serve again if no exception was raised. + // Unfortunately, gRPC will return immediately if Serve() is called again. 
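+    // [Editor's note] raise() above re-delivers the signal while the GIL is
+    // held: CPython's C-level signal handler only records that the signal
+    // arrived, and it is PyErr_CheckSignals() below that actually runs the
+    // registered Python-level handler (typically raising KeyboardInterrupt).
+    // Whatever exception that sets is deliberately discarded via
+    // ARROW_UNUSED, since this method can only report failure through its
+    // Status return value.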
+ ARROW_UNUSED(PyErr_CheckSignals()); + } + + return Status::OK(); +} + +PyFlightResultStream::PyFlightResultStream(PyObject* generator, + PyFlightResultStreamCallback callback) + : callback_(callback) { + Py_INCREF(generator); + generator_.reset(generator); +} + +arrow::Result> PyFlightResultStream::Next() { + return SafeCallIntoPython( + [=]() -> arrow::Result> { + std::unique_ptr result; + const Status status = callback_(generator_.obj(), &result); + RETURN_NOT_OK(CheckPyError()); + RETURN_NOT_OK(status); + return result; + }); +} + +PyFlightDataStream::PyFlightDataStream( + PyObject* data_source, std::unique_ptr stream) + : stream_(std::move(stream)) { + Py_INCREF(data_source); + data_source_.reset(data_source); +} + +std::shared_ptr PyFlightDataStream::schema() { return stream_->schema(); } + +arrow::Result PyFlightDataStream::GetSchemaPayload() { + return stream_->GetSchemaPayload(); +} + +arrow::Result PyFlightDataStream::Next() { return stream_->Next(); } + +PyGeneratorFlightDataStream::PyGeneratorFlightDataStream( + PyObject* generator, std::shared_ptr schema, + PyGeneratorFlightDataStreamCallback callback, const ipc::IpcWriteOptions& options) + : schema_(schema), mapper_(*schema_), options_(options), callback_(callback) { + Py_INCREF(generator); + generator_.reset(generator); +} + +std::shared_ptr PyGeneratorFlightDataStream::schema() { return schema_; } + +arrow::Result PyGeneratorFlightDataStream::GetSchemaPayload() { + FlightPayload payload; + RETURN_NOT_OK(ipc::GetSchemaPayload(*schema_, options_, mapper_, &payload.ipc_message)); + return payload; +} + +arrow::Result PyGeneratorFlightDataStream::Next() { + return SafeCallIntoPython([=]() -> arrow::Result { + FlightPayload payload; + const Status status = callback_(generator_.obj(), &payload); + RETURN_NOT_OK(CheckPyError()); + RETURN_NOT_OK(status); + return payload; + }); +} + +// Flight Server Middleware + +PyServerMiddlewareFactory::PyServerMiddlewareFactory(PyObject* factory, + StartCallCallback start_call) + : start_call_(start_call) { + Py_INCREF(factory); + factory_.reset(factory); +} + +Status PyServerMiddlewareFactory::StartCall( + const arrow::flight::CallInfo& info, + const arrow::flight::CallHeaders& incoming_headers, + std::shared_ptr* middleware) { + return SafeCallIntoPython([&] { + const Status status = start_call_(factory_.obj(), info, incoming_headers, middleware); + RETURN_NOT_OK(CheckPyError()); + return status; + }); +} + +PyServerMiddleware::PyServerMiddleware(PyObject* middleware, Vtable vtable) + : vtable_(vtable) { + Py_INCREF(middleware); + middleware_.reset(middleware); +} + +void PyServerMiddleware::SendingHeaders(arrow::flight::AddCallHeaders* outgoing_headers) { + const Status& status = SafeCallIntoPython([&] { + const Status status = vtable_.sending_headers(middleware_.obj(), outgoing_headers); + RETURN_NOT_OK(CheckPyError()); + return status; + }); + + ARROW_WARN_NOT_OK(status, "Python server middleware failed in SendingHeaders"); +} + +void PyServerMiddleware::CallCompleted(const Status& call_status) { + const Status& status = SafeCallIntoPython([&] { + const Status status = vtable_.call_completed(middleware_.obj(), call_status); + RETURN_NOT_OK(CheckPyError()); + return status; + }); + + ARROW_WARN_NOT_OK(status, "Python server middleware failed in CallCompleted"); +} + +std::string PyServerMiddleware::name() const { return kPyServerMiddlewareName; } + +PyObject* PyServerMiddleware::py_object() const { return middleware_.obj(); } + +// Flight Client Middleware + 
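[Editor's aside] The client-side wrappers below repeat the shape used by every Python-facing class in this file: hold the Python object behind an owned reference, and dispatch each C++ virtual call through a struct of callbacks (the "vtable") that the Cython layer fills in. A minimal, Arrow-free sketch of that shape, with every name hypothetical:

#include <functional>
#include <iostream>
#include <string>
#include <utility>

struct Handler { std::string name; };  // stands in for the wrapped PyObject*

struct Vtable {
  // One slot per virtual method; the real code stores Cython callbacks here.
  std::function<void(Handler*, const std::string&)> sending_headers;
};

class Middleware {
 public:
  Middleware(Handler* handler, Vtable vtable)
      : handler_(handler), vtable_(std::move(vtable)) {}
  void SendingHeaders(const std::string& header) {
    // Forward the C++ virtual call into the "Python" side via the vtable.
    vtable_.sending_headers(handler_, header);
  }

 private:
  Handler* handler_;
  Vtable vtable_;
};

int main() {
  Handler handler{"py-handler"};
  Vtable vtable;
  vtable.sending_headers = [](Handler* h, const std::string& header) {
    std::cout << h->name << " forwards header: " << header << "\n";
  };
  Middleware middleware(&handler, vtable);
  middleware.SendingHeaders("x-trace-id: 1");
  return 0;
}

The indirection keeps this translation unit free of any compile-time dependency on the Cython-generated code; only plain function pointers cross the boundary.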
+PyClientMiddlewareFactory::PyClientMiddlewareFactory(PyObject* factory,
+                                                     StartCallCallback start_call)
+    : start_call_(start_call) {
+  Py_INCREF(factory);
+  factory_.reset(factory);
+}
+
+void PyClientMiddlewareFactory::StartCall(
+    const arrow::flight::CallInfo& info,
+    std::unique_ptr<arrow::flight::ClientMiddleware>* middleware) {
+  const Status& status = SafeCallIntoPython([&] {
+    const Status status = start_call_(factory_.obj(), info, middleware);
+    RETURN_NOT_OK(CheckPyError());
+    return status;
+  });
+
+  ARROW_WARN_NOT_OK(status, "Python client middleware failed in StartCall");
+}
+
+PyClientMiddleware::PyClientMiddleware(PyObject* middleware, Vtable vtable)
+    : vtable_(vtable) {
+  Py_INCREF(middleware);
+  middleware_.reset(middleware);
+}
+
+void PyClientMiddleware::SendingHeaders(arrow::flight::AddCallHeaders* outgoing_headers) {
+  const Status& status = SafeCallIntoPython([&] {
+    const Status status = vtable_.sending_headers(middleware_.obj(), outgoing_headers);
+    RETURN_NOT_OK(CheckPyError());
+    return status;
+  });
+
+  ARROW_WARN_NOT_OK(status, "Python client middleware failed in SendingHeaders");
+}
+
+void PyClientMiddleware::ReceivedHeaders(
+    const arrow::flight::CallHeaders& incoming_headers) {
+  const Status& status = SafeCallIntoPython([&] {
+    const Status status = vtable_.received_headers(middleware_.obj(), incoming_headers);
+    RETURN_NOT_OK(CheckPyError());
+    return status;
+  });
+
+  ARROW_WARN_NOT_OK(status, "Python client middleware failed in ReceivedHeaders");
+}
+
+void PyClientMiddleware::CallCompleted(const Status& call_status) {
+  const Status& status = SafeCallIntoPython([&] {
+    const Status status = vtable_.call_completed(middleware_.obj(), call_status);
+    RETURN_NOT_OK(CheckPyError());
+    return status;
+  });
+
+  ARROW_WARN_NOT_OK(status, "Python client middleware failed in CallCompleted");
+}
+
+Status CreateFlightInfo(const std::shared_ptr<arrow::Schema>& schema,
+                        const arrow::flight::FlightDescriptor& descriptor,
+                        const std::vector<arrow::flight::FlightEndpoint>& endpoints,
+                        int64_t total_records, int64_t total_bytes,
+                        std::unique_ptr<arrow::flight::FlightInfo>* out) {
+  ARROW_ASSIGN_OR_RAISE(auto result,
+                        arrow::flight::FlightInfo::Make(*schema, descriptor, endpoints,
+                                                        total_records, total_bytes));
+  *out = std::unique_ptr<arrow::flight::FlightInfo>(
+      new arrow::flight::FlightInfo(std::move(result)));
+  return Status::OK();
+}
+
+Status CreateSchemaResult(const std::shared_ptr<arrow::Schema>& schema,
+                          std::unique_ptr<arrow::flight::SchemaResult>* out) {
+  return arrow::flight::SchemaResult::Make(*schema).Value(out);
+}
+
+}  // namespace flight
+}  // namespace py
+}  // namespace arrow
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/gdb.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/gdb.cc
new file mode 100644
index 0000000..6941769
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/gdb.cc
@@ -0,0 +1,530 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
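[Editor's aside] Every Flight and middleware override in the file above performs the same dual error check: the callback returns a Status, but it may also have left a Python exception pending, and the exception takes precedence because RETURN_NOT_OK(CheckPyError()) runs before the returned Status is consulted. A self-contained sketch of that precedence rule, using hypothetical stand-ins for arrow::Status and the CPython error indicator:

#include <iostream>
#include <optional>
#include <string>

// Hypothetical stand-ins; none of these names exist in Arrow or CPython.
struct Status {
  std::string msg;  // empty means OK
  bool ok() const { return msg.empty(); }
};

std::optional<std::string> g_py_exception;  // the "pending exception" slot

// Like CheckPyError(): convert and clear any pending "Python" exception.
Status CheckPendingException() {
  if (g_py_exception) {
    Status st{*g_py_exception};
    g_py_exception.reset();
    return st;
  }
  return Status{};
}

// The pattern from the wrappers above: a pending exception takes precedence
// over the Status the callback itself returned.
Status CallCallback(Status (*cb)()) {
  const Status status = cb();
  const Status pending = CheckPendingException();
  if (!pending.ok()) return pending;  // RETURN_NOT_OK(CheckPyError())
  return status;
}

int main() {
  // No exception pending: the callback's own Status is reported.
  std::cout << CallCallback([] { return Status{"callback failed"}; }).msg << "\n";
  // Exception pending: it wins over the OK Status the callback returned.
  g_py_exception = "ValueError raised on the Python side";
  std::cout << CallCallback([] { return Status{}; }).msg << "\n";
  return 0;
}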
+ +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/chunked_array.h" +#include "arrow/datum.h" +#include "arrow/extension_type.h" +#include "arrow/ipc/json_simple.h" +#include "arrow/python/gdb.h" +#include "arrow/record_batch.h" +#include "arrow/scalar.h" +#include "arrow/table.h" +#include "arrow/type.h" +#include "arrow/util/debug.h" +#include "arrow/util/decimal.h" +#include "arrow/util/key_value_metadata.h" +#include "arrow/util/logging.h" +#include "arrow/util/macros.h" + +namespace arrow { + +using ipc::internal::json::ArrayFromJSON; +using ipc::internal::json::ChunkedArrayFromJSON; +using ipc::internal::json::ScalarFromJSON; + +namespace gdb { + +// Add a nested `arrow` namespace to exercise type lookup from GDB (ARROW-15652) +namespace arrow { +void DummyFunction() {} +} // namespace arrow + +namespace { + +class CustomStatusDetail : public StatusDetail { + public: + const char* type_id() const override { return "custom-detail-id"; } + std::string ToString() const override { return "This is a detail"; } +}; + +class UuidType : public ExtensionType { + public: + UuidType() : ExtensionType(fixed_size_binary(16)) {} + + std::string extension_name() const override { return "uuid"; } + + bool ExtensionEquals(const ExtensionType& other) const override { + return (other.extension_name() == this->extension_name()); + } + + std::shared_ptr MakeArray(std::shared_ptr data) const override { + return std::make_shared(data); + } + + Result> Deserialize( + std::shared_ptr storage_type, + const std::string& serialized) const override { + return Status::NotImplemented(""); + } + + std::string Serialize() const override { return "uuid-serialized"; } +}; + +std::shared_ptr SliceArrayFromJSON(const std::shared_ptr& ty, + std::string_view json, int64_t offset = 0, + int64_t length = -1) { + auto array = *ArrayFromJSON(ty, json); + if (length != -1) { + return array->Slice(offset, length); + } else { + return array->Slice(offset); + } +} + +} // namespace + +void TestSession() { + // We define local variables for all types for which we want to test + // pretty-printing. + // Then, at the end of this function, we trap to the debugger, so that + // test instrumentation can print values from this frame by interacting + // with the debugger. 
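+  // [Editor's note] Keeping every test value in this one live stack frame,
+  // rather than in globals, is what lets the GDB-based tests inspect them
+  // with plain `print <variable>` commands; the diagnostic pragmas just
+  // below exist to silence the -Wunused-variable warnings this causes.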
+ // The test instrumentation is in pyarrow/tests/test_gdb.py + +#ifdef __clang__ + _Pragma("clang diagnostic push"); + _Pragma("clang diagnostic ignored \"-Wunused-variable\""); +#elif defined(__GNUC__) + _Pragma("GCC diagnostic push"); + _Pragma("GCC diagnostic ignored \"-Wunused-variable\""); +#endif + + arrow::DummyFunction(); + + // Status & Result + auto ok_status = Status::OK(); + auto error_status = Status::IOError("This is an error"); + auto error_detail_status = + error_status.WithDetail(std::make_shared()); + auto ok_result = Result(42); + auto error_result = Result(error_status); + auto error_detail_result = Result(error_detail_status); + + // String views + std::string_view string_view_abc{"abc"}; + std::string special_chars = std::string("foo\"bar") + '\x00' + "\r\n\t\x1f"; + std::string_view string_view_special_chars(special_chars); + + // Buffers + Buffer buffer_null{nullptr, 0}; + Buffer buffer_abc{string_view_abc}; + Buffer buffer_special_chars{string_view_special_chars}; + char mutable_array[3] = {'a', 'b', 'c'}; + MutableBuffer buffer_mutable{reinterpret_cast(mutable_array), 3}; + auto heap_buffer = std::make_shared(string_view_abc); + auto heap_buffer_mutable = *AllocateBuffer(buffer_abc.size()); + memcpy(heap_buffer_mutable->mutable_data(), buffer_abc.data(), buffer_abc.size()); + + // KeyValueMetadata + auto empty_metadata = key_value_metadata({}, {}); + auto metadata = key_value_metadata( + {"key_text", "key_binary"}, {"some value", std::string("z") + '\x00' + "\x1f\xff"}); + + // Decimals + Decimal128 decimal128_zero{}; + Decimal128 decimal128_pos{"98765432109876543210987654321098765432"}; + Decimal128 decimal128_neg{"-98765432109876543210987654321098765432"}; + BasicDecimal128 basic_decimal128_zero{}; + BasicDecimal128 basic_decimal128_pos{decimal128_pos.native_endian_array()}; + BasicDecimal128 basic_decimal128_neg{decimal128_neg.native_endian_array()}; + Decimal256 decimal256_zero{}; + Decimal256 decimal256_pos{ + "9876543210987654321098765432109876543210987654321098765432109876543210987654"}; + Decimal256 decimal256_neg{ + "-9876543210987654321098765432109876543210987654321098765432109876543210987654"}; + BasicDecimal256 basic_decimal256_zero{}; + BasicDecimal256 basic_decimal256_pos{decimal256_pos.native_endian_array()}; + BasicDecimal256 basic_decimal256_neg{decimal256_neg.native_endian_array()}; + + // Data types + NullType null_type; + auto heap_null_type = null(); + BooleanType bool_type; + auto heap_bool_type = boolean(); + + Date32Type date32_type; + Date64Type date64_type; + Time32Type time_type_s(TimeUnit::SECOND); + Time32Type time_type_ms(TimeUnit::MILLI); + Time64Type time_type_us(TimeUnit::MICRO); + Time64Type time_type_ns(TimeUnit::NANO); + auto heap_time_type_ns = time64(TimeUnit::NANO); + + TimestampType timestamp_type_s(TimeUnit::SECOND); + TimestampType timestamp_type_ms_timezone(TimeUnit::MILLI, "Europe/Paris"); + TimestampType timestamp_type_us(TimeUnit::MICRO); + TimestampType timestamp_type_ns_timezone(TimeUnit::NANO, "Europe/Paris"); + auto heap_timestamp_type_ns_timezone = timestamp(TimeUnit::NANO, "Europe/Paris"); + + DayTimeIntervalType day_time_interval_type; + MonthIntervalType month_interval_type; + MonthDayNanoIntervalType month_day_nano_interval_type; + + DurationType duration_type_s(TimeUnit::SECOND); + DurationType duration_type_ns(TimeUnit::NANO); + + BinaryType binary_type; + StringType string_type; + LargeBinaryType large_binary_type; + LargeStringType large_string_type; + FixedSizeBinaryType fixed_size_binary_type(10); + 
auto heap_fixed_size_binary_type = fixed_size_binary(10); + + Decimal128Type decimal128_type(16, 5); + Decimal256Type decimal256_type(42, 12); + auto heap_decimal128_type = decimal128(16, 5); + + ListType list_type(uint8()); + LargeListType large_list_type(large_utf8()); + auto heap_list_type = list(uint8()); + auto heap_large_list_type = large_list(large_utf8()); + + FixedSizeListType fixed_size_list_type(float64(), 3); + auto heap_fixed_size_list_type = fixed_size_list(float64(), 3); + + DictionaryType dict_type_unordered(int16(), utf8()); + DictionaryType dict_type_ordered(int16(), utf8(), /*ordered=*/true); + auto heap_dict_type = dictionary(int16(), utf8()); + + MapType map_type_unsorted(utf8(), binary()); + MapType map_type_sorted(utf8(), binary(), /*keys_sorted=*/true); + auto heap_map_type = map(utf8(), binary()); + + StructType struct_type_empty({}); + StructType struct_type( + {field("ints", int8()), field("strs", utf8(), /*nullable=*/false)}); + auto heap_struct_type = + struct_({field("ints", int8()), field("strs", utf8(), /*nullable=*/false)}); + + std::vector union_type_codes({7, 42}); + FieldVector union_fields( + {field("ints", int8()), field("strs", utf8(), /*nullable=*/false)}); + SparseUnionType sparse_union_type(union_fields, union_type_codes); + DenseUnionType dense_union_type(union_fields, union_type_codes); + + UuidType uuid_type{}; + std::shared_ptr heap_uuid_type = std::make_shared(); + + // Schema + auto schema_empty = schema({}); + auto schema_non_empty = schema({field("ints", int8()), field("strs", utf8())}); + auto schema_with_metadata = schema_non_empty->WithMetadata( + key_value_metadata({"key1", "key2"}, {"value1", "value2"})); + + // Fields + Field int_field("ints", int64()); + Field float_field("floats", float32(), /*nullable=*/false); + auto heap_int_field = field("ints", int64()); + + // Scalars + NullScalar null_scalar; + auto heap_null_scalar = MakeNullScalar(null()); + + BooleanScalar bool_scalar_null{}; + BooleanScalar bool_scalar{true}; + auto heap_bool_scalar = *MakeScalar(boolean(), true); + + Int8Scalar int8_scalar_null{}; + UInt8Scalar uint8_scalar_null{}; + Int64Scalar int64_scalar_null{}; + UInt64Scalar uint64_scalar_null{}; + Int8Scalar int8_scalar{-42}; + UInt8Scalar uint8_scalar{234}; + Int64Scalar int64_scalar{-9223372036854775807LL - 1}; + UInt64Scalar uint64_scalar{18446744073709551615ULL}; + HalfFloatScalar half_float_scalar{48640}; // -1.5 + FloatScalar float_scalar{1.25f}; + DoubleScalar double_scalar{2.5}; + + Time32Scalar time_scalar_s{100, TimeUnit::SECOND}; + Time32Scalar time_scalar_ms{1000, TimeUnit::MILLI}; + Time64Scalar time_scalar_us{10000, TimeUnit::MICRO}; + Time64Scalar time_scalar_ns{100000, TimeUnit::NANO}; + Time64Scalar time_scalar_null{time64(TimeUnit::NANO)}; + + DurationScalar duration_scalar_s{-100, TimeUnit::SECOND}; + DurationScalar duration_scalar_ms{-1000, TimeUnit::MILLI}; + DurationScalar duration_scalar_us{-10000, TimeUnit::MICRO}; + DurationScalar duration_scalar_ns{-100000, TimeUnit::NANO}; + DurationScalar duration_scalar_null{duration(TimeUnit::NANO)}; + + TimestampScalar timestamp_scalar_s{12345, timestamp(TimeUnit::SECOND)}; + TimestampScalar timestamp_scalar_ms{-123456, timestamp(TimeUnit::MILLI)}; + TimestampScalar timestamp_scalar_us{1234567, timestamp(TimeUnit::MICRO)}; + TimestampScalar timestamp_scalar_ns{-12345678, timestamp(TimeUnit::NANO)}; + TimestampScalar timestamp_scalar_null{timestamp(TimeUnit::NANO)}; + + TimestampScalar timestamp_scalar_s_tz{12345, + timestamp(TimeUnit::SECOND, 
"Europe/Paris")}; + TimestampScalar timestamp_scalar_ms_tz{-123456, + timestamp(TimeUnit::MILLI, "Europe/Paris")}; + TimestampScalar timestamp_scalar_us_tz{1234567, + timestamp(TimeUnit::MICRO, "Europe/Paris")}; + TimestampScalar timestamp_scalar_ns_tz{-12345678, + timestamp(TimeUnit::NANO, "Europe/Paris")}; + TimestampScalar timestamp_scalar_null_tz{timestamp(TimeUnit::NANO, "Europe/Paris")}; + + MonthIntervalScalar month_interval_scalar{23}; + MonthIntervalScalar month_interval_scalar_null{}; + DayTimeIntervalScalar day_time_interval_scalar{{23, -456}}; + DayTimeIntervalScalar day_time_interval_scalar_null{}; + MonthDayNanoIntervalScalar month_day_nano_interval_scalar{{1, 23, -456}}; + MonthDayNanoIntervalScalar month_day_nano_interval_scalar_null{}; + + Date32Scalar date32_scalar{23}; + Date32Scalar date32_scalar_null{}; + Date64Scalar date64_scalar{45 * 86400000LL}; + Date64Scalar date64_scalar_null{}; + + Decimal128Scalar decimal128_scalar_pos_scale_pos{Decimal128("1234567"), + decimal128(10, 4)}; + Decimal128Scalar decimal128_scalar_pos_scale_neg{Decimal128("-1234567"), + decimal128(10, 4)}; + Decimal128Scalar decimal128_scalar_neg_scale_pos{Decimal128("1234567"), + decimal128(10, -4)}; + Decimal128Scalar decimal128_scalar_neg_scale_neg{Decimal128("-1234567"), + decimal128(10, -4)}; + Decimal128Scalar decimal128_scalar_null{decimal128(10, 4)}; + auto heap_decimal128_scalar = *MakeScalar(decimal128(10, 4), Decimal128("1234567")); + + Decimal256Scalar decimal256_scalar_pos_scale_pos{ + Decimal256("1234567890123456789012345678901234567890123456"), decimal256(50, 4)}; + Decimal256Scalar decimal256_scalar_pos_scale_neg{ + Decimal256("-1234567890123456789012345678901234567890123456"), decimal256(50, 4)}; + Decimal256Scalar decimal256_scalar_neg_scale_pos{ + Decimal256("1234567890123456789012345678901234567890123456"), decimal256(50, -4)}; + Decimal256Scalar decimal256_scalar_neg_scale_neg{ + Decimal256("-1234567890123456789012345678901234567890123456"), decimal256(50, -4)}; + Decimal256Scalar decimal256_scalar_null{decimal256(50, 4)}; + auto heap_decimal256_scalar = *MakeScalar( + decimal256(50, 4), Decimal256("1234567890123456789012345678901234567890123456")); + + BinaryScalar binary_scalar_null{}; + BinaryScalar binary_scalar_unallocated{std::shared_ptr{nullptr}}; + BinaryScalar binary_scalar_empty{Buffer::FromString("")}; + BinaryScalar binary_scalar_abc{Buffer::FromString("abc")}; + BinaryScalar binary_scalar_bytes{ + Buffer::FromString(std::string() + '\x00' + "\x1f\xff")}; + + StringScalar string_scalar_null{}; + StringScalar string_scalar_unallocated{std::shared_ptr{nullptr}}; + StringScalar string_scalar_empty{Buffer::FromString("")}; + StringScalar string_scalar_hehe{Buffer::FromString("héhé")}; + StringScalar string_scalar_invalid_chars{ + Buffer::FromString(std::string("abc") + '\x00' + "def\xffghi")}; + + LargeBinaryScalar large_binary_scalar_abc{Buffer::FromString("abc")}; + LargeStringScalar large_string_scalar_hehe{Buffer::FromString("héhé")}; + + FixedSizeBinaryScalar fixed_size_binary_scalar{Buffer::FromString("abc"), + fixed_size_binary(3)}; + FixedSizeBinaryScalar fixed_size_binary_scalar_null{ + Buffer::FromString(" "), fixed_size_binary(3), /*is_valid=*/false}; + + std::shared_ptr dict_array; + dict_array = *ArrayFromJSON(utf8(), R"(["foo", "bar", "quux"])"); + DictionaryScalar dict_scalar{{std::make_shared(42), dict_array}, + dictionary(int8(), utf8())}; + DictionaryScalar dict_scalar_null{dictionary(int8(), utf8())}; + + std::shared_ptr list_value_array = 
*ArrayFromJSON(int32(), R"([4, 5, 6])"); + std::shared_ptr list_zero_length = *ArrayFromJSON(int32(), R"([])"); + ListScalar list_scalar{list_value_array}; + ListScalar list_scalar_null{list_zero_length, list(int32()), /*is_valid=*/false}; + LargeListScalar large_list_scalar{list_value_array}; + LargeListScalar large_list_scalar_null{list_zero_length, large_list(int32()), + /*is_valid=*/false}; + FixedSizeListScalar fixed_size_list_scalar{list_value_array}; + FixedSizeListScalar fixed_size_list_scalar_null{ + list_value_array, fixed_size_list(int32(), 3), /*is_valid=*/false}; + + auto struct_scalar_type = struct_({field("ints", int32()), field("strs", utf8())}); + StructScalar struct_scalar{ + ScalarVector{MakeScalar(int32_t(42)), MakeScalar("some text")}, struct_scalar_type}; + StructScalar struct_scalar_null{struct_scalar.value, struct_scalar_type, + /*is_valid=*/false}; + + auto sparse_union_scalar_type = + sparse_union(FieldVector{field("ints", int32()), field("strs", utf8())}, {7, 42}); + auto dense_union_scalar_type = + dense_union(FieldVector{field("ints", int32()), field("strs", utf8())}, {7, 42}); + std::vector> union_values = {MakeScalar(int32_t(43)), + MakeNullScalar(utf8())}; + SparseUnionScalar sparse_union_scalar{union_values, 7, sparse_union_scalar_type}; + DenseUnionScalar dense_union_scalar{union_values[0], 7, dense_union_scalar_type}; + + union_values[0] = MakeNullScalar(int32()); + SparseUnionScalar sparse_union_scalar_null{union_values, 7, sparse_union_scalar_type}; + DenseUnionScalar dense_union_scalar_null{union_values[0], 7, dense_union_scalar_type}; + + auto extension_scalar_type = std::make_shared(); + ExtensionScalar extension_scalar{ + std::make_shared(Buffer::FromString("0123456789abcdef"), + extension_scalar_type->storage_type()), + extension_scalar_type}; + ExtensionScalar extension_scalar_null{extension_scalar.value, extension_scalar_type, + /*is_valid=*/false}; + + std::shared_ptr heap_map_scalar; + ARROW_CHECK_OK( + ScalarFromJSON(map(utf8(), int32()), R"([["a", 5], ["b", 6]])", &heap_map_scalar)); + auto heap_map_scalar_null = MakeNullScalar(heap_map_scalar->type); + + // Array and ArrayData + auto heap_null_array = SliceArrayFromJSON(null(), "[null, null]"); + + auto heap_int32_array = SliceArrayFromJSON(int32(), "[-5, 6, null, 42]"); + ArrayData int32_array_data{*heap_int32_array->data()}; + Int32Array int32_array{heap_int32_array->data()->Copy()}; + + auto heap_int32_array_no_nulls = SliceArrayFromJSON(int32(), "[-5, 6, 3, 42]"); + + const char* json_int32_array = "[-1, 2, -3, 4, null, -5, 6, -7, 8, null, -9, -10]"; + auto heap_int32_array_sliced_1_9 = SliceArrayFromJSON(int32(), json_int32_array, 1, 9); + auto heap_int32_array_sliced_2_6 = SliceArrayFromJSON(int32(), json_int32_array, 2, 6); + auto heap_int32_array_sliced_8_4 = SliceArrayFromJSON(int32(), json_int32_array, 8, 4); + auto heap_int32_array_sliced_empty = + SliceArrayFromJSON(int32(), json_int32_array, 6, 0); + + const char* json_bool_array = + "[false, false, true, true, null, null, false, false, true, true, " + "null, null, false, false, true, true, null, null]"; + auto heap_bool_array = SliceArrayFromJSON(boolean(), json_bool_array); + auto heap_bool_array_sliced_1_9 = SliceArrayFromJSON(boolean(), json_bool_array, 1, 9); + auto heap_bool_array_sliced_2_6 = SliceArrayFromJSON(boolean(), json_bool_array, 2, 6); + auto heap_bool_array_sliced_empty = + SliceArrayFromJSON(boolean(), json_bool_array, 6, 0); + + auto heap_list_array = SliceArrayFromJSON(list(int64()), "[[1, 2], null, []]"); + 
ListArray list_array{heap_list_array->data()}; + + const char* json_double_array = "[-1.5, null]"; + auto heap_double_array = SliceArrayFromJSON(float64(), json_double_array); + + const char* json_float16_array = "[0, 48640]"; + auto heap_float16_array = + *SliceArrayFromJSON(uint16(), json_float16_array)->View(float16()); + + auto heap_date32_array = + SliceArrayFromJSON(date32(), "[0, null, 18336, -9004, -719162, -719163]"); + auto heap_date64_array = SliceArrayFromJSON( + date64(), "[1584230400000, -777945600000, -62135596800000, -62135683200000, 123]"); + + const char* json_time_array = "[null, -123, 456]"; + auto heap_time32_array_s = + SliceArrayFromJSON(time32(TimeUnit::SECOND), json_time_array); + auto heap_time32_array_ms = + SliceArrayFromJSON(time32(TimeUnit::MILLI), json_time_array); + auto heap_time64_array_us = + SliceArrayFromJSON(time64(TimeUnit::MICRO), json_time_array); + auto heap_time64_array_ns = SliceArrayFromJSON(time64(TimeUnit::NANO), json_time_array); + + auto heap_month_interval_array = + SliceArrayFromJSON(month_interval(), "[123, -456, null]"); + auto heap_day_time_interval_array = + SliceArrayFromJSON(day_time_interval(), "[[1, -600], null]"); + auto heap_month_day_nano_interval_array = + SliceArrayFromJSON(month_day_nano_interval(), "[[1, -600, 5000], null]"); + + const char* json_duration_array = "[null, -1234567890123456789]"; + auto heap_duration_array_s = + SliceArrayFromJSON(duration(TimeUnit::SECOND), json_duration_array); + auto heap_duration_array_ns = + SliceArrayFromJSON(duration(TimeUnit::NANO), json_duration_array); + + auto heap_timestamp_array_s = SliceArrayFromJSON( + timestamp(TimeUnit::SECOND), + R"([null, "1970-01-01 00:00:00", "1900-02-28 12:34:56", "3989-07-14 00:00:00"])"); + auto heap_timestamp_array_ms = SliceArrayFromJSON( + timestamp(TimeUnit::MILLI), + R"([null, "1900-02-28 12:34:56.123", "3989-07-14 00:00:00.789"])"); + auto heap_timestamp_array_us = SliceArrayFromJSON( + timestamp(TimeUnit::MICRO), + R"([null, "1900-02-28 12:34:56.654321", "3989-07-14 00:00:00.456789"])"); + auto heap_timestamp_array_ns = SliceArrayFromJSON( + timestamp(TimeUnit::NANO), R"([null, "1900-02-28 12:34:56.987654321"])"); + + auto heap_decimal128_array = SliceArrayFromJSON( + decimal128(30, 6), + R"([null, "-1234567890123456789.012345", "1234567890123456789.012345"])"); + auto heap_decimal256_array = SliceArrayFromJSON( + decimal256(50, 6), R"([null, "-123456789012345678901234567890123456789.012345"])"); + auto heap_decimal128_array_sliced = heap_decimal128_array->Slice(1, 1); + + auto heap_fixed_size_binary_array = + SliceArrayFromJSON(fixed_size_binary(3), "[null, \"abc\", \"\\u0000\\u001f\xff\"]"); + auto heap_fixed_size_binary_array_zero_width = + SliceArrayFromJSON(fixed_size_binary(0), R"([null, ""])"); + auto heap_fixed_size_binary_array_sliced = heap_fixed_size_binary_array->Slice(1, 1); + + const char* json_binary_array = "[null, \"abcd\", \"\\u0000\\u001f\xff\"]"; + auto heap_binary_array = SliceArrayFromJSON(binary(), json_binary_array); + auto heap_large_binary_array = SliceArrayFromJSON(large_binary(), json_binary_array); + const char* json_string_array = "[null, \"héhé\", \"invalid \xff char\"]"; + auto heap_string_array = SliceArrayFromJSON(utf8(), json_string_array); + auto heap_large_string_array = SliceArrayFromJSON(large_utf8(), json_string_array); + auto heap_binary_array_sliced = heap_binary_array->Slice(1, 1); + + // ChunkedArray + ArrayVector array_chunks(2); + array_chunks[0] = *ArrayFromJSON(int32(), "[1, 2]"); + array_chunks[1] 
= *ArrayFromJSON(int32(), "[3, null, 4]"); + ChunkedArray chunked_array{array_chunks}; + + // RecordBatch + auto batch_schema = schema({field("ints", int32()), field("strs", utf8())}); + ArrayVector batch_columns{2}; + batch_columns[0] = *ArrayFromJSON(int32(), "[1, 2, 3]"); + batch_columns[1] = *ArrayFromJSON(utf8(), R"(["abc", null, "def"])"); + auto batch = RecordBatch::Make(batch_schema, /*num_rows=*/3, batch_columns); + auto batch_with_metadata = batch->ReplaceSchemaMetadata( + key_value_metadata({"key1", "key2", "key3"}, {"value1", "value2", "value3"})); + + // Table + ChunkedArrayVector table_columns{2}; + ARROW_CHECK_OK( + ChunkedArrayFromJSON(int32(), {"[1, 2, 3]", "[4, 5]"}, &table_columns[0])); + ARROW_CHECK_OK(ChunkedArrayFromJSON( + utf8(), {R"(["abc", null])", R"(["def"])", R"(["ghi", "jkl"])"}, + &table_columns[1])); + auto table = Table::Make(batch_schema, table_columns); + + // Datum + Datum empty_datum{}; + Datum scalar_datum{MakeNullScalar(boolean())}; + Datum array_datum{heap_int32_array}; + Datum chunked_array_datum{chunked_array}; + Datum batch_datum{batch}; + Datum table_datum{table}; + +#ifdef __clang__ + _Pragma("clang diagnostic pop"); +#elif defined(__GNUC__) + _Pragma("GCC diagnostic pop"); +#endif + + // Hook into debugger + ::arrow::internal::DebugTrap(); +} + +} // namespace gdb +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/helpers.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/helpers.cc new file mode 100644 index 0000000..c266abc --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/helpers.cc @@ -0,0 +1,470 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
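[Editor's aside] The GetPrimitiveType() function that opens this file maps the Type::type enum onto factory functions through a one-line case macro. The same trick in miniature, with hypothetical names:

#include <iostream>
#include <string>

enum class Kind { INT32, FLOAT64, STRING };

std::string int32_name() { return "int32"; }
std::string float64_name() { return "float64"; }
std::string string_name() { return "utf8"; }

// One macro expansion per enum value keeps the switch table declarative.
#define NAME_CASE(KIND, FACTORY) \
  case Kind::KIND:               \
    return FACTORY()

std::string KindName(Kind kind) {
  switch (kind) {
    NAME_CASE(INT32, int32_name);
    NAME_CASE(FLOAT64, float64_name);
    NAME_CASE(STRING, string_name);
  }
  return "unknown";  // unreachable for valid enum values
}

int main() {
  std::cout << KindName(Kind::FLOAT64) << "\n";  // prints "float64"
  return 0;
}

Adding a type stays a single line, and because the switch enumerates every case without a default, the compiler can still warn when an enumerator is missing.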
+
+// helpers.h includes a NumPy header, so we include this first
+#include "arrow/python/numpy_interop.h"
+
+#include "arrow/python/helpers.h"
+
+#include <cmath>
+#include <limits>
+#include <sstream>
+#include <type_traits>
+
+#include "arrow/python/common.h"
+#include "arrow/python/decimal.h"
+#include "arrow/type_fwd.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+
+using internal::checked_cast;
+
+namespace py {
+
+#define GET_PRIMITIVE_TYPE(NAME, FACTORY) \
+  case Type::NAME:                        \
+    return FACTORY()
+
+std::shared_ptr<DataType> GetPrimitiveType(Type::type type) {
+  switch (type) {
+    case Type::NA:
+      return null();
+      GET_PRIMITIVE_TYPE(UINT8, uint8);
+      GET_PRIMITIVE_TYPE(INT8, int8);
+      GET_PRIMITIVE_TYPE(UINT16, uint16);
+      GET_PRIMITIVE_TYPE(INT16, int16);
+      GET_PRIMITIVE_TYPE(UINT32, uint32);
+      GET_PRIMITIVE_TYPE(INT32, int32);
+      GET_PRIMITIVE_TYPE(UINT64, uint64);
+      GET_PRIMITIVE_TYPE(INT64, int64);
+      GET_PRIMITIVE_TYPE(DATE32, date32);
+      GET_PRIMITIVE_TYPE(DATE64, date64);
+      GET_PRIMITIVE_TYPE(BOOL, boolean);
+      GET_PRIMITIVE_TYPE(HALF_FLOAT, float16);
+      GET_PRIMITIVE_TYPE(FLOAT, float32);
+      GET_PRIMITIVE_TYPE(DOUBLE, float64);
+      GET_PRIMITIVE_TYPE(BINARY, binary);
+      GET_PRIMITIVE_TYPE(STRING, utf8);
+      GET_PRIMITIVE_TYPE(LARGE_BINARY, large_binary);
+      GET_PRIMITIVE_TYPE(LARGE_STRING, large_utf8);
+      GET_PRIMITIVE_TYPE(INTERVAL_MONTH_DAY_NANO, month_day_nano_interval);
+    default:
+      return nullptr;
+  }
+}
+
+PyObject* PyHalf_FromHalf(npy_half value) {
+  PyObject* result = PyArrayScalar_New(Half);
+  if (result != NULL) {
+    PyArrayScalar_ASSIGN(result, Half, value);
+  }
+  return result;
+}
+
+Status PyFloat_AsHalf(PyObject* obj, npy_half* out) {
+  if (PyArray_IsScalar(obj, Half)) {
+    *out = PyArrayScalar_VAL(obj, Half);
+    return Status::OK();
+  } else {
+    // XXX: cannot use npy_double_to_half() without linking with Numpy
+    return Status::TypeError("Expected np.float16 instance");
+  }
+}
+
+namespace internal {
+
+std::string PyBytes_AsStdString(PyObject* obj) {
+  DCHECK(PyBytes_Check(obj));
+  return std::string(PyBytes_AS_STRING(obj), PyBytes_GET_SIZE(obj));
+}
+
+Status PyUnicode_AsStdString(PyObject* obj, std::string* out) {
+  DCHECK(PyUnicode_Check(obj));
+  Py_ssize_t size;
+  // The utf-8 representation is cached on the unicode object
+  const char* data = PyUnicode_AsUTF8AndSize(obj, &size);
+  RETURN_IF_PYERROR();
+  *out = std::string(data, size);
+  return Status::OK();
+}
+
+std::string PyObject_StdStringRepr(PyObject* obj) {
+  OwnedRef unicode_ref(PyObject_Repr(obj));
+  OwnedRef bytes_ref;
+
+  if (unicode_ref) {
+    bytes_ref.reset(
+        PyUnicode_AsEncodedString(unicode_ref.obj(), "utf8", "backslashreplace"));
+  }
+  if (!bytes_ref) {
+    PyErr_Clear();
+    std::stringstream ss;
+    ss << "<object of type '" << Py_TYPE(obj)->tp_name << "' repr() failed>";
+    return ss.str();
+  }
+  return PyBytes_AsStdString(bytes_ref.obj());
+}
+
+Status PyObject_StdStringStr(PyObject* obj, std::string* out) {
+  OwnedRef string_ref(PyObject_Str(obj));
+  RETURN_IF_PYERROR();
+  return PyUnicode_AsStdString(string_ref.obj(), out);
+}
+
+Result<bool> IsModuleImported(const std::string& module_name) {
+  // PyImport_GetModuleDict returns with a borrowed reference
+  OwnedRef key(PyUnicode_FromString(module_name.c_str()));
+  auto is_imported = PyDict_Contains(PyImport_GetModuleDict(), key.obj());
+  RETURN_IF_PYERROR();
+  return is_imported;
+}
+
+Status ImportModule(const std::string& module_name, OwnedRef* ref) {
+  PyObject* module = PyImport_ImportModule(module_name.c_str());
+  RETURN_IF_PYERROR();
+  ref->reset(module);
+  return Status::OK();
+}
+
+Status 
ImportFromModule(PyObject* module, const std::string& name, OwnedRef* ref) {
+  PyObject* attr = PyObject_GetAttrString(module, name.c_str());
+  RETURN_IF_PYERROR();
+  ref->reset(attr);
+  return Status::OK();
+}
+
+namespace {
+
+Status IntegerOverflowStatus(PyObject* obj, const std::string& overflow_message) {
+  if (overflow_message.empty()) {
+    std::string obj_as_stdstring;
+    RETURN_NOT_OK(PyObject_StdStringStr(obj, &obj_as_stdstring));
+    return Status::Invalid("Value ", obj_as_stdstring,
+                           " too large to fit in C integer type");
+  } else {
+    return Status::Invalid(overflow_message);
+  }
+}
+
+Result<OwnedRef> PyObjectToPyInt(PyObject* obj) {
+  // Try to call __index__ or __int__ on `obj`
+  // (starting from Python 3.10, the latter isn't done anymore by PyLong_AsLong*).
+  OwnedRef ref(PyNumber_Index(obj));
+  if (ref) {
+    return std::move(ref);
+  }
+  PyErr_Clear();
+  const auto nb = Py_TYPE(obj)->tp_as_number;
+  if (nb && nb->nb_int) {
+    ref.reset(nb->nb_int(obj));
+    if (!ref) {
+      RETURN_IF_PYERROR();
+    }
+    DCHECK(ref);
+    return std::move(ref);
+  }
+  return Status::TypeError(
+      "object of type ",
+      PyObject_StdStringRepr(reinterpret_cast<PyObject*>(Py_TYPE(obj))),
+      " cannot be converted to int");
+}
+
+// Extract C signed int from Python object
+template <typename Int, enable_if_t<std::is_signed<Int>::value, Int> = 0>
+Status CIntFromPythonImpl(PyObject* obj, Int* out, const std::string& overflow_message) {
+  static_assert(sizeof(Int) <= sizeof(long long),  // NOLINT
+                "integer type larger than long long");
+
+  OwnedRef ref;
+  if (!PyLong_Check(obj)) {
+    ARROW_ASSIGN_OR_RAISE(ref, PyObjectToPyInt(obj));
+    obj = ref.obj();
+  }
+
+  if (sizeof(Int) > sizeof(long)) {  // NOLINT
+    const auto value = PyLong_AsLongLong(obj);
+    if (ARROW_PREDICT_FALSE(value == -1)) {
+      RETURN_IF_PYERROR();
+    }
+    if (ARROW_PREDICT_FALSE(value < std::numeric_limits<Int>::min() ||
+                            value > std::numeric_limits<Int>::max())) {
+      return IntegerOverflowStatus(obj, overflow_message);
+    }
+    *out = static_cast<Int>(value);
+  } else {
+    const auto value = PyLong_AsLong(obj);
+    if (ARROW_PREDICT_FALSE(value == -1)) {
+      RETURN_IF_PYERROR();
+    }
+    if (ARROW_PREDICT_FALSE(value < std::numeric_limits<Int>::min() ||
+                            value > std::numeric_limits<Int>::max())) {
+      return IntegerOverflowStatus(obj, overflow_message);
+    }
+    *out = static_cast<Int>(value);
+  }
+  return Status::OK();
+}
+
+// Extract C unsigned int from Python object
+template <typename Int, enable_if_t<std::is_unsigned<Int>::value, Int> = 0>
+Status CIntFromPythonImpl(PyObject* obj, Int* out, const std::string& overflow_message) {
+  static_assert(sizeof(Int) <= sizeof(unsigned long long),  // NOLINT
+                "integer type larger than unsigned long long");
+
+  OwnedRef ref;
+  if (!PyLong_Check(obj)) {
+    ARROW_ASSIGN_OR_RAISE(ref, PyObjectToPyInt(obj));
+    obj = ref.obj();
+  }
+
+  if (sizeof(Int) > sizeof(unsigned long)) {  // NOLINT
+    const auto value = PyLong_AsUnsignedLongLong(obj);
+    if (ARROW_PREDICT_FALSE(value == static_cast<decltype(value)>(-1))) {
+      RETURN_IF_PYERROR();
+    }
+    if (ARROW_PREDICT_FALSE(value > std::numeric_limits<Int>::max())) {
+      return IntegerOverflowStatus(obj, overflow_message);
+    }
+    *out = static_cast<Int>(value);
+  } else {
+    const auto value = PyLong_AsUnsignedLong(obj);
+    if (ARROW_PREDICT_FALSE(value == static_cast<decltype(value)>(-1))) {
+      RETURN_IF_PYERROR();
+    }
+    if (ARROW_PREDICT_FALSE(value > std::numeric_limits<Int>::max())) {
+      return IntegerOverflowStatus(obj, overflow_message);
+    }
+    *out = static_cast<Int>(value);
+  }
+  return Status::OK();
+}
+
+}  // namespace
+
+template <typename Int>
+Status CIntFromPython(PyObject* obj, Int* out, const std::string& overflow_message) {
+  if (PyBool_Check(obj)) {
+    return Status::TypeError("Expected integer, 
got bool"); + } + return CIntFromPythonImpl(obj, out, overflow_message); +} + +template Status CIntFromPython(PyObject*, int8_t*, const std::string&); +template Status CIntFromPython(PyObject*, int16_t*, const std::string&); +template Status CIntFromPython(PyObject*, int32_t*, const std::string&); +template Status CIntFromPython(PyObject*, int64_t*, const std::string&); +template Status CIntFromPython(PyObject*, uint8_t*, const std::string&); +template Status CIntFromPython(PyObject*, uint16_t*, const std::string&); +template Status CIntFromPython(PyObject*, uint32_t*, const std::string&); +template Status CIntFromPython(PyObject*, uint64_t*, const std::string&); + +inline bool MayHaveNaN(PyObject* obj) { + // Some core types can be very quickly type-checked and do not allow NaN values + const int64_t non_nan_tpflags = Py_TPFLAGS_LONG_SUBCLASS | Py_TPFLAGS_LIST_SUBCLASS | + Py_TPFLAGS_TUPLE_SUBCLASS | Py_TPFLAGS_BYTES_SUBCLASS | + Py_TPFLAGS_UNICODE_SUBCLASS | Py_TPFLAGS_DICT_SUBCLASS | + Py_TPFLAGS_BASE_EXC_SUBCLASS | Py_TPFLAGS_TYPE_SUBCLASS; + return !PyType_HasFeature(Py_TYPE(obj), non_nan_tpflags); +} + +bool PyFloat_IsNaN(PyObject* obj) { + return PyFloat_Check(obj) && std::isnan(PyFloat_AsDouble(obj)); +} + +namespace { + +static bool pandas_static_initialized = false; + +// Once initialized, these variables hold borrowed references to Pandas static data. +// We should not use OwnedRef here because Python destructors would be +// called on a finalized interpreter. +static PyObject* pandas_NA = nullptr; +static PyObject* pandas_NaT = nullptr; +static PyObject* pandas_Timedelta = nullptr; +static PyObject* pandas_Timestamp = nullptr; +static PyTypeObject* pandas_NaTType = nullptr; +static PyObject* pandas_DateOffset = nullptr; + +} // namespace + +void InitPandasStaticData() { + // NOTE: This is called with the GIL held. We needn't (and shouldn't, + // to avoid deadlocks) use an additional C++ lock (ARROW-10519). + if (pandas_static_initialized) { + return; + } + + OwnedRef pandas; + + // Import pandas + Status s = ImportModule("pandas", &pandas); + if (!s.ok()) { + return; + } + + // Since ImportModule can release the GIL, another thread could have + // already initialized the static data. 
+  if (pandas_static_initialized) {
+    return;
+  }
+  OwnedRef ref;
+
+  // set NaT sentinel and its type
+  if (ImportFromModule(pandas.obj(), "NaT", &ref).ok()) {
+    pandas_NaT = ref.obj();
+    // Py_TYPE returns a borrowed reference, but we trust that pandas.NaT
+    // will outlive our use of this PyTypeObject*
+    pandas_NaTType = Py_TYPE(ref.obj());
+  }
+
+  // retain a reference to Timedelta
+  if (ImportFromModule(pandas.obj(), "Timedelta", &ref).ok()) {
+    pandas_Timedelta = ref.obj();
+  }
+
+  // retain a reference to Timestamp
+  if (ImportFromModule(pandas.obj(), "Timestamp", &ref).ok()) {
+    pandas_Timestamp = ref.obj();
+  }
+
+  // if pandas.NA exists, retain a reference to it
+  if (ImportFromModule(pandas.obj(), "NA", &ref).ok()) {
+    pandas_NA = ref.obj();
+  }
+
+  // Import DateOffset type
+  if (ImportFromModule(pandas.obj(), "DateOffset", &ref).ok()) {
+    pandas_DateOffset = ref.obj();
+  }
+
+  pandas_static_initialized = true;
+}
+
+bool PandasObjectIsNull(PyObject* obj) {
+  if (!MayHaveNaN(obj)) {
+    return false;
+  }
+  if (obj == Py_None) {
+    return true;
+  }
+  if (PyFloat_IsNaN(obj) || (pandas_NA && obj == pandas_NA) ||
+      (pandas_NaTType && PyObject_TypeCheck(obj, pandas_NaTType)) ||
+      (internal::PyDecimal_Check(obj) && internal::PyDecimal_ISNAN(obj))) {
+    return true;
+  }
+  return false;
+}
+
+bool IsPandasTimedelta(PyObject* obj) {
+  return pandas_Timedelta && PyObject_IsInstance(obj, pandas_Timedelta);
+}
+
+bool IsPandasTimestamp(PyObject* obj) {
+  return pandas_Timestamp && PyObject_IsInstance(obj, pandas_Timestamp);
+}
+
+PyObject* BorrowPandasDataOffsetType() { return pandas_DateOffset; }
+
+Status InvalidValue(PyObject* obj, const std::string& why) {
+  auto obj_as_str = PyObject_StdStringRepr(obj);
+  return Status::Invalid("Could not convert ", std::move(obj_as_str), " with type ",
+                         Py_TYPE(obj)->tp_name, ": ", why);
+}
+
+Status InvalidType(PyObject* obj, const std::string& why) {
+  auto obj_as_str = PyObject_StdStringRepr(obj);
+  return Status::TypeError("Could not convert ", std::move(obj_as_str), " with type ",
+                           Py_TYPE(obj)->tp_name, ": ", why);
+}
+
+Status UnboxIntegerAsInt64(PyObject* obj, int64_t* out) {
+  if (PyLong_Check(obj)) {
+    int overflow = 0;
+    *out = PyLong_AsLongLongAndOverflow(obj, &overflow);
+    if (overflow) {
+      return Status::Invalid("PyLong is too large to fit int64");
+    }
+  } else if (PyArray_IsScalar(obj, Byte)) {
+    *out = reinterpret_cast<PyByteScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, UByte)) {
+    *out = reinterpret_cast<PyUByteScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, Short)) {
+    *out = reinterpret_cast<PyShortScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, UShort)) {
+    *out = reinterpret_cast<PyUShortScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, Int)) {
+    *out = reinterpret_cast<PyIntScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, UInt)) {
+    *out = reinterpret_cast<PyUIntScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, Long)) {
+    *out = reinterpret_cast<PyLongScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, ULong)) {
+    *out = reinterpret_cast<PyULongScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, LongLong)) {
+    *out = reinterpret_cast<PyLongLongScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, Int64)) {
+    *out = reinterpret_cast<PyInt64ScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, ULongLong)) {
+    *out = reinterpret_cast<PyULongLongScalarObject*>(obj)->obval;
+  } else if (PyArray_IsScalar(obj, UInt64)) {
+    *out = reinterpret_cast<PyUInt64ScalarObject*>(obj)->obval;
+  } else {
+    return Status::Invalid("Integer scalar type not recognized");
+  }
+  return Status::OK();
+}
+
+Status IntegerScalarToDoubleSafe(PyObject* obj, double* out) {
+  int64_t value = 0;
+  
RETURN_NOT_OK(UnboxIntegerAsInt64(obj, &value)); + + constexpr int64_t kDoubleMax = 1LL << 53; + constexpr int64_t kDoubleMin = -(1LL << 53); + + if (value < kDoubleMin || value > kDoubleMax) { + return Status::Invalid("Integer value ", value, " is outside of the range exactly", + " representable by a IEEE 754 double precision value"); + } + *out = static_cast(value); + return Status::OK(); +} + +Status IntegerScalarToFloat32Safe(PyObject* obj, float* out) { + int64_t value = 0; + RETURN_NOT_OK(UnboxIntegerAsInt64(obj, &value)); + + constexpr int64_t kFloatMax = 1LL << 24; + constexpr int64_t kFloatMin = -(1LL << 24); + + if (value < kFloatMin || value > kFloatMax) { + return Status::Invalid("Integer value ", value, " is outside of the range exactly", + " representable by a IEEE 754 single precision value"); + } + *out = static_cast(value); + return Status::OK(); +} + +void DebugPrint(PyObject* obj) { + std::string repr = PyObject_StdStringRepr(obj); + PySys_WriteStderr("%s\n", repr.c_str()); +} + +} // namespace internal +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/helpers.h b/src/vendored/apache-arrow-12.0.1/arrow/python/helpers.h index 84455d2..a8e5f80 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/helpers.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/helpers.h @@ -28,9 +28,9 @@ #include +#include "arrow/python/visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/python/visibility.h" namespace arrow { diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/inference.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/inference.cc new file mode 100644 index 0000000..3407b32 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/inference.cc @@ -0,0 +1,748 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/python/inference.h" +#include "arrow/python/numpy_interop.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "arrow/scalar.h" +#include "arrow/status.h" +#include "arrow/util/decimal.h" +#include "arrow/util/logging.h" + +#include "arrow/python/datetime.h" +#include "arrow/python/decimal.h" +#include "arrow/python/helpers.h" +#include "arrow/python/iterators.h" +#include "arrow/python/numpy_convert.h" + +namespace arrow { +namespace py { +namespace { +// Assigns a tuple to interval_types_tuple containing the nametuple for +// MonthDayNanoIntervalType and if present dateutil's relativedelta and +// pandas DateOffset. +Status ImportPresentIntervalTypes(OwnedRefNoGIL* interval_types_tuple) { + OwnedRef relative_delta_module; + // These are Optional imports so swallow errors. + OwnedRef relative_delta_type; + // Try to import pandas to get types. 
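+  // [Editor's note] The tuple assembled here always contains the
+  // MonthDayNano named-tuple type and, when the imports succeed, pandas'
+  // DateOffset and dateutil's relativedelta; TypeInferrer later passes the
+  // whole tuple to a single PyObject_IsInstance() check, which is why the
+  // two optional imports above deliberately swallow their errors.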
+ internal::InitPandasStaticData(); + if (internal::ImportModule("dateutil.relativedelta", &relative_delta_module).ok()) { + RETURN_NOT_OK(internal::ImportFromModule(relative_delta_module.obj(), "relativedelta", + &relative_delta_type)); + } + + PyObject* date_offset_type = internal::BorrowPandasDataOffsetType(); + interval_types_tuple->reset( + PyTuple_New(1 + (date_offset_type != nullptr ? 1 : 0) + + (relative_delta_type.obj() != nullptr ? 1 : 0))); + RETURN_IF_PYERROR(); + int index = 0; + PyTuple_SetItem(interval_types_tuple->obj(), index++, + internal::NewMonthDayNanoTupleType()); + RETURN_IF_PYERROR(); + if (date_offset_type != nullptr) { + Py_XINCREF(date_offset_type); + PyTuple_SetItem(interval_types_tuple->obj(), index++, date_offset_type); + RETURN_IF_PYERROR(); + } + if (relative_delta_type.obj() != nullptr) { + PyTuple_SetItem(interval_types_tuple->obj(), index++, relative_delta_type.detach()); + RETURN_IF_PYERROR(); + } + return Status::OK(); +} + +} // namespace + +#define _NUMPY_UNIFY_NOOP(DTYPE) \ + case NPY_##DTYPE: \ + return OK; + +#define _NUMPY_UNIFY_PROMOTE(DTYPE) \ + case NPY_##DTYPE: \ + current_type_num_ = dtype; \ + current_dtype_ = descr; \ + return OK; + +#define _NUMPY_UNIFY_PROMOTE_TO(DTYPE, NEW_TYPE) \ + case NPY_##DTYPE: \ + current_type_num_ = NPY_##NEW_TYPE; \ + current_dtype_ = PyArray_DescrFromType(current_type_num_); \ + return OK; + +// Form a consensus NumPy dtype to use for Arrow conversion for a +// collection of dtype objects observed one at a time +class NumPyDtypeUnifier { + public: + enum Action { OK, INVALID }; + + NumPyDtypeUnifier() : current_type_num_(-1), current_dtype_(nullptr) {} + + Status InvalidMix(int new_dtype) { + return Status::Invalid("Cannot mix NumPy dtypes ", + GetNumPyTypeName(current_type_num_), " and ", + GetNumPyTypeName(new_dtype)); + } + + int Observe_BOOL(PyArray_Descr* descr, int dtype) { return INVALID; } + + int Observe_INT8(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_PROMOTE(INT16); + _NUMPY_UNIFY_PROMOTE(INT32); + _NUMPY_UNIFY_PROMOTE(INT64); + _NUMPY_UNIFY_PROMOTE(FLOAT32); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_INT16(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(INT8); + _NUMPY_UNIFY_PROMOTE(INT32); + _NUMPY_UNIFY_PROMOTE(INT64); + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_PROMOTE(FLOAT32); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_INT32(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(INT8); + _NUMPY_UNIFY_NOOP(INT16); + _NUMPY_UNIFY_PROMOTE(INT32); + _NUMPY_UNIFY_PROMOTE(INT64); + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_NOOP(UINT16); + _NUMPY_UNIFY_PROMOTE_TO(FLOAT32, FLOAT64); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_INT64(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(INT8); + _NUMPY_UNIFY_NOOP(INT16); + _NUMPY_UNIFY_NOOP(INT32); + _NUMPY_UNIFY_NOOP(INT64); + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_NOOP(UINT16); + _NUMPY_UNIFY_NOOP(UINT32); + _NUMPY_UNIFY_PROMOTE_TO(FLOAT32, FLOAT64); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_UINT8(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_PROMOTE(UINT16); + _NUMPY_UNIFY_PROMOTE(UINT32); + _NUMPY_UNIFY_PROMOTE(UINT64); + _NUMPY_UNIFY_PROMOTE(FLOAT32); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_UINT16(PyArray_Descr* descr, int 
dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_PROMOTE(UINT32); + _NUMPY_UNIFY_PROMOTE(UINT64); + _NUMPY_UNIFY_PROMOTE(FLOAT32); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_UINT32(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_NOOP(UINT16); + _NUMPY_UNIFY_PROMOTE(UINT64); + _NUMPY_UNIFY_PROMOTE_TO(FLOAT32, FLOAT64); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_UINT64(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_NOOP(UINT16); + _NUMPY_UNIFY_NOOP(UINT32); + _NUMPY_UNIFY_PROMOTE_TO(FLOAT32, FLOAT64); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_FLOAT16(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_PROMOTE(FLOAT32); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_FLOAT32(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(INT8); + _NUMPY_UNIFY_NOOP(INT16); + _NUMPY_UNIFY_NOOP(INT32); + _NUMPY_UNIFY_NOOP(INT64); + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_NOOP(UINT16); + _NUMPY_UNIFY_NOOP(UINT32); + _NUMPY_UNIFY_NOOP(UINT64); + _NUMPY_UNIFY_PROMOTE(FLOAT64); + default: + return INVALID; + } + } + + int Observe_FLOAT64(PyArray_Descr* descr, int dtype) { + switch (dtype) { + _NUMPY_UNIFY_NOOP(INT8); + _NUMPY_UNIFY_NOOP(INT16); + _NUMPY_UNIFY_NOOP(INT32); + _NUMPY_UNIFY_NOOP(INT64); + _NUMPY_UNIFY_NOOP(UINT8); + _NUMPY_UNIFY_NOOP(UINT16); + _NUMPY_UNIFY_NOOP(UINT32); + _NUMPY_UNIFY_NOOP(UINT64); + default: + return INVALID; + } + } + + int Observe_DATETIME(PyArray_Descr* dtype_obj) { + // TODO: check that units are all the same + return OK; + } + + Status Observe(PyArray_Descr* descr) { + int dtype = fix_numpy_type_num(descr->type_num); + + if (current_type_num_ == -1) { + current_dtype_ = descr; + current_type_num_ = dtype; + return Status::OK(); + } else if (current_type_num_ == dtype) { + return Status::OK(); + } + +#define OBSERVE_CASE(DTYPE) \ + case NPY_##DTYPE: \ + action = Observe_##DTYPE(descr, dtype); \ + break; + + int action = OK; + switch (current_type_num_) { + OBSERVE_CASE(BOOL); + OBSERVE_CASE(INT8); + OBSERVE_CASE(INT16); + OBSERVE_CASE(INT32); + OBSERVE_CASE(INT64); + OBSERVE_CASE(UINT8); + OBSERVE_CASE(UINT16); + OBSERVE_CASE(UINT32); + OBSERVE_CASE(UINT64); + OBSERVE_CASE(FLOAT16); + OBSERVE_CASE(FLOAT32); + OBSERVE_CASE(FLOAT64); + case NPY_DATETIME: + action = Observe_DATETIME(descr); + break; + default: + return Status::NotImplemented("Unsupported numpy type ", GetNumPyTypeName(dtype)); + } + + if (action == INVALID) { + return InvalidMix(dtype); + } + return Status::OK(); + } + + bool dtype_was_observed() const { return current_type_num_ != -1; } + + PyArray_Descr* current_dtype() const { return current_dtype_; } + + int current_type_num() const { return current_type_num_; } + + private: + int current_type_num_; + PyArray_Descr* current_dtype_; +}; + +class TypeInferrer { + // A type inference visitor for Python values + public: + // \param validate_interval the number of elements to observe before checking + // whether the data is mixed type or has other problems. 
This helps avoid + // excess computation for each element while also making sure we "bail out" + // early with long sequences that may have problems up front + // \param make_unions permit mixed-type data by creating union types (not yet + // implemented) + explicit TypeInferrer(bool pandas_null_sentinels = false, + int64_t validate_interval = 100, bool make_unions = false) + : pandas_null_sentinels_(pandas_null_sentinels), + validate_interval_(validate_interval), + make_unions_(make_unions), + total_count_(0), + none_count_(0), + bool_count_(0), + int_count_(0), + date_count_(0), + time_count_(0), + timestamp_micro_count_(0), + duration_count_(0), + float_count_(0), + binary_count_(0), + unicode_count_(0), + decimal_count_(0), + list_count_(0), + struct_count_(0), + arrow_scalar_count_(0), + numpy_dtype_count_(0), + interval_count_(0), + max_decimal_metadata_(std::numeric_limits::min(), + std::numeric_limits::min()), + decimal_type_() { + ARROW_CHECK_OK(internal::ImportDecimalType(&decimal_type_)); + ARROW_CHECK_OK(ImportPresentIntervalTypes(&interval_types_)); + } + + /// \param[in] obj a Python object in the sequence + /// \param[out] keep_going if sufficient information has been gathered to + /// attempt to begin converting the sequence, *keep_going will be set to true + /// to signal to the calling visitor loop to terminate + Status Visit(PyObject* obj, bool* keep_going) { + ++total_count_; + + if (obj == Py_None || (pandas_null_sentinels_ && internal::PandasObjectIsNull(obj))) { + ++none_count_; + } else if (PyBool_Check(obj)) { + ++bool_count_; + *keep_going = make_unions_; + } else if (PyFloat_Check(obj)) { + ++float_count_; + *keep_going = make_unions_; + } else if (internal::IsPyInteger(obj)) { + ++int_count_; + } else if (PyDateTime_Check(obj)) { + // infer timezone from the first encountered datetime object + if (!timestamp_micro_count_) { + OwnedRef tzinfo(PyObject_GetAttrString(obj, "tzinfo")); + if (tzinfo.obj() != nullptr && tzinfo.obj() != Py_None) { + ARROW_ASSIGN_OR_RAISE(timezone_, internal::TzinfoToString(tzinfo.obj())); + } + } + ++timestamp_micro_count_; + *keep_going = make_unions_; + } else if (PyDelta_Check(obj)) { + ++duration_count_; + *keep_going = make_unions_; + } else if (PyDate_Check(obj)) { + ++date_count_; + *keep_going = make_unions_; + } else if (PyTime_Check(obj)) { + ++time_count_; + *keep_going = make_unions_; + } else if (internal::IsPyBinary(obj)) { + ++binary_count_; + *keep_going = make_unions_; + } else if (PyUnicode_Check(obj)) { + ++unicode_count_; + *keep_going = make_unions_; + } else if (arrow::py::is_scalar(obj)) { + RETURN_NOT_OK(VisitArrowScalar(obj, keep_going)); + } else if (PyArray_CheckAnyScalarExact(obj)) { + RETURN_NOT_OK(VisitDType(PyArray_DescrFromScalar(obj), keep_going)); + } else if (PySet_Check(obj) || (Py_TYPE(obj) == &PyDictValues_Type)) { + RETURN_NOT_OK(VisitSet(obj, keep_going)); + } else if (PyArray_Check(obj)) { + RETURN_NOT_OK(VisitNdarray(obj, keep_going)); + } else if (PyDict_Check(obj)) { + RETURN_NOT_OK(VisitDict(obj)); + } else if (PyList_Check(obj) || + (PyTuple_Check(obj) && + !PyObject_IsInstance(obj, PyTuple_GetItem(interval_types_.obj(), 0)))) { + RETURN_NOT_OK(VisitList(obj, keep_going)); + } else if (PyObject_IsInstance(obj, decimal_type_.obj())) { + RETURN_NOT_OK(max_decimal_metadata_.Update(obj)); + ++decimal_count_; + } else if (PyObject_IsInstance(obj, interval_types_.obj())) { + ++interval_count_; + } else { + return internal::InvalidValue(obj, + "did not recognize Python value type when inferring " + 
"an Arrow data type"); + } + + if (total_count_ % validate_interval_ == 0) { + RETURN_NOT_OK(Validate()); + } + + return Status::OK(); + } + + // Infer value type from a sequence of values + Status VisitSequence(PyObject* obj, PyObject* mask = nullptr) { + if (mask == nullptr || mask == Py_None) { + return internal::VisitSequence( + obj, /*offset=*/0, + [this](PyObject* value, bool* keep_going) { return Visit(value, keep_going); }); + } else { + return internal::VisitSequenceMasked( + obj, mask, /*offset=*/0, + [this](PyObject* value, uint8_t masked, bool* keep_going) { + if (!masked) { + return Visit(value, keep_going); + } else { + return Status::OK(); + } + }); + } + } + + // Infer value type from a sequence of values + Status VisitIterable(PyObject* obj) { + return internal::VisitIterable(obj, [this](PyObject* value, bool* keep_going) { + return Visit(value, keep_going); + }); + } + + Status GetType(std::shared_ptr* out) { + // TODO(wesm): handling forming unions + if (make_unions_) { + return Status::NotImplemented("Creating union types not yet supported"); + } + + RETURN_NOT_OK(Validate()); + + if (arrow_scalar_count_ > 0 && arrow_scalar_count_ + none_count_ != total_count_) { + return Status::Invalid( + "pyarrow scalars cannot be mixed " + "with other Python scalar values currently"); + } + + if (numpy_dtype_count_ > 0) { + // All NumPy scalars and Nones/nulls + if (numpy_dtype_count_ + none_count_ == total_count_) { + std::shared_ptr type; + RETURN_NOT_OK(NumPyDtypeToArrow(numpy_unifier_.current_dtype(), &type)); + *out = type; + return Status::OK(); + } + + // The "bad path": data contains a mix of NumPy scalars and + // other kinds of scalars. Note this can happen innocuously + // because numpy.nan is not a NumPy scalar (it's a built-in + // PyFloat) + + // TODO(ARROW-5564): Merge together type unification so this + // hack is not necessary + switch (numpy_unifier_.current_type_num()) { + case NPY_BOOL: + bool_count_ += numpy_dtype_count_; + break; + case NPY_INT8: + case NPY_INT16: + case NPY_INT32: + case NPY_INT64: + case NPY_UINT8: + case NPY_UINT16: + case NPY_UINT32: + case NPY_UINT64: + int_count_ += numpy_dtype_count_; + break; + case NPY_FLOAT32: + case NPY_FLOAT64: + float_count_ += numpy_dtype_count_; + break; + case NPY_DATETIME: + return Status::Invalid( + "numpy.datetime64 scalars cannot be mixed " + "with other Python scalar values currently"); + } + } + + if (list_count_) { + std::shared_ptr value_type; + RETURN_NOT_OK(list_inferrer_->GetType(&value_type)); + *out = list(value_type); + } else if (struct_count_) { + RETURN_NOT_OK(GetStructType(out)); + } else if (decimal_count_) { + if (max_decimal_metadata_.precision() > Decimal128Type::kMaxPrecision) { + // the default constructor does not validate the precision and scale + ARROW_ASSIGN_OR_RAISE(*out, + Decimal256Type::Make(max_decimal_metadata_.precision(), + max_decimal_metadata_.scale())); + } else { + ARROW_ASSIGN_OR_RAISE(*out, + Decimal128Type::Make(max_decimal_metadata_.precision(), + max_decimal_metadata_.scale())); + } + } else if (float_count_) { + // Prioritize floats before integers + *out = float64(); + } else if (int_count_) { + *out = int64(); + } else if (date_count_) { + *out = date32(); + } else if (time_count_) { + *out = time64(TimeUnit::MICRO); + } else if (timestamp_micro_count_) { + *out = timestamp(TimeUnit::MICRO, timezone_); + } else if (duration_count_) { + *out = duration(TimeUnit::MICRO); + } else if (bool_count_) { + *out = boolean(); + } else if (binary_count_) { + *out = binary(); 
+ } else if (unicode_count_) { + *out = utf8(); + } else if (interval_count_) { + *out = month_day_nano_interval(); + } else if (arrow_scalar_count_) { + *out = scalar_type_; + } else { + *out = null(); + } + return Status::OK(); + } + + int64_t total_count() const { return total_count_; } + + protected: + Status Validate() const { + if (list_count_ > 0) { + if (list_count_ + none_count_ != total_count_) { + return Status::Invalid("cannot mix list and non-list, non-null values"); + } + RETURN_NOT_OK(list_inferrer_->Validate()); + } else if (struct_count_ > 0) { + if (struct_count_ + none_count_ != total_count_) { + return Status::Invalid("cannot mix struct and non-struct, non-null values"); + } + for (const auto& it : struct_inferrers_) { + RETURN_NOT_OK(it.second.Validate()); + } + } + return Status::OK(); + } + + Status VisitArrowScalar(PyObject* obj, bool* keep_going /* unused */) { + ARROW_ASSIGN_OR_RAISE(auto scalar, arrow::py::unwrap_scalar(obj)); + // Check that all the scalar types for the sequence are the same + if (arrow_scalar_count_ > 0 && *scalar->type != *scalar_type_) { + return internal::InvalidValue(obj, "cannot mix scalars with different types"); + } + scalar_type_ = scalar->type; + ++arrow_scalar_count_; + return Status::OK(); + } + + Status VisitDType(PyArray_Descr* dtype, bool* keep_going) { + // Continue visiting dtypes for now. + // TODO(wesm): devise approach for unions + ++numpy_dtype_count_; + *keep_going = true; + return numpy_unifier_.Observe(dtype); + } + + Status VisitList(PyObject* obj, bool* keep_going /* unused */) { + if (!list_inferrer_) { + list_inferrer_.reset( + new TypeInferrer(pandas_null_sentinels_, validate_interval_, make_unions_)); + } + ++list_count_; + return list_inferrer_->VisitSequence(obj); + } + + Status VisitSet(PyObject* obj, bool* keep_going /* unused */) { + if (!list_inferrer_) { + list_inferrer_.reset( + new TypeInferrer(pandas_null_sentinels_, validate_interval_, make_unions_)); + } + ++list_count_; + return list_inferrer_->VisitIterable(obj); + } + + Status VisitNdarray(PyObject* obj, bool* keep_going) { + PyArray_Descr* dtype = PyArray_DESCR(reinterpret_cast(obj)); + if (dtype->type_num == NPY_OBJECT) { + return VisitList(obj, keep_going); + } + // Not an object array: infer child Arrow type from dtype + if (!list_inferrer_) { + list_inferrer_.reset( + new TypeInferrer(pandas_null_sentinels_, validate_interval_, make_unions_)); + } + ++list_count_; + + // XXX(wesm): In ARROW-4324 I added accounting to check whether + // all of the non-null values have NumPy dtypes, but the + // total_count not not being properly incremented here + ++(*list_inferrer_).total_count_; + return list_inferrer_->VisitDType(dtype, keep_going); + } + + Status VisitDict(PyObject* obj) { + PyObject* key_obj; + PyObject* value_obj; + Py_ssize_t pos = 0; + + while (PyDict_Next(obj, &pos, &key_obj, &value_obj)) { + std::string key; + if (PyUnicode_Check(key_obj)) { + RETURN_NOT_OK(internal::PyUnicode_AsStdString(key_obj, &key)); + } else if (PyBytes_Check(key_obj)) { + key = internal::PyBytes_AsStdString(key_obj); + } else { + return Status::TypeError("Expected dict key of type str or bytes, got '", + Py_TYPE(key_obj)->tp_name, "'"); + } + // Get or create visitor for this key + auto it = struct_inferrers_.find(key); + if (it == struct_inferrers_.end()) { + it = struct_inferrers_ + .insert( + std::make_pair(key, TypeInferrer(pandas_null_sentinels_, + validate_interval_, make_unions_))) + .first; + } + TypeInferrer* visitor = &it->second; + + // We ignore 
termination signals from child visitors for now
+      //
+      // TODO(wesm): keep track of whether type inference has terminated for
+      // the child visitors to avoid doing unneeded work
+      bool keep_going = true;
+      RETURN_NOT_OK(visitor->Visit(value_obj, &keep_going));
+    }
+
+    // We do not terminate visiting dicts since we want the union of all
+    // observed keys
+    ++struct_count_;
+    return Status::OK();
+  }
+
+  Status GetStructType(std::shared_ptr<DataType>* out) {
+    std::vector<std::shared_ptr<Field>> fields;
+    for (auto&& it : struct_inferrers_) {
+      std::shared_ptr<DataType> field_type;
+      RETURN_NOT_OK(it.second.GetType(&field_type));
+      fields.emplace_back(field(it.first, field_type));
+    }
+    *out = struct_(fields);
+    return Status::OK();
+  }
+
+ private:
+  bool pandas_null_sentinels_;
+  int64_t validate_interval_;
+  bool make_unions_;
+  int64_t total_count_;
+  int64_t none_count_;
+  int64_t bool_count_;
+  int64_t int_count_;
+  int64_t date_count_;
+  int64_t time_count_;
+  int64_t timestamp_micro_count_;
+  std::string timezone_;
+  int64_t duration_count_;
+  int64_t float_count_;
+  int64_t binary_count_;
+  int64_t unicode_count_;
+  int64_t decimal_count_;
+  int64_t list_count_;
+  int64_t struct_count_;
+  int64_t arrow_scalar_count_;
+  int64_t numpy_dtype_count_;
+  int64_t interval_count_;
+  std::unique_ptr<TypeInferrer> list_inferrer_;
+  std::map<std::string, TypeInferrer> struct_inferrers_;
+  std::shared_ptr<DataType> scalar_type_;
+
+  // If we observe a strongly-typed value in e.g. a NumPy array, we can store
+  // it here to skip the type counting logic above
+  NumPyDtypeUnifier numpy_unifier_;
+
+  internal::DecimalMetadata max_decimal_metadata_;
+
+  OwnedRefNoGIL decimal_type_;
+  OwnedRefNoGIL interval_types_;
+};
+
+// Non-exhaustive type inference
+Result<std::shared_ptr<DataType>> InferArrowType(PyObject* obj, PyObject* mask,
+                                                 bool pandas_null_sentinels) {
+  if (pandas_null_sentinels) {
+    // ARROW-842: If pandas is not installed then null checks will be less
+    // comprehensive, but that is okay.
+    internal::InitPandasStaticData();
+  }
+
+  std::shared_ptr<DataType> out_type;
+  TypeInferrer inferrer(pandas_null_sentinels);
+  RETURN_NOT_OK(inferrer.VisitSequence(obj, mask));
+  RETURN_NOT_OK(inferrer.GetType(&out_type));
+  if (out_type == nullptr) {
+    return Status::TypeError("Unable to determine data type");
+  } else {
+    return std::move(out_type);
+  }
+}
+
+ARROW_PYTHON_EXPORT
+bool IsPyBool(PyObject* obj) { return internal::PyBoolScalar_Check(obj); }
+
+ARROW_PYTHON_EXPORT
+bool IsPyInt(PyObject* obj) { return internal::PyIntScalar_Check(obj); }
+
+ARROW_PYTHON_EXPORT
+bool IsPyFloat(PyObject* obj) { return internal::PyFloatScalar_Check(obj); }
+
+}  // namespace py
+}  // namespace arrow
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/inference.h b/src/vendored/apache-arrow-12.0.1/arrow/python/inference.h
index 1d6516b..983384d 100644
--- a/src/vendored/apache-arrow-12.0.1/arrow/python/inference.h
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/inference.h
@@ -24,9 +24,9 @@
 
 #include
 
+#include "arrow/python/visibility.h"
 #include "arrow/type.h"
 #include "arrow/util/macros.h"
-#include "arrow/python/visibility.h"
 
 #include "common.h"
 
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/init.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/init.cc
new file mode 100644
index 0000000..dba293b
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/init.cc
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Trigger the array import (inversion of NO_IMPORT_ARRAY) +#define NUMPY_IMPORT_ARRAY + +#include "arrow/python/init.h" +#include "arrow/python/numpy_interop.h" + +int arrow_init_numpy() { return arrow::py::import_numpy(); } diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/io.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/io.cc new file mode 100644 index 0000000..43f8297 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/io.cc @@ -0,0 +1,384 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "io.h" + +#include +#include +#include +#include +#include + +#include "arrow/io/memory.h" +#include "arrow/memory_pool.h" +#include "arrow/status.h" +#include "arrow/util/logging.h" + +#include "arrow/python/common.h" +#include "arrow/python/pyarrow.h" + +namespace arrow { + +using arrow::io::TransformInputStream; + +namespace py { + +// ---------------------------------------------------------------------- +// Python file + +// A common interface to a Python file-like object. 
Must acquire GIL before
+// calling any methods
+class PythonFile {
+ public:
+  explicit PythonFile(PyObject* file) : file_(file), checked_read_buffer_(false) {
+    Py_INCREF(file);
+  }
+
+  Status CheckClosed() const {
+    if (!file_) {
+      return Status::Invalid("operation on closed Python file");
+    }
+    return Status::OK();
+  }
+
+  Status Close() {
+    if (file_) {
+      PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "close", "()");
+      Py_XDECREF(result);
+      file_.reset();
+      PY_RETURN_IF_ERROR(StatusCode::IOError);
+    }
+    return Status::OK();
+  }
+
+  Status Abort() {
+    file_.reset();
+    return Status::OK();
+  }
+
+  bool closed() const {
+    if (!file_) {
+      return true;
+    }
+    PyObject* result = PyObject_GetAttrString(file_.obj(), "closed");
+    if (result == NULL) {
+      // Can't propagate the error, so write it out and return an arbitrary value
+      PyErr_WriteUnraisable(NULL);
+      return true;
+    }
+    int ret = PyObject_IsTrue(result);
+    Py_XDECREF(result);
+    if (ret < 0) {
+      PyErr_WriteUnraisable(NULL);
+      return true;
+    }
+    return ret != 0;
+  }
+
+  Status Seek(int64_t position, int whence) {
+    RETURN_NOT_OK(CheckClosed());
+
+    // whence: 0 for relative to start of file, 2 for end of file
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "seek", "(ni)",
+                                               static_cast<Py_ssize_t>(position), whence);
+    Py_XDECREF(result);
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+    return Status::OK();
+  }
+
+  Status Read(int64_t nbytes, PyObject** out) {
+    RETURN_NOT_OK(CheckClosed());
+
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read", "(n)",
+                                               static_cast<Py_ssize_t>(nbytes));
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+    *out = result;
+    return Status::OK();
+  }
+
+  Status ReadBuffer(int64_t nbytes, PyObject** out) {
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "read_buffer", "(n)",
+                                               static_cast<Py_ssize_t>(nbytes));
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+    *out = result;
+    return Status::OK();
+  }
+
+  Status Write(const void* data, int64_t nbytes) {
+    RETURN_NOT_OK(CheckClosed());
+
+    // Since the data isn't owned, we have to make a copy
+    PyObject* py_data =
+        PyBytes_FromStringAndSize(reinterpret_cast<const char*>(data), nbytes);
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "write", "(O)", py_data);
+    Py_XDECREF(py_data);
+    Py_XDECREF(result);
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+    return Status::OK();
+  }
+
+  Status Write(const std::shared_ptr<Buffer>& buffer) {
+    RETURN_NOT_OK(CheckClosed());
+
+    PyObject* py_data = wrap_buffer(buffer);
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "write", "(O)", py_data);
+    Py_XDECREF(py_data);
+    Py_XDECREF(result);
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+    return Status::OK();
+  }
+
+  Result<int64_t> Tell() {
+    RETURN_NOT_OK(CheckClosed());
+
+    PyObject* result = cpp_PyObject_CallMethod(file_.obj(), "tell", "()");
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+
+    int64_t position = PyLong_AsLongLong(result);
+    Py_DECREF(result);
+
+    // PyLong_AsLongLong can raise OverflowError
+    PY_RETURN_IF_ERROR(StatusCode::IOError);
+    return position;
+  }
+
+  std::mutex& lock() { return lock_; }
+
+  bool HasReadBuffer() {
+    if (!checked_read_buffer_) {  // we don't want to check this each time
+      has_read_buffer_ = PyObject_HasAttrString(file_.obj(), "read_buffer") == 1;
+      checked_read_buffer_ = true;
+    }
+    return has_read_buffer_;
+  }
+
+ private:
+  std::mutex lock_;
+  OwnedRefNoGIL file_;
+  bool has_read_buffer_;
+  bool checked_read_buffer_;
+};
+
+// 
---------------------------------------------------------------------- +// Seekable input stream + +PyReadableFile::PyReadableFile(PyObject* file) { file_.reset(new PythonFile(file)); } + +// The destructor does not close the underlying Python file object, as +// there may be multiple references to it. Instead let the Python +// destructor do its job. +PyReadableFile::~PyReadableFile() {} + +Status PyReadableFile::Abort() { + return SafeCallIntoPython([this]() { return file_->Abort(); }); +} + +Status PyReadableFile::Close() { + return SafeCallIntoPython([this]() { return file_->Close(); }); +} + +bool PyReadableFile::closed() const { + bool res; + Status st = SafeCallIntoPython([this, &res]() { + res = file_->closed(); + return Status::OK(); + }); + return res; +} + +Status PyReadableFile::Seek(int64_t position) { + return SafeCallIntoPython([=] { return file_->Seek(position, 0); }); +} + +Result PyReadableFile::Tell() const { + return SafeCallIntoPython([=]() -> Result { return file_->Tell(); }); +} + +Result PyReadableFile::Read(int64_t nbytes, void* out) { + return SafeCallIntoPython([=]() -> Result { + OwnedRef bytes; + RETURN_NOT_OK(file_->Read(nbytes, bytes.ref())); + PyObject* bytes_obj = bytes.obj(); + DCHECK(bytes_obj != NULL); + + Py_buffer py_buf; + if (!PyObject_GetBuffer(bytes_obj, &py_buf, PyBUF_ANY_CONTIGUOUS)) { + const uint8_t* data = reinterpret_cast(py_buf.buf); + std::memcpy(out, data, py_buf.len); + int64_t len = py_buf.len; + PyBuffer_Release(&py_buf); + return len; + } else { + return Status::TypeError( + "Python file read() should have returned a bytes object or an object " + "supporting the buffer protocol, got '", + Py_TYPE(bytes_obj)->tp_name, "' (did you open the file in binary mode?)"); + } + }); +} + +Result> PyReadableFile::Read(int64_t nbytes) { + return SafeCallIntoPython([=]() -> Result> { + OwnedRef buffer_obj; + if (file_->HasReadBuffer()) { + RETURN_NOT_OK(file_->ReadBuffer(nbytes, buffer_obj.ref())); + } else { + RETURN_NOT_OK(file_->Read(nbytes, buffer_obj.ref())); + } + DCHECK(buffer_obj.obj() != NULL); + + return PyBuffer::FromPyObject(buffer_obj.obj()); + }); +} + +Result PyReadableFile::ReadAt(int64_t position, int64_t nbytes, void* out) { + std::lock_guard guard(file_->lock()); + return SafeCallIntoPython([=]() -> Result { + RETURN_NOT_OK(Seek(position)); + return Read(nbytes, out); + }); +} + +Result> PyReadableFile::ReadAt(int64_t position, int64_t nbytes) { + std::lock_guard guard(file_->lock()); + return SafeCallIntoPython([=]() -> Result> { + RETURN_NOT_OK(Seek(position)); + return Read(nbytes); + }); +} + +Result PyReadableFile::GetSize() { + return SafeCallIntoPython([=]() -> Result { + ARROW_ASSIGN_OR_RAISE(int64_t current_position, file_->Tell()); + RETURN_NOT_OK(file_->Seek(0, 2)); + + ARROW_ASSIGN_OR_RAISE(int64_t file_size, file_->Tell()); + // Restore previous file position + RETURN_NOT_OK(file_->Seek(current_position, 0)); + + return file_size; + }); +} + +// ---------------------------------------------------------------------- +// Output stream + +PyOutputStream::PyOutputStream(PyObject* file) : position_(0) { + file_.reset(new PythonFile(file)); +} + +// The destructor does not close the underlying Python file object, as +// there may be multiple references to it. Instead let the Python +// destructor do its job. 
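+//
+// Usage sketch for the wrappers in this file (a minimal sketch, assuming
+// `py_file` is an open binary-mode Python file object and the interpreter is
+// initialized):
+//
+//   auto file = std::make_shared<PyReadableFile>(py_file);
+//   ARROW_ASSIGN_OR_RAISE(int64_t size, file->GetSize());
+//   ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Buffer> buf, file->Read(size));
+//
+// Read() goes through the object's read_buffer() method when one exists,
+// avoiding an extra copy of the returned bytes.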
+PyOutputStream::~PyOutputStream() {} + +Status PyOutputStream::Abort() { + return SafeCallIntoPython([=]() { return file_->Abort(); }); +} + +Status PyOutputStream::Close() { + return SafeCallIntoPython([=]() { return file_->Close(); }); +} + +bool PyOutputStream::closed() const { + bool res; + Status st = SafeCallIntoPython([this, &res]() { + res = file_->closed(); + return Status::OK(); + }); + return res; +} + +Result PyOutputStream::Tell() const { return position_; } + +Status PyOutputStream::Write(const void* data, int64_t nbytes) { + return SafeCallIntoPython([=]() { + position_ += nbytes; + return file_->Write(data, nbytes); + }); +} + +Status PyOutputStream::Write(const std::shared_ptr& buffer) { + return SafeCallIntoPython([=]() { + position_ += buffer->size(); + return file_->Write(buffer); + }); +} + +// ---------------------------------------------------------------------- +// Foreign buffer + +Status PyForeignBuffer::Make(const uint8_t* data, int64_t size, PyObject* base, + std::shared_ptr* out) { + PyForeignBuffer* buf = new PyForeignBuffer(data, size, base); + if (buf == NULL) { + return Status::OutOfMemory("could not allocate foreign buffer object"); + } else { + *out = std::shared_ptr(buf); + return Status::OK(); + } +} + +// ---------------------------------------------------------------------- +// TransformInputStream::TransformFunc wrapper + +struct TransformFunctionWrapper { + TransformFunctionWrapper(TransformCallback cb, PyObject* arg) + : cb_(std::move(cb)), arg_(std::make_shared(arg)) { + Py_INCREF(arg); + } + + Result> operator()(const std::shared_ptr& src) { + return SafeCallIntoPython([=]() -> Result> { + std::shared_ptr dest; + cb_(arg_->obj(), src, &dest); + RETURN_NOT_OK(CheckPyError()); + return dest; + }); + } + + protected: + // Need to wrap OwnedRefNoGIL because std::function needs the callable + // to be copy-constructible... + TransformCallback cb_; + std::shared_ptr arg_; +}; + +std::shared_ptr<::arrow::io::InputStream> MakeTransformInputStream( + std::shared_ptr<::arrow::io::InputStream> wrapped, TransformInputStreamVTable vtable, + PyObject* handler) { + TransformInputStream::TransformFunc transform( + TransformFunctionWrapper{std::move(vtable.transform), handler}); + return std::make_shared(std::move(wrapped), std::move(transform)); +} + +std::shared_ptr MakeStreamTransformFunc(TransformInputStreamVTable vtable, + PyObject* handler) { + TransformInputStream::TransformFunc transform( + TransformFunctionWrapper{std::move(vtable.transform), handler}); + StreamWrapFunc func = [transform](std::shared_ptr<::arrow::io::InputStream> wrapped) { + return std::make_shared(wrapped, transform); + }; + return std::make_shared(func); +} + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/ipc.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/ipc.cc new file mode 100644 index 0000000..9348182 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/ipc.cc @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "ipc.h"
+
+#include <memory>
+
+#include "arrow/python/pyarrow.h"
+
+namespace arrow {
+namespace py {
+
+PyRecordBatchReader::PyRecordBatchReader() {}
+
+Status PyRecordBatchReader::Init(std::shared_ptr<Schema> schema, PyObject* iterable) {
+  schema_ = std::move(schema);
+
+  iterator_.reset(PyObject_GetIter(iterable));
+  return CheckPyError();
+}
+
+std::shared_ptr<Schema> PyRecordBatchReader::schema() const { return schema_; }
+
+Status PyRecordBatchReader::ReadNext(std::shared_ptr<RecordBatch>* batch) {
+  PyAcquireGIL lock;
+
+  if (!iterator_) {
+    // End of stream
+    batch->reset();
+    return Status::OK();
+  }
+
+  OwnedRef py_batch(PyIter_Next(iterator_.obj()));
+  if (!py_batch) {
+    RETURN_IF_PYERROR();
+    // End of stream
+    batch->reset();
+    iterator_.reset();
+    return Status::OK();
+  }
+
+  return unwrap_batch(py_batch.obj()).Value(batch);
+}
+
+Result<std::shared_ptr<RecordBatchReader>> PyRecordBatchReader::Make(
+    std::shared_ptr<Schema> schema, PyObject* iterable) {
+  auto reader = std::shared_ptr<PyRecordBatchReader>(new PyRecordBatchReader());
+  RETURN_NOT_OK(reader->Init(std::move(schema), iterable));
+  return reader;
+}
+
+}  // namespace py
+}  // namespace arrow
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/ipc.h b/src/vendored/apache-arrow-12.0.1/arrow/python/ipc.h
index 57eabfe..92232ed 100644
--- a/src/vendored/apache-arrow-12.0.1/arrow/python/ipc.h
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/ipc.h
@@ -19,11 +19,11 @@
 
 #include
 
+#include "arrow/python/common.h"
+#include "arrow/python/visibility.h"
 #include "arrow/record_batch.h"
 #include "arrow/result.h"
 #include "arrow/util/macros.h"
-#include "arrow/python/common.h"
-#include "arrow/python/visibility.h"
 
 namespace arrow {
 namespace py {
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.cc
new file mode 100644
index 0000000..4970680
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.cc
@@ -0,0 +1,562 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
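+
+// A quick orientation: this file maps NumPy dtype descriptors and ndarrays
+// onto Arrow types, tensors and sparse tensors. A minimal sketch of the dtype
+// mapping defined below (assuming the NumPy C API has been imported):
+//
+//   std::shared_ptr<arrow::DataType> type;
+//   PyArray_Descr* descr = PyArray_DescrFromType(NPY_FLOAT32);
+//   ARROW_CHECK_OK(arrow::py::NumPyDtypeToArrow(descr, &type));  // float32()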
+ +#include "arrow/python/numpy_interop.h" + +#include "arrow/python/numpy_convert.h" + +#include +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/sparse_tensor.h" +#include "arrow/tensor.h" +#include "arrow/type.h" +#include "arrow/util/logging.h" + +#include "arrow/python/common.h" +#include "arrow/python/pyarrow.h" +#include "arrow/python/type_traits.h" + +namespace arrow { +namespace py { + +NumPyBuffer::NumPyBuffer(PyObject* ao) : Buffer(nullptr, 0) { + PyAcquireGIL lock; + arr_ = ao; + Py_INCREF(ao); + + if (PyArray_Check(ao)) { + PyArrayObject* ndarray = reinterpret_cast(ao); + auto ptr = reinterpret_cast(PyArray_DATA(ndarray)); + data_ = const_cast(ptr); + size_ = PyArray_SIZE(ndarray) * PyArray_DESCR(ndarray)->elsize; + capacity_ = size_; + is_mutable_ = !!(PyArray_FLAGS(ndarray) & NPY_ARRAY_WRITEABLE); + } +} + +NumPyBuffer::~NumPyBuffer() { + PyAcquireGIL lock; + Py_XDECREF(arr_); +} + +#define TO_ARROW_TYPE_CASE(NPY_NAME, FACTORY) \ + case NPY_##NPY_NAME: \ + *out = FACTORY(); \ + break; + +namespace { + +Status GetTensorType(PyObject* dtype, std::shared_ptr* out) { + if (!PyObject_TypeCheck(dtype, &PyArrayDescr_Type)) { + return Status::TypeError("Did not pass numpy.dtype object"); + } + PyArray_Descr* descr = reinterpret_cast(dtype); + int type_num = fix_numpy_type_num(descr->type_num); + + switch (type_num) { + TO_ARROW_TYPE_CASE(BOOL, uint8); + TO_ARROW_TYPE_CASE(INT8, int8); + TO_ARROW_TYPE_CASE(INT16, int16); + TO_ARROW_TYPE_CASE(INT32, int32); + TO_ARROW_TYPE_CASE(INT64, int64); + TO_ARROW_TYPE_CASE(UINT8, uint8); + TO_ARROW_TYPE_CASE(UINT16, uint16); + TO_ARROW_TYPE_CASE(UINT32, uint32); + TO_ARROW_TYPE_CASE(UINT64, uint64); + TO_ARROW_TYPE_CASE(FLOAT16, float16); + TO_ARROW_TYPE_CASE(FLOAT32, float32); + TO_ARROW_TYPE_CASE(FLOAT64, float64); + default: { + return Status::NotImplemented("Unsupported numpy type ", descr->type_num); + } + } + return Status::OK(); +} + +Status GetNumPyType(const DataType& type, int* type_num) { +#define NUMPY_TYPE_CASE(ARROW_NAME, NPY_NAME) \ + case Type::ARROW_NAME: \ + *type_num = NPY_##NPY_NAME; \ + break; + + switch (type.id()) { + NUMPY_TYPE_CASE(UINT8, UINT8); + NUMPY_TYPE_CASE(INT8, INT8); + NUMPY_TYPE_CASE(UINT16, UINT16); + NUMPY_TYPE_CASE(INT16, INT16); + NUMPY_TYPE_CASE(UINT32, UINT32); + NUMPY_TYPE_CASE(INT32, INT32); + NUMPY_TYPE_CASE(UINT64, UINT64); + NUMPY_TYPE_CASE(INT64, INT64); + NUMPY_TYPE_CASE(HALF_FLOAT, FLOAT16); + NUMPY_TYPE_CASE(FLOAT, FLOAT32); + NUMPY_TYPE_CASE(DOUBLE, FLOAT64); + default: { + return Status::NotImplemented("Unsupported tensor type: ", type.ToString()); + } + } +#undef NUMPY_TYPE_CASE + + return Status::OK(); +} + +} // namespace + +Status NumPyDtypeToArrow(PyObject* dtype, std::shared_ptr* out) { + if (!PyObject_TypeCheck(dtype, &PyArrayDescr_Type)) { + return Status::TypeError("Did not pass numpy.dtype object"); + } + PyArray_Descr* descr = reinterpret_cast(dtype); + return NumPyDtypeToArrow(descr, out); +} + +Status NumPyDtypeToArrow(PyArray_Descr* descr, std::shared_ptr* out) { + int type_num = fix_numpy_type_num(descr->type_num); + + switch (type_num) { + TO_ARROW_TYPE_CASE(BOOL, boolean); + TO_ARROW_TYPE_CASE(INT8, int8); + TO_ARROW_TYPE_CASE(INT16, int16); + TO_ARROW_TYPE_CASE(INT32, int32); + TO_ARROW_TYPE_CASE(INT64, int64); + TO_ARROW_TYPE_CASE(UINT8, uint8); + TO_ARROW_TYPE_CASE(UINT16, uint16); + TO_ARROW_TYPE_CASE(UINT32, uint32); + TO_ARROW_TYPE_CASE(UINT64, uint64); + TO_ARROW_TYPE_CASE(FLOAT16, float16); + TO_ARROW_TYPE_CASE(FLOAT32, float32); + 
TO_ARROW_TYPE_CASE(FLOAT64, float64);
+    TO_ARROW_TYPE_CASE(STRING, binary);
+    TO_ARROW_TYPE_CASE(UNICODE, utf8);
+    case NPY_DATETIME: {
+      auto date_dtype =
+          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+      switch (date_dtype->meta.base) {
+        case NPY_FR_s:
+          *out = timestamp(TimeUnit::SECOND);
+          break;
+        case NPY_FR_ms:
+          *out = timestamp(TimeUnit::MILLI);
+          break;
+        case NPY_FR_us:
+          *out = timestamp(TimeUnit::MICRO);
+          break;
+        case NPY_FR_ns:
+          *out = timestamp(TimeUnit::NANO);
+          break;
+        case NPY_FR_D:
+          *out = date32();
+          break;
+        case NPY_FR_GENERIC:
+          return Status::NotImplemented("Unbound or generic datetime64 time unit");
+        default:
+          return Status::NotImplemented("Unsupported datetime64 time unit");
+      }
+    } break;
+    case NPY_TIMEDELTA: {
+      auto timedelta_dtype =
+          reinterpret_cast<PyArray_DatetimeDTypeMetaData*>(descr->c_metadata);
+      switch (timedelta_dtype->meta.base) {
+        case NPY_FR_s:
+          *out = duration(TimeUnit::SECOND);
+          break;
+        case NPY_FR_ms:
+          *out = duration(TimeUnit::MILLI);
+          break;
+        case NPY_FR_us:
+          *out = duration(TimeUnit::MICRO);
+          break;
+        case NPY_FR_ns:
+          *out = duration(TimeUnit::NANO);
+          break;
+        case NPY_FR_GENERIC:
+          return Status::NotImplemented("Unbound or generic timedelta64 time unit");
+        default:
+          return Status::NotImplemented("Unsupported timedelta64 time unit");
+      }
+    } break;
+    default: {
+      return Status::NotImplemented("Unsupported numpy type ", descr->type_num);
+    }
+  }
+
+  return Status::OK();
+}
+
+#undef TO_ARROW_TYPE_CASE
+
+Status NdarrayToTensor(MemoryPool* pool, PyObject* ao,
+                       const std::vector<std::string>& dim_names,
+                       std::shared_ptr<Tensor>* out) {
+  if (!PyArray_Check(ao)) {
+    return Status::TypeError("Did not pass ndarray object");
+  }
+
+  PyArrayObject* ndarray = reinterpret_cast<PyArrayObject*>(ao);
+
+  // TODO(wesm): What do we want to do with non-contiguous memory and negative strides?
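+  // For a C-contiguous float64 array of shape (2, 3), for example, NumPy
+  // reports shape {2, 3} and strides {24, 8} (in bytes); both are carried
+  // over to the Tensor verbatim, while negative strides (e.g. from arr[::-1])
+  // are rejected below.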
+ + int ndim = PyArray_NDIM(ndarray); + + std::shared_ptr data = std::make_shared(ao); + std::vector shape(ndim); + std::vector strides(ndim); + + npy_intp* array_strides = PyArray_STRIDES(ndarray); + npy_intp* array_shape = PyArray_SHAPE(ndarray); + for (int i = 0; i < ndim; ++i) { + if (array_strides[i] < 0) { + return Status::Invalid("Negative ndarray strides not supported"); + } + shape[i] = array_shape[i]; + strides[i] = array_strides[i]; + } + + std::shared_ptr type; + RETURN_NOT_OK( + GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray)), &type)); + *out = std::make_shared(type, data, shape, strides, dim_names); + return Status::OK(); +} + +Status TensorToNdarray(const std::shared_ptr& tensor, PyObject* base, + PyObject** out) { + int type_num = 0; + RETURN_NOT_OK(GetNumPyType(*tensor->type(), &type_num)); + PyArray_Descr* dtype = PyArray_DescrNewFromType(type_num); + RETURN_IF_PYERROR(); + + const int ndim = tensor->ndim(); + std::vector npy_shape(ndim); + std::vector npy_strides(ndim); + + for (int i = 0; i < ndim; ++i) { + npy_shape[i] = tensor->shape()[i]; + npy_strides[i] = tensor->strides()[i]; + } + + const void* immutable_data = nullptr; + if (tensor->data()) { + immutable_data = tensor->data()->data(); + } + + // Remove const =( + void* mutable_data = const_cast(immutable_data); + + int array_flags = 0; + if (tensor->is_row_major()) { + array_flags |= NPY_ARRAY_C_CONTIGUOUS; + } + if (tensor->is_column_major()) { + array_flags |= NPY_ARRAY_F_CONTIGUOUS; + } + if (tensor->is_mutable()) { + array_flags |= NPY_ARRAY_WRITEABLE; + } + + PyObject* result = + PyArray_NewFromDescr(&PyArray_Type, dtype, ndim, npy_shape.data(), + npy_strides.data(), mutable_data, array_flags, nullptr); + RETURN_IF_PYERROR(); + + if (base == Py_None || base == nullptr) { + base = py::wrap_tensor(tensor); + } else { + Py_XINCREF(base); + } + PyArray_SetBaseObject(reinterpret_cast(result), base); + *out = result; + return Status::OK(); +} + +// Wrap the dense data of a sparse tensor in a ndarray +static Status SparseTensorDataToNdarray(const SparseTensor& sparse_tensor, + std::vector data_shape, PyObject* base, + PyObject** out_data) { + int type_num_data = 0; + RETURN_NOT_OK(GetNumPyType(*sparse_tensor.type(), &type_num_data)); + PyArray_Descr* dtype_data = PyArray_DescrNewFromType(type_num_data); + RETURN_IF_PYERROR(); + + const void* immutable_data = sparse_tensor.data()->data(); + // Remove const =( + void* mutable_data = const_cast(immutable_data); + int array_flags = NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS; + if (sparse_tensor.is_mutable()) { + array_flags |= NPY_ARRAY_WRITEABLE; + } + + *out_data = PyArray_NewFromDescr(&PyArray_Type, dtype_data, + static_cast(data_shape.size()), data_shape.data(), + nullptr, mutable_data, array_flags, nullptr); + RETURN_IF_PYERROR(); + Py_XINCREF(base); + PyArray_SetBaseObject(reinterpret_cast(*out_data), base); + return Status::OK(); +} + +Status SparseCOOTensorToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, + PyObject** out_coords) { + const auto& sparse_index = arrow::internal::checked_cast( + *sparse_tensor->sparse_index()); + + // Wrap tensor data + OwnedRef result_data; + RETURN_NOT_OK(SparseTensorDataToNdarray( + *sparse_tensor, {static_cast(sparse_tensor->non_zero_length()), 1}, base, + result_data.ref())); + + // Wrap indices + PyObject* result_coords; + RETURN_NOT_OK(TensorToNdarray(sparse_index.indices(), base, &result_coords)); + + *out_data = result_data.detach(); + *out_coords = result_coords; + return 
Status::OK(); +} + +Status SparseCSXMatrixToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, + PyObject** out_indptr, PyObject** out_indices) { + // Wrap indices + OwnedRef result_indptr; + OwnedRef result_indices; + + switch (sparse_tensor->format_id()) { + case SparseTensorFormat::CSR: { + const auto& sparse_index = arrow::internal::checked_cast( + *sparse_tensor->sparse_index()); + RETURN_NOT_OK(TensorToNdarray(sparse_index.indptr(), base, result_indptr.ref())); + RETURN_NOT_OK(TensorToNdarray(sparse_index.indices(), base, result_indices.ref())); + break; + } + case SparseTensorFormat::CSC: { + const auto& sparse_index = arrow::internal::checked_cast( + *sparse_tensor->sparse_index()); + RETURN_NOT_OK(TensorToNdarray(sparse_index.indptr(), base, result_indptr.ref())); + RETURN_NOT_OK(TensorToNdarray(sparse_index.indices(), base, result_indices.ref())); + break; + } + default: + return Status::NotImplemented("Invalid SparseTensor type."); + } + + // Wrap tensor data + OwnedRef result_data; + RETURN_NOT_OK(SparseTensorDataToNdarray( + *sparse_tensor, {static_cast(sparse_tensor->non_zero_length()), 1}, base, + result_data.ref())); + + *out_data = result_data.detach(); + *out_indptr = result_indptr.detach(); + *out_indices = result_indices.detach(); + return Status::OK(); +} + +Status SparseCSRMatrixToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, + PyObject** out_indptr, PyObject** out_indices) { + return SparseCSXMatrixToNdarray(sparse_tensor, base, out_data, out_indptr, out_indices); +} + +Status SparseCSCMatrixToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, + PyObject** out_indptr, PyObject** out_indices) { + return SparseCSXMatrixToNdarray(sparse_tensor, base, out_data, out_indptr, out_indices); +} + +Status SparseCSFTensorToNdarray(const std::shared_ptr& sparse_tensor, + PyObject* base, PyObject** out_data, + PyObject** out_indptr, PyObject** out_indices) { + const auto& sparse_index = arrow::internal::checked_cast( + *sparse_tensor->sparse_index()); + + // Wrap tensor data + OwnedRef result_data; + RETURN_NOT_OK(SparseTensorDataToNdarray( + *sparse_tensor, {static_cast(sparse_tensor->non_zero_length()), 1}, base, + result_data.ref())); + + // Wrap indices + int ndim = static_cast(sparse_index.indices().size()); + OwnedRef indptr(PyList_New(ndim - 1)); + OwnedRef indices(PyList_New(ndim)); + RETURN_IF_PYERROR(); + + for (int i = 0; i < ndim - 1; ++i) { + PyObject* item; + RETURN_NOT_OK(TensorToNdarray(sparse_index.indptr()[i], base, &item)); + if (PyList_SetItem(indptr.obj(), i, item) < 0) { + Py_XDECREF(item); + RETURN_IF_PYERROR(); + } + } + for (int i = 0; i < ndim; ++i) { + PyObject* item; + RETURN_NOT_OK(TensorToNdarray(sparse_index.indices()[i], base, &item)); + if (PyList_SetItem(indices.obj(), i, item) < 0) { + Py_XDECREF(item); + RETURN_IF_PYERROR(); + } + } + + *out_indptr = indptr.detach(); + *out_indices = indices.detach(); + *out_data = result_data.detach(); + return Status::OK(); +} + +Status NdarraysToSparseCOOTensor(MemoryPool* pool, PyObject* data_ao, PyObject* coords_ao, + const std::vector& shape, + const std::vector& dim_names, + std::shared_ptr* out) { + if (!PyArray_Check(data_ao) || !PyArray_Check(coords_ao)) { + return Status::TypeError("Did not pass ndarray object"); + } + + PyArrayObject* ndarray_data = reinterpret_cast(data_ao); + std::shared_ptr data = std::make_shared(data_ao); + std::shared_ptr type_data; + 
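+  // Note: the coordinates ndarray is expected to be int64 with shape
+  // (non_zero_count, ndim); e.g. a 4x4 matrix with nonzeros at (0, 0) and
+  // (3, 1) arrives as coords = [[0, 0], [3, 1]]. The int64 requirement is
+  // asserted rather than checked below, since the pyarrow caller ensures it.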
RETURN_NOT_OK(GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray_data)), + &type_data)); + + std::shared_ptr coords; + RETURN_NOT_OK(NdarrayToTensor(pool, coords_ao, {}, &coords)); + ARROW_CHECK_EQ(coords->type_id(), Type::INT64); // Should be ensured by caller + + ARROW_ASSIGN_OR_RAISE(std::shared_ptr sparse_index, + SparseCOOIndex::Make(coords)); + *out = std::make_shared>(sparse_index, type_data, data, + shape, dim_names); + return Status::OK(); +} + +template +Status NdarraysToSparseCSXMatrix(MemoryPool* pool, PyObject* data_ao, PyObject* indptr_ao, + PyObject* indices_ao, const std::vector& shape, + const std::vector& dim_names, + std::shared_ptr>* out) { + if (!PyArray_Check(data_ao) || !PyArray_Check(indptr_ao) || + !PyArray_Check(indices_ao)) { + return Status::TypeError("Did not pass ndarray object"); + } + + PyArrayObject* ndarray_data = reinterpret_cast(data_ao); + std::shared_ptr data = std::make_shared(data_ao); + std::shared_ptr type_data; + RETURN_NOT_OK(GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray_data)), + &type_data)); + + std::shared_ptr indptr, indices; + RETURN_NOT_OK(NdarrayToTensor(pool, indptr_ao, {}, &indptr)); + RETURN_NOT_OK(NdarrayToTensor(pool, indices_ao, {}, &indices)); + ARROW_CHECK_EQ(indptr->type_id(), Type::INT64); // Should be ensured by caller + ARROW_CHECK_EQ(indices->type_id(), Type::INT64); // Should be ensured by caller + + auto sparse_index = std::make_shared( + std::static_pointer_cast>(indptr), + std::static_pointer_cast>(indices)); + *out = std::make_shared>(sparse_index, type_data, data, + shape, dim_names); + return Status::OK(); +} + +Status NdarraysToSparseCSFTensor(MemoryPool* pool, PyObject* data_ao, PyObject* indptr_ao, + PyObject* indices_ao, const std::vector& shape, + const std::vector& axis_order, + const std::vector& dim_names, + std::shared_ptr* out) { + if (!PyArray_Check(data_ao)) { + return Status::TypeError("Did not pass ndarray object for data"); + } + const int ndim = static_cast(shape.size()); + PyArrayObject* ndarray_data = reinterpret_cast(data_ao); + std::shared_ptr data = std::make_shared(data_ao); + std::shared_ptr type_data; + RETURN_NOT_OK(GetTensorType(reinterpret_cast(PyArray_DESCR(ndarray_data)), + &type_data)); + + std::vector> indptr(ndim - 1); + std::vector> indices(ndim); + + for (int i = 0; i < ndim - 1; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(indptr_ao, i); + if (!PyArray_Check(item)) { + return Status::TypeError("Did not pass ndarray object for indptr"); + } + RETURN_NOT_OK(NdarrayToTensor(pool, item, {}, &indptr[i])); + ARROW_CHECK_EQ(indptr[i]->type_id(), Type::INT64); // Should be ensured by caller + } + + for (int i = 0; i < ndim; ++i) { + PyObject* item = PySequence_Fast_GET_ITEM(indices_ao, i); + if (!PyArray_Check(item)) { + return Status::TypeError("Did not pass ndarray object for indices"); + } + RETURN_NOT_OK(NdarrayToTensor(pool, item, {}, &indices[i])); + ARROW_CHECK_EQ(indices[i]->type_id(), Type::INT64); // Should be ensured by caller + } + + auto sparse_index = std::make_shared(indptr, indices, axis_order); + *out = std::make_shared>(sparse_index, type_data, data, + shape, dim_names); + return Status::OK(); +} + +Status NdarraysToSparseCSRMatrix(MemoryPool* pool, PyObject* data_ao, PyObject* indptr_ao, + PyObject* indices_ao, const std::vector& shape, + const std::vector& dim_names, + std::shared_ptr* out) { + return NdarraysToSparseCSXMatrix(pool, data_ao, indptr_ao, indices_ao, + shape, dim_names, out); +} + +Status NdarraysToSparseCSCMatrix(MemoryPool* pool, PyObject* 
data_ao, PyObject* indptr_ao, + PyObject* indices_ao, const std::vector& shape, + const std::vector& dim_names, + std::shared_ptr* out) { + return NdarraysToSparseCSXMatrix(pool, data_ao, indptr_ao, indices_ao, + shape, dim_names, out); +} + +Status TensorToSparseCOOTensor(const std::shared_ptr& tensor, + std::shared_ptr* out) { + return SparseCOOTensor::Make(*tensor).Value(out); +} + +Status TensorToSparseCSRMatrix(const std::shared_ptr& tensor, + std::shared_ptr* out) { + return SparseCSRMatrix::Make(*tensor).Value(out); +} + +Status TensorToSparseCSCMatrix(const std::shared_ptr& tensor, + std::shared_ptr* out) { + return SparseCSCMatrix::Make(*tensor).Value(out); +} + +Status TensorToSparseCSFTensor(const std::shared_ptr& tensor, + std::shared_ptr* out) { + return SparseCSFTensor::Make(*tensor).Value(out); +} + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.h b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.h index 69a7dd3..1045107 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_convert.h @@ -27,8 +27,8 @@ #include #include "arrow/buffer.h" -#include "arrow/sparse_tensor.h" #include "arrow/python/visibility.h" +#include "arrow/sparse_tensor.h" namespace arrow { diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_internal.h b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_internal.h new file mode 100644 index 0000000..b9b632f --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_internal.h @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
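+
+// Usage sketch for the indexer defined below (a minimal sketch; `arr` is
+// assumed to be a 1-D int64 ndarray, possibly non-contiguous, e.g. a[::2]):
+//
+//   Ndarray1DIndexer<int64_t> values(arr);
+//   int64_t sum = 0;
+//   for (int64_t i = 0; i < values.size(); ++i) sum += values[i];
+//
+// The byte-stride-aware operator[] is what lets the converters walk sliced
+// NumPy input without forcing a contiguous copy first.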
+
+// Internal utilities for dealing with NumPy
+
+#pragma once
+
+#include "arrow/python/numpy_interop.h"
+
+#include "arrow/status.h"
+
+#include "arrow/python/platform.h"
+
+#include <cstdint>
+#include <sstream>
+#include <string>
+
+namespace arrow {
+namespace py {
+
+/// Indexing convenience for interacting with strided 1-dim ndarray objects
+template <typename T>
+class Ndarray1DIndexer {
+ public:
+  typedef int64_t size_type;
+
+  Ndarray1DIndexer() : arr_(NULLPTR), data_(NULLPTR) {}
+
+  explicit Ndarray1DIndexer(PyArrayObject* arr) : Ndarray1DIndexer() {
+    arr_ = arr;
+    DCHECK_EQ(1, PyArray_NDIM(arr)) << "Only works with 1-dimensional arrays";
+    data_ = reinterpret_cast<uint8_t*>(PyArray_DATA(arr));
+    stride_ = PyArray_STRIDES(arr)[0];
+  }
+
+  ~Ndarray1DIndexer() = default;
+
+  int64_t size() const { return PyArray_SIZE(arr_); }
+
+  const T* data() const { return reinterpret_cast<const T*>(data_); }
+
+  bool is_strided() const { return stride_ != sizeof(T); }
+
+  T& operator[](size_type index) {
+    return *reinterpret_cast<T*>(data_ + index * stride_);
+  }
+  const T& operator[](size_type index) const {
+    return *reinterpret_cast<const T*>(data_ + index * stride_);
+  }
+
+ private:
+  PyArrayObject* arr_;
+  uint8_t* data_;
+  int64_t stride_;
+};
+
+// Handling of Numpy Types by their static numbers
+// (the NPY_TYPES enum and related defines)
+
+static inline std::string GetNumPyTypeName(int npy_type) {
+#define TYPE_CASE(TYPE, NAME) \
+  case NPY_##TYPE:            \
+    return NAME;
+
+  switch (npy_type) {
+    TYPE_CASE(BOOL, "bool")
+    TYPE_CASE(INT8, "int8")
+    TYPE_CASE(INT16, "int16")
+    TYPE_CASE(INT32, "int32")
+    TYPE_CASE(INT64, "int64")
+#if !NPY_INT32_IS_INT
+    TYPE_CASE(INT, "intc")
+#endif
+#if !NPY_INT64_IS_LONG_LONG
+    TYPE_CASE(LONGLONG, "longlong")
+#endif
+    TYPE_CASE(UINT8, "uint8")
+    TYPE_CASE(UINT16, "uint16")
+    TYPE_CASE(UINT32, "uint32")
+    TYPE_CASE(UINT64, "uint64")
+#if !NPY_INT32_IS_INT
+    TYPE_CASE(UINT, "uintc")
+#endif
+#if !NPY_INT64_IS_LONG_LONG
+    TYPE_CASE(ULONGLONG, "ulonglong")
+#endif
+    TYPE_CASE(FLOAT16, "float16")
+    TYPE_CASE(FLOAT32, "float32")
+    TYPE_CASE(FLOAT64, "float64")
+    TYPE_CASE(DATETIME, "datetime64")
+    TYPE_CASE(TIMEDELTA, "timedelta64")
+    TYPE_CASE(OBJECT, "object")
+    TYPE_CASE(VOID, "void")
+    default:
+      break;
+  }
+
+#undef TYPE_CASE
+  std::stringstream ss;
+  ss << "unrecognized type (" << npy_type << ") in GetNumPyTypeName";
+  return ss.str();
+}
+
+#define TYPE_VISIT_INLINE(TYPE) \
+  case NPY_##TYPE:              \
+    return visitor->template Visit<NPY_##TYPE>(arr);
+
+template <typename VISITOR>
+inline Status VisitNumpyArrayInline(PyArrayObject* arr, VISITOR* visitor) {
+  switch (PyArray_TYPE(arr)) {
+    TYPE_VISIT_INLINE(BOOL);
+    TYPE_VISIT_INLINE(INT8);
+    TYPE_VISIT_INLINE(UINT8);
+    TYPE_VISIT_INLINE(INT16);
+    TYPE_VISIT_INLINE(UINT16);
+    TYPE_VISIT_INLINE(INT32);
+    TYPE_VISIT_INLINE(UINT32);
+    TYPE_VISIT_INLINE(INT64);
+    TYPE_VISIT_INLINE(UINT64);
+#if !NPY_INT32_IS_INT
+    TYPE_VISIT_INLINE(INT);
+    TYPE_VISIT_INLINE(UINT);
+#endif
+#if !NPY_INT64_IS_LONG_LONG
+    TYPE_VISIT_INLINE(LONGLONG);
+    TYPE_VISIT_INLINE(ULONGLONG);
+#endif
+    TYPE_VISIT_INLINE(FLOAT16);
+    TYPE_VISIT_INLINE(FLOAT32);
+    TYPE_VISIT_INLINE(FLOAT64);
+    TYPE_VISIT_INLINE(DATETIME);
+    TYPE_VISIT_INLINE(TIMEDELTA);
+    TYPE_VISIT_INLINE(OBJECT);
+  }
+  return Status::NotImplemented("NumPy type not implemented: ",
+                                GetNumPyTypeName(PyArray_TYPE(arr)));
+}
+
+#undef TYPE_VISIT_INLINE
+
+namespace internal {
+
+inline bool PyFloatScalar_Check(PyObject* obj) {
+  return PyFloat_Check(obj) || PyArray_IsScalar(obj, Floating);
+}
+
+inline bool PyIntScalar_Check(PyObject* obj) {
+  return PyLong_Check(obj) ||
PyArray_IsScalar(obj, Integer); +} + +inline bool PyBoolScalar_Check(PyObject* obj) { + return PyBool_Check(obj) || PyArray_IsScalar(obj, Bool); +} + +static inline PyArray_Descr* GetSafeNumPyDtype(int type) { + if (type == NPY_DATETIME || type == NPY_TIMEDELTA) { + // It is not safe to mutate the result of DescrFromType for datetime and + // timedelta descriptors + return PyArray_DescrNewFromType(type); + } else { + return PyArray_DescrFromType(type); + } +} + +} // namespace internal + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_to_arrow.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_to_arrow.cc new file mode 100644 index 0000000..2727ce3 --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/numpy_to_arrow.cc @@ -0,0 +1,870 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Functions for pandas conversion via NumPy + +#include "arrow/python/numpy_to_arrow.h" +#include "arrow/python/numpy_interop.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/array/builder_binary.h" +#include "arrow/status.h" +#include "arrow/table.h" +#include "arrow/type_fwd.h" +#include "arrow/type_traits.h" +#include "arrow/util/bit_util.h" +#include "arrow/util/bitmap_generate.h" +#include "arrow/util/bitmap_ops.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/endian.h" +#include "arrow/util/logging.h" +#include "arrow/util/macros.h" +#include "arrow/util/string.h" +#include "arrow/util/utf8.h" +#include "arrow/visit_type_inline.h" + +#include "arrow/compute/api_scalar.h" + +#include "arrow/python/common.h" +#include "arrow/python/datetime.h" +#include "arrow/python/helpers.h" +#include "arrow/python/iterators.h" +#include "arrow/python/numpy_convert.h" +#include "arrow/python/numpy_internal.h" +#include "arrow/python/python_to_arrow.h" +#include "arrow/python/type_traits.h" + +namespace arrow { + +using internal::checked_cast; +using internal::CopyBitmap; +using internal::GenerateBitsUnrolled; + +namespace py { + +using internal::NumPyTypeSize; + +// ---------------------------------------------------------------------- +// Conversion utilities + +namespace { + +Status AllocateNullBitmap(MemoryPool* pool, int64_t length, + std::shared_ptr* out) { + int64_t null_bytes = bit_util::BytesForBits(length); + ARROW_ASSIGN_OR_RAISE(auto null_bitmap, AllocateResizableBuffer(null_bytes, pool)); + + // Padding zeroed by AllocateResizableBuffer + memset(null_bitmap->mutable_data(), 0, static_cast(null_bytes)); + *out = std::move(null_bitmap); + return Status::OK(); +} + +// ---------------------------------------------------------------------- +// Conversion from NumPy-in-Pandas to 
Arrow null bitmap + +template +inline int64_t ValuesToBitmap(PyArrayObject* arr, uint8_t* bitmap) { + typedef internal::npy_traits traits; + typedef typename traits::value_type T; + + int64_t null_count = 0; + + Ndarray1DIndexer values(arr); + for (int i = 0; i < values.size(); ++i) { + if (traits::isnull(values[i])) { + ++null_count; + } else { + bit_util::SetBit(bitmap, i); + } + } + + return null_count; +} + +class NumPyNullsConverter { + public: + /// Convert the given array's null values to a null bitmap. + /// The null bitmap is only allocated if null values are ever possible. + static Status Convert(MemoryPool* pool, PyArrayObject* arr, bool from_pandas, + std::shared_ptr* out_null_bitmap_, + int64_t* out_null_count) { + NumPyNullsConverter converter(pool, arr, from_pandas); + RETURN_NOT_OK(VisitNumpyArrayInline(arr, &converter)); + *out_null_bitmap_ = converter.null_bitmap_; + *out_null_count = converter.null_count_; + return Status::OK(); + } + + template + Status Visit(PyArrayObject* arr) { + typedef internal::npy_traits traits; + + const bool null_sentinels_possible = + // Always treat Numpy's NaT as null + TYPE == NPY_DATETIME || TYPE == NPY_TIMEDELTA || + // Observing pandas's null sentinels + (from_pandas_ && traits::supports_nulls); + + if (null_sentinels_possible) { + RETURN_NOT_OK(AllocateNullBitmap(pool_, PyArray_SIZE(arr), &null_bitmap_)); + null_count_ = ValuesToBitmap(arr, null_bitmap_->mutable_data()); + } + return Status::OK(); + } + + protected: + NumPyNullsConverter(MemoryPool* pool, PyArrayObject* arr, bool from_pandas) + : pool_(pool), + arr_(arr), + from_pandas_(from_pandas), + null_bitmap_data_(nullptr), + null_count_(0) {} + + MemoryPool* pool_; + PyArrayObject* arr_; + bool from_pandas_; + std::shared_ptr null_bitmap_; + uint8_t* null_bitmap_data_; + int64_t null_count_; +}; + +// Returns null count +int64_t MaskToBitmap(PyArrayObject* mask, int64_t length, uint8_t* bitmap) { + int64_t null_count = 0; + + if (!PyArray_Check(mask)) return -1; + + Ndarray1DIndexer mask_values(mask); + for (int i = 0; i < length; ++i) { + if (mask_values[i]) { + ++null_count; + bit_util::ClearBit(bitmap, i); + } else { + bit_util::SetBit(bitmap, i); + } + } + return null_count; +} + +} // namespace + +// ---------------------------------------------------------------------- +// Conversion from NumPy arrays (possibly originating from pandas) to Arrow +// format. 
Does not handle NPY_OBJECT dtype arrays; use ConvertPySequence for +// that + +class NumPyConverter { + public: + NumPyConverter(MemoryPool* pool, PyObject* arr, PyObject* mo, + const std::shared_ptr& type, bool from_pandas, + const compute::CastOptions& cast_options = compute::CastOptions()) + : pool_(pool), + type_(type), + arr_(reinterpret_cast(arr)), + dtype_(PyArray_DESCR(arr_)), + mask_(nullptr), + from_pandas_(from_pandas), + cast_options_(cast_options), + null_bitmap_data_(nullptr), + null_count_(0) { + if (mo != nullptr && mo != Py_None) { + mask_ = reinterpret_cast(mo); + } + length_ = static_cast(PyArray_SIZE(arr_)); + itemsize_ = static_cast(PyArray_DESCR(arr_)->elsize); + stride_ = static_cast(PyArray_STRIDES(arr_)[0]); + } + + bool is_strided() const { return itemsize_ != stride_; } + + Status Convert(); + + const ArrayVector& result() const { return out_arrays_; } + + template + enable_if_primitive_ctype Visit(const T& type) { + return VisitNative(); + } + + Status Visit(const HalfFloatType& type) { return VisitNative(); } + + Status Visit(const Date32Type& type) { return VisitNative(); } + Status Visit(const Date64Type& type) { return VisitNative(); } + Status Visit(const TimestampType& type) { return VisitNative(); } + Status Visit(const Time32Type& type) { return VisitNative(); } + Status Visit(const Time64Type& type) { return VisitNative(); } + Status Visit(const DurationType& type) { return VisitNative(); } + + Status Visit(const NullType& type) { return TypeNotImplemented(type.ToString()); } + + // NumPy ascii string arrays + Status Visit(const BinaryType& type); + + // NumPy unicode arrays + Status Visit(const StringType& type); + + Status Visit(const StructType& type); + + Status Visit(const FixedSizeBinaryType& type); + + // Default case + Status Visit(const DataType& type) { return TypeNotImplemented(type.ToString()); } + + protected: + Status InitNullBitmap() { + RETURN_NOT_OK(AllocateNullBitmap(pool_, length_, &null_bitmap_)); + null_bitmap_data_ = null_bitmap_->mutable_data(); + return Status::OK(); + } + + // Called before ConvertData to ensure Numpy input buffer is in expected + // Arrow layout + template + Status PrepareInputData(std::shared_ptr* data); + + // ---------------------------------------------------------------------- + // Traditional visitor conversion for non-object arrays + + template + Status ConvertData(std::shared_ptr* data); + + template + Status PushBuilderResult(T* builder) { + std::shared_ptr out; + RETURN_NOT_OK(builder->Finish(&out)); + out_arrays_.emplace_back(out); + return Status::OK(); + } + + Status PushArray(const std::shared_ptr& data) { + out_arrays_.emplace_back(MakeArray(data)); + return Status::OK(); + } + + template + Status VisitNative() { + if (mask_ != nullptr) { + RETURN_NOT_OK(InitNullBitmap()); + null_count_ = MaskToBitmap(mask_, length_, null_bitmap_data_); + if (null_count_ == -1) return Status::Invalid("Invalid mask type"); + } else { + RETURN_NOT_OK(NumPyNullsConverter::Convert(pool_, arr_, from_pandas_, &null_bitmap_, + &null_count_)); + } + + std::shared_ptr data; + RETURN_NOT_OK(ConvertData(&data)); + + auto arr_data = ArrayData::Make(type_, length_, {null_bitmap_, data}, null_count_, 0); + return PushArray(arr_data); + } + + Status TypeNotImplemented(std::string type_name) { + return Status::NotImplemented("NumPyConverter doesn't implement <", type_name, + "> conversion. 
"); + } + + MemoryPool* pool_; + std::shared_ptr type_; + PyArrayObject* arr_; + PyArray_Descr* dtype_; + PyArrayObject* mask_; + int64_t length_; + int64_t stride_; + int itemsize_; + + bool from_pandas_; + compute::CastOptions cast_options_; + + // Used in visitor pattern + ArrayVector out_arrays_; + + std::shared_ptr null_bitmap_; + uint8_t* null_bitmap_data_; + int64_t null_count_; +}; + +Status NumPyConverter::Convert() { + if (PyArray_NDIM(arr_) != 1) { + return Status::Invalid("only handle 1-dimensional arrays"); + } + + if (dtype_->type_num == NPY_OBJECT) { + // If an object array, convert it like a normal Python sequence + PyConversionOptions py_options; + py_options.type = type_; + py_options.from_pandas = from_pandas_; + ARROW_ASSIGN_OR_RAISE( + auto chunked_array, + ConvertPySequence(reinterpret_cast(arr_), + reinterpret_cast(mask_), py_options, pool_)); + out_arrays_ = chunked_array->chunks(); + return Status::OK(); + } + + if (type_ == nullptr) { + return Status::Invalid("Must pass data type for non-object arrays"); + } + + // Visit the type to perform conversion + return VisitTypeInline(*type_, this); +} + +namespace { + +Status CastBuffer(const std::shared_ptr& in_type, + const std::shared_ptr& input, const int64_t length, + const std::shared_ptr& valid_bitmap, const int64_t null_count, + const std::shared_ptr& out_type, + const compute::CastOptions& cast_options, MemoryPool* pool, + std::shared_ptr* out) { + // Must cast + auto tmp_data = ArrayData::Make(in_type, length, {valid_bitmap, input}, null_count); + compute::ExecContext context(pool); + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr casted_array, + compute::Cast(*MakeArray(tmp_data), out_type, cast_options, &context)); + *out = casted_array->data()->buffers[1]; + return Status::OK(); +} + +template +Status StaticCastBuffer(const Buffer& input, const int64_t length, MemoryPool* pool, + std::shared_ptr* out) { + ARROW_ASSIGN_OR_RAISE(auto result, AllocateBuffer(sizeof(ToType) * length, pool)); + + auto in_values = reinterpret_cast(input.data()); + auto out_values = reinterpret_cast(result->mutable_data()); + for (int64_t i = 0; i < length; ++i) { + *out_values++ = static_cast(*in_values++); + } + *out = std::move(result); + return Status::OK(); +} + +template +void CopyStridedBytewise(int8_t* input_data, int64_t length, int64_t stride, + T* output_data) { + // Passing input_data as non-const is a concession to PyObject* + for (int64_t i = 0; i < length; ++i) { + memcpy(output_data + i, input_data, sizeof(T)); + input_data += stride; + } +} + +template +void CopyStridedNatural(T* input_data, int64_t length, int64_t stride, T* output_data) { + // Passing input_data as non-const is a concession to PyObject* + int64_t j = 0; + for (int64_t i = 0; i < length; ++i) { + output_data[i] = input_data[j]; + j += stride; + } +} + +class NumPyStridedConverter { + public: + static Status Convert(PyArrayObject* arr, int64_t length, MemoryPool* pool, + std::shared_ptr* out) { + NumPyStridedConverter converter(arr, length, pool); + RETURN_NOT_OK(VisitNumpyArrayInline(arr, &converter)); + *out = converter.buffer_; + return Status::OK(); + } + template + Status Visit(PyArrayObject* arr) { + using traits = internal::npy_traits; + using T = typename traits::value_type; + + ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(sizeof(T) * length_, pool_)); + + const int64_t stride = PyArray_STRIDES(arr)[0]; + // ARROW-16013: convert sizeof(T) to signed int64 first, otherwise dividing by it + // would do an unsigned division. 
This cannot be caught by tests without ubsan, since + // common signed overflow behavior and the fact that the sizeof(T) is currently always + // a power of two here cause CopyStridedNatural to still produce correct results + const int64_t element_size = sizeof(T); + if (stride % element_size == 0) { + const int64_t stride_elements = stride / element_size; + CopyStridedNatural(reinterpret_cast(PyArray_DATA(arr)), length_, + stride_elements, reinterpret_cast(buffer_->mutable_data())); + } else { + CopyStridedBytewise(reinterpret_cast(PyArray_DATA(arr)), length_, stride, + reinterpret_cast(buffer_->mutable_data())); + } + return Status::OK(); + } + + protected: + NumPyStridedConverter(PyArrayObject* arr, int64_t length, MemoryPool* pool) + : arr_(arr), length_(length), pool_(pool), buffer_(nullptr) {} + PyArrayObject* arr_; + int64_t length_; + MemoryPool* pool_; + std::shared_ptr buffer_; +}; + +} // namespace + +template +inline Status NumPyConverter::PrepareInputData(std::shared_ptr* data) { + if (PyArray_ISBYTESWAPPED(arr_)) { + // TODO + return Status::NotImplemented("Byte-swapped arrays not supported"); + } + + if (dtype_->type_num == NPY_BOOL) { + int64_t nbytes = bit_util::BytesForBits(length_); + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateBuffer(nbytes, pool_)); + + Ndarray1DIndexer values(arr_); + int64_t i = 0; + const auto generate = [&values, &i]() -> bool { return values[i++] > 0; }; + GenerateBitsUnrolled(buffer->mutable_data(), 0, length_, generate); + + *data = std::move(buffer); + } else if (is_strided()) { + RETURN_NOT_OK(NumPyStridedConverter::Convert(arr_, length_, pool_, data)); + } else { + // Can zero-copy + *data = std::make_shared(reinterpret_cast(arr_)); + } + + return Status::OK(); +} + +template +inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { + RETURN_NOT_OK(PrepareInputData(data)); + + std::shared_ptr input_type; + RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + + if (!input_type->Equals(*type_)) { + RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, type_, + cast_options_, pool_, data)); + } + + return Status::OK(); +} + +template <> +inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { + std::shared_ptr input_type; + + RETURN_NOT_OK(PrepareInputData(data)); + + auto date_dtype = reinterpret_cast(dtype_->c_metadata); + if (dtype_->type_num == NPY_DATETIME) { + // If we have inbound datetime64[D] data, this needs to be downcasted + // separately here from int64_t to int32_t, because this data is not + // supported in compute::Cast + if (date_dtype->meta.base == NPY_FR_D) { + // TODO(wesm): How pedantic do we really want to be about checking for int32 + // overflow here? 
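+      // Illustrative sketch (not part of the vendored sources): datetime64[D]
+      // stores days since the epoch as int64, while Arrow's Date32Type stores
+      // them as int32, so e.g. the day value 19188 arrives as the int64 19188
+      // and must be narrowed element-wise. StaticCastBuffer below performs
+      // the equivalent of:
+      //
+      //   auto in  = reinterpret_cast<const int64_t*>(input.data());
+      //   auto out = reinterpret_cast<int32_t*>(result->mutable_data());
+      //   for (int64_t i = 0; i < length; ++i) {
+      //     *out++ = static_cast<int32_t>(*in++);  // no overflow check
+      //   }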
+ Status s = StaticCastBuffer(**data, length_, pool_, data); + RETURN_NOT_OK(s); + } else { + RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + if (!input_type->Equals(*type_)) { + // The null bitmap was already computed in VisitNative() + RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, + type_, cast_options_, pool_, data)); + } + } + } else { + RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + if (!input_type->Equals(*type_)) { + RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, + type_, cast_options_, pool_, data)); + } + } + + return Status::OK(); +} + +template <> +inline Status NumPyConverter::ConvertData(std::shared_ptr* data) { + constexpr int64_t kMillisecondsInDay = 86400000; + std::shared_ptr input_type; + + RETURN_NOT_OK(PrepareInputData(data)); + + auto date_dtype = reinterpret_cast(dtype_->c_metadata); + if (dtype_->type_num == NPY_DATETIME) { + // If we have inbound datetime64[D] data, this needs to be downcasted + // separately here from int64_t to int32_t, because this data is not + // supported in compute::Cast + if (date_dtype->meta.base == NPY_FR_D) { + ARROW_ASSIGN_OR_RAISE(auto result, + AllocateBuffer(sizeof(int64_t) * length_, pool_)); + + auto in_values = reinterpret_cast((*data)->data()); + auto out_values = reinterpret_cast(result->mutable_data()); + for (int64_t i = 0; i < length_; ++i) { + *out_values++ = kMillisecondsInDay * (*in_values++); + } + *data = std::move(result); + } else { + RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + if (!input_type->Equals(*type_)) { + // The null bitmap was already computed in VisitNative() + RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, + type_, cast_options_, pool_, data)); + } + } + } else { + RETURN_NOT_OK(NumPyDtypeToArrow(reinterpret_cast(dtype_), &input_type)); + if (!input_type->Equals(*type_)) { + RETURN_NOT_OK(CastBuffer(input_type, *data, length_, null_bitmap_, null_count_, + type_, cast_options_, pool_, data)); + } + } + + return Status::OK(); +} + +// Create 16MB chunks for binary data +constexpr int32_t kBinaryChunksize = 1 << 24; + +Status NumPyConverter::Visit(const BinaryType& type) { + ::arrow::internal::ChunkedBinaryBuilder builder(kBinaryChunksize, pool_); + + auto data = reinterpret_cast(PyArray_DATA(arr_)); + + auto AppendNotNull = [&builder, this](const uint8_t* data) { + // This is annoying. 
NumPy allows strings to have nul-terminators, so + // we must check for them here + const size_t item_size = + strnlen(reinterpret_cast(data), static_cast(itemsize_)); + return builder.Append(data, static_cast(item_size)); + }; + + if (mask_ != nullptr) { + Ndarray1DIndexer mask_values(mask_); + for (int64_t i = 0; i < length_; ++i) { + if (mask_values[i]) { + RETURN_NOT_OK(builder.AppendNull()); + } else { + RETURN_NOT_OK(AppendNotNull(data)); + } + data += stride_; + } + } else { + for (int64_t i = 0; i < length_; ++i) { + RETURN_NOT_OK(AppendNotNull(data)); + data += stride_; + } + } + + ArrayVector result; + RETURN_NOT_OK(builder.Finish(&result)); + for (auto arr : result) { + RETURN_NOT_OK(PushArray(arr->data())); + } + return Status::OK(); +} + +Status NumPyConverter::Visit(const FixedSizeBinaryType& type) { + auto byte_width = type.byte_width(); + + if (itemsize_ != byte_width) { + return Status::Invalid("Got bytestring of length ", itemsize_, " (expected ", + byte_width, ")"); + } + + FixedSizeBinaryBuilder builder(::arrow::fixed_size_binary(byte_width), pool_); + auto data = reinterpret_cast(PyArray_DATA(arr_)); + + if (mask_ != nullptr) { + Ndarray1DIndexer mask_values(mask_); + RETURN_NOT_OK(builder.Reserve(length_)); + for (int64_t i = 0; i < length_; ++i) { + if (mask_values[i]) { + RETURN_NOT_OK(builder.AppendNull()); + } else { + RETURN_NOT_OK(builder.Append(data)); + } + data += stride_; + } + } else { + for (int64_t i = 0; i < length_; ++i) { + RETURN_NOT_OK(builder.Append(data)); + data += stride_; + } + } + + std::shared_ptr result; + RETURN_NOT_OK(builder.Finish(&result)); + return PushArray(result->data()); +} + +namespace { + +// NumPy unicode is UCS4/UTF32 always +constexpr int kNumPyUnicodeSize = 4; + +Status AppendUTF32(const char* data, int itemsize, int byteorder, + ::arrow::internal::ChunkedStringBuilder* builder) { + // The binary \x00\x00\x00\x00 indicates a nul terminator in NumPy unicode, + // so we need to detect that here to truncate if necessary. Yep. + int actual_length = 0; + for (; actual_length < itemsize / kNumPyUnicodeSize; ++actual_length) { + const char* code_point = data + actual_length * kNumPyUnicodeSize; + if ((*code_point == '\0') && (*(code_point + 1) == '\0') && + (*(code_point + 2) == '\0') && (*(code_point + 3) == '\0')) { + break; + } + } + + OwnedRef unicode_obj(PyUnicode_DecodeUTF32(data, actual_length * kNumPyUnicodeSize, + nullptr, &byteorder)); + RETURN_IF_PYERROR(); + OwnedRef utf8_obj(PyUnicode_AsUTF8String(unicode_obj.obj())); + if (utf8_obj.obj() == NULL) { + PyErr_Clear(); + return Status::Invalid("failed converting UTF32 to UTF8"); + } + + const int32_t length = static_cast(PyBytes_GET_SIZE(utf8_obj.obj())); + return builder->Append( + reinterpret_cast(PyBytes_AS_STRING(utf8_obj.obj())), length); +} + +} // namespace + +Status NumPyConverter::Visit(const StringType& type) { + util::InitializeUTF8(); + + ::arrow::internal::ChunkedStringBuilder builder(kBinaryChunksize, pool_); + + auto data = reinterpret_cast(PyArray_DATA(arr_)); + + char numpy_byteorder = dtype_->byteorder; + + // For Python C API, -1 is little-endian, 1 is big-endian +#if ARROW_LITTLE_ENDIAN + // Yield little-endian from both '|' (native) and '<' + int byteorder = numpy_byteorder == '>' ? 1 : -1; +#else + // Yield big-endian from both '|' (native) and '>' + int byteorder = numpy_byteorder == '<' ? 
-1 : 1; +#endif + + PyAcquireGIL gil_lock; + + const bool is_binary_type = dtype_->type_num == NPY_STRING; + const bool is_unicode_type = dtype_->type_num == NPY_UNICODE; + + if (!is_binary_type && !is_unicode_type) { + const bool is_float_type = dtype_->kind == 'f'; + if (from_pandas_ && is_float_type) { + // in case of from_pandas=True, accept an all-NaN float array as input + RETURN_NOT_OK(NumPyNullsConverter::Convert(pool_, arr_, from_pandas_, &null_bitmap_, + &null_count_)); + if (null_count_ == length_) { + auto arr = std::make_shared(length_); + compute::ExecContext context(pool_); + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr out, + compute::Cast(*arr, arrow::utf8(), cast_options_, &context)); + out_arrays_.emplace_back(out); + return Status::OK(); + } + } + std::string dtype_string; + RETURN_NOT_OK(internal::PyObject_StdStringStr(reinterpret_cast(dtype_), + &dtype_string)); + return Status::TypeError("Expected a string or bytes dtype, got ", dtype_string); + } + + auto AppendNonNullValue = [&](const uint8_t* data) { + if (is_binary_type) { + if (ARROW_PREDICT_TRUE(util::ValidateUTF8(data, itemsize_))) { + return builder.Append(data, itemsize_); + } else { + return Status::Invalid("Encountered non-UTF8 binary value: ", + HexEncode(data, itemsize_)); + } + } else { + // is_unicode_type case + return AppendUTF32(reinterpret_cast(data), itemsize_, byteorder, + &builder); + } + }; + + if (mask_ != nullptr) { + Ndarray1DIndexer mask_values(mask_); + for (int64_t i = 0; i < length_; ++i) { + if (mask_values[i]) { + RETURN_NOT_OK(builder.AppendNull()); + } else { + RETURN_NOT_OK(AppendNonNullValue(data)); + } + data += stride_; + } + } else { + for (int64_t i = 0; i < length_; ++i) { + RETURN_NOT_OK(AppendNonNullValue(data)); + data += stride_; + } + } + + ArrayVector result; + RETURN_NOT_OK(builder.Finish(&result)); + for (auto arr : result) { + RETURN_NOT_OK(PushArray(arr->data())); + } + return Status::OK(); +} + +Status NumPyConverter::Visit(const StructType& type) { + std::vector sub_converters; + std::vector sub_arrays; + + { + PyAcquireGIL gil_lock; + + // Create converters for each struct type field + if (dtype_->fields == NULL || !PyDict_Check(dtype_->fields)) { + return Status::TypeError("Expected struct array"); + } + + for (auto field : type.fields()) { + PyObject* tup = PyDict_GetItemString(dtype_->fields, field->name().c_str()); + if (tup == NULL) { + return Status::Invalid("Missing field '", field->name(), "' in struct array"); + } + PyArray_Descr* sub_dtype = + reinterpret_cast(PyTuple_GET_ITEM(tup, 0)); + DCHECK(PyObject_TypeCheck(sub_dtype, &PyArrayDescr_Type)); + int offset = static_cast(PyLong_AsLong(PyTuple_GET_ITEM(tup, 1))); + RETURN_IF_PYERROR(); + Py_INCREF(sub_dtype); /* PyArray_GetField() steals ref */ + PyObject* sub_array = PyArray_GetField(arr_, sub_dtype, offset); + RETURN_IF_PYERROR(); + sub_arrays.emplace_back(sub_array); + sub_converters.emplace_back(pool_, sub_array, nullptr /* mask */, field->type(), + from_pandas_); + } + } + + std::vector groups; + int64_t null_count = 0; + + // Compute null bitmap and store it as a Boolean Array to include it + // in the rechunking below + { + if (mask_ != nullptr) { + RETURN_NOT_OK(InitNullBitmap()); + null_count = MaskToBitmap(mask_, length_, null_bitmap_data_); + if (null_count_ == -1) return Status::Invalid("Invalid mask type"); + } + groups.push_back({std::make_shared(length_, null_bitmap_)}); + } + + // Convert child data + for (auto& converter : sub_converters) { + RETURN_NOT_OK(converter.Convert()); + 
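+    // Each child converter may legitimately produce more than one chunk (for
+    // example, binary children that overflow kBinaryChunksize), so every
+    // field contributes a whole ArrayVector. Illustrative shape of the data
+    // (not from the vendored sources) for a struct with two fields:
+    //
+    //   groups[0] = {null-bitmap BooleanArray, rows [0, 1500)}
+    //   groups[1] = {field "a" chunks: rows [0, 1000), [1000, 1500)}
+    //   groups[2] = {field "b" chunks: rows [0, 1500)}
+    //
+    // RechunkArraysConsistently below splits all groups on the union of the
+    // chunk boundaries so that chunk i of every group covers the same rows.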
groups.push_back(converter.result()); + } + // Ensure the different array groups are chunked consistently + groups = ::arrow::internal::RechunkArraysConsistently(groups); + + // Make struct array chunks by combining groups + size_t ngroups = groups.size(); + size_t nchunks = groups[0].size(); + for (size_t chunk = 0; chunk < nchunks; chunk++) { + // First group has the null bitmaps as Boolean Arrays + const auto& null_data = groups[0][chunk]->data(); + DCHECK_EQ(null_data->type->id(), Type::BOOL); + DCHECK_EQ(null_data->buffers.size(), 2); + const auto& null_buffer = null_data->buffers[1]; + // Careful: the rechunked null bitmap may have a non-zero offset + // to its buffer, and it may not even start on a byte boundary + int64_t null_offset = null_data->offset; + std::shared_ptr fixed_null_buffer; + + if (!null_buffer) { + fixed_null_buffer = null_buffer; + } else if (null_offset % 8 == 0) { + fixed_null_buffer = + std::make_shared(null_buffer, + // byte offset + null_offset / 8, + // byte size + bit_util::BytesForBits(null_data->length)); + } else { + ARROW_ASSIGN_OR_RAISE( + fixed_null_buffer, + CopyBitmap(pool_, null_buffer->data(), null_offset, null_data->length)); + } + + // Create struct array chunk and populate it + auto arr_data = + ArrayData::Make(type_, null_data->length, null_count ? kUnknownNullCount : 0, 0); + arr_data->buffers.push_back(fixed_null_buffer); + // Append child chunks + for (size_t i = 1; i < ngroups; i++) { + arr_data->child_data.push_back(groups[i][chunk]->data()); + } + RETURN_NOT_OK(PushArray(arr_data)); + } + + return Status::OK(); +} + +Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pandas, + const std::shared_ptr& type, + const compute::CastOptions& cast_options, + std::shared_ptr* out) { + if (!PyArray_Check(ao)) { + // This code path cannot be reached by Python unit tests currently so this + // is only a sanity check. + return Status::TypeError("Input object was not a NumPy array"); + } + if (PyArray_NDIM(reinterpret_cast(ao)) != 1) { + return Status::Invalid("only handle 1-dimensional arrays"); + } + + NumPyConverter converter(pool, ao, mo, type, from_pandas, cast_options); + RETURN_NOT_OK(converter.Convert()); + const auto& output_arrays = converter.result(); + DCHECK_GT(output_arrays.size(), 0); + *out = std::make_shared(output_arrays); + return Status::OK(); +} + +Status NdarrayToArrow(MemoryPool* pool, PyObject* ao, PyObject* mo, bool from_pandas, + const std::shared_ptr& type, + std::shared_ptr* out) { + return NdarrayToArrow(pool, ao, mo, from_pandas, type, compute::CastOptions(), out); +} + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/parquet_encryption.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/parquet_encryption.cc new file mode 100644 index 0000000..a5f924b --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/parquet_encryption.cc @@ -0,0 +1,98 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/python/parquet_encryption.h" +#include "parquet/exception.h" + +namespace arrow { +namespace py { +namespace parquet { +namespace encryption { + +PyKmsClient::PyKmsClient(PyObject* handler, PyKmsClientVtable vtable) + : handler_(handler), vtable_(std::move(vtable)) { + Py_INCREF(handler); +} + +PyKmsClient::~PyKmsClient() {} + +std::string PyKmsClient::WrapKey(const std::string& key_bytes, + const std::string& master_key_identifier) { + std::string wrapped; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.wrap_key(handler_.obj(), key_bytes, master_key_identifier, &wrapped); + return CheckPyError(); + }); + if (!st.ok()) { + throw ::parquet::ParquetStatusException(st); + } + return wrapped; +} + +std::string PyKmsClient::UnwrapKey(const std::string& wrapped_key, + const std::string& master_key_identifier) { + std::string unwrapped; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.unwrap_key(handler_.obj(), wrapped_key, master_key_identifier, &unwrapped); + return CheckPyError(); + }); + if (!st.ok()) { + throw ::parquet::ParquetStatusException(st); + } + return unwrapped; +} + +PyKmsClientFactory::PyKmsClientFactory(PyObject* handler, PyKmsClientFactoryVtable vtable) + : handler_(handler), vtable_(std::move(vtable)) { + Py_INCREF(handler); +} + +PyKmsClientFactory::~PyKmsClientFactory() {} + +std::shared_ptr<::parquet::encryption::KmsClient> PyKmsClientFactory::CreateKmsClient( + const ::parquet::encryption::KmsConnectionConfig& kms_connection_config) { + std::shared_ptr<::parquet::encryption::KmsClient> kms_client; + auto st = SafeCallIntoPython([&]() -> Status { + vtable_.create_kms_client(handler_.obj(), kms_connection_config, &kms_client); + return CheckPyError(); + }); + if (!st.ok()) { + throw ::parquet::ParquetStatusException(st); + } + return kms_client; +} + +arrow::Result> +PyCryptoFactory::SafeGetFileEncryptionProperties( + const ::parquet::encryption::KmsConnectionConfig& kms_connection_config, + const ::parquet::encryption::EncryptionConfiguration& encryption_config) { + PARQUET_CATCH_AND_RETURN( + this->GetFileEncryptionProperties(kms_connection_config, encryption_config)); +} + +arrow::Result> +PyCryptoFactory::SafeGetFileDecryptionProperties( + const ::parquet::encryption::KmsConnectionConfig& kms_connection_config, + const ::parquet::encryption::DecryptionConfiguration& decryption_config) { + PARQUET_CATCH_AND_RETURN( + this->GetFileDecryptionProperties(kms_connection_config, decryption_config)); +} + +} // namespace encryption +} // namespace parquet +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/platform.h b/src/vendored/apache-arrow-12.0.1/arrow/python/platform.h index 80f7e60..e71c7ac 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/platform.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/platform.h @@ -24,7 +24,7 @@ // to mean Py_ssize_t (defining this to suppress deprecation warning) #define PY_SSIZE_T_CLEAN -#include // IWYU pragma: export +#include // IWYU pragma: export #include // Work around C2528 error @@ -32,5 +32,10 @@ #if _MSC_VER >= 
1900
 #undef timezone
 #endif
-#endif
+// https://bugs.python.org/issue36020
+// TODO(wjones127): Can remove once we drop support for CPython 3.9
+#ifdef snprintf
+#undef snprintf
+#endif
+#endif
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/pyarrow.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/pyarrow.cc
new file mode 100644
index 0000000..30d1f04
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/pyarrow.cc
@@ -0,0 +1,94 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/pyarrow.h"
+
+#include <memory>
+#include <utility>
+
+#include "arrow/array.h"
+#include "arrow/table.h"
+#include "arrow/tensor.h"
+#include "arrow/type.h"
+
+#include "arrow/python/common.h"
+#include "arrow/python/datetime.h"
+namespace {
+#include "arrow/python/pyarrow_api.h"
+}
+
+namespace arrow {
+namespace py {
+
+static Status UnwrapError(PyObject* obj, const char* expected_type) {
+  return Status::TypeError("Could not unwrap ", expected_type,
+                           " from Python object of type '", Py_TYPE(obj)->tp_name, "'");
+}
+
+int import_pyarrow() {
+#ifdef PYPY_VERSION
+  PyDateTime_IMPORT;
+#else
+  internal::InitDatetime();
+#endif
+  return ::import_pyarrow__lib();
+}
+
+#define DEFINE_WRAP_FUNCTIONS(FUNC_SUFFIX, TYPE_NAME)                                   \
+  bool is_##FUNC_SUFFIX(PyObject* obj) { return ::pyarrow_is_##FUNC_SUFFIX(obj) != 0; } \
+                                                                                        \
+  PyObject* wrap_##FUNC_SUFFIX(const std::shared_ptr<TYPE_NAME>& src) {                 \
+    return ::pyarrow_wrap_##FUNC_SUFFIX(src);                                           \
+  }                                                                                     \
+  Result<std::shared_ptr<TYPE_NAME>> unwrap_##FUNC_SUFFIX(PyObject* obj) {              \
+    auto out = ::pyarrow_unwrap_##FUNC_SUFFIX(obj);                                     \
+    if (out) {                                                                          \
+      return std::move(out);                                                            \
+    } else {                                                                            \
+      return UnwrapError(obj, #TYPE_NAME);                                              \
+    }                                                                                   \
+  }
+
+DEFINE_WRAP_FUNCTIONS(buffer, Buffer)
+
+DEFINE_WRAP_FUNCTIONS(data_type, DataType)
+DEFINE_WRAP_FUNCTIONS(field, Field)
+DEFINE_WRAP_FUNCTIONS(schema, Schema)
+
+DEFINE_WRAP_FUNCTIONS(scalar, Scalar)
+
+DEFINE_WRAP_FUNCTIONS(array, Array)
+DEFINE_WRAP_FUNCTIONS(chunked_array, ChunkedArray)
+
+DEFINE_WRAP_FUNCTIONS(sparse_coo_tensor, SparseCOOTensor)
+DEFINE_WRAP_FUNCTIONS(sparse_csc_matrix, SparseCSCMatrix)
+DEFINE_WRAP_FUNCTIONS(sparse_csf_tensor, SparseCSFTensor)
+DEFINE_WRAP_FUNCTIONS(sparse_csr_matrix, SparseCSRMatrix)
+DEFINE_WRAP_FUNCTIONS(tensor, Tensor)
+
+DEFINE_WRAP_FUNCTIONS(batch, RecordBatch)
+DEFINE_WRAP_FUNCTIONS(table, Table)
+
+#undef DEFINE_WRAP_FUNCTIONS
+
+namespace internal {
+
+int check_status(const Status& status) { return ::pyarrow_internal_check_status(status); }
+
+}  // namespace internal
+}  // namespace py
+}  // namespace arrow
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/python_test.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/python_test.cc
new file mode 100644
index 0000000..01ab8a3
--- /dev/null
+++ 
b/src/vendored/apache-arrow-12.0.1/arrow/python/python_test.cc @@ -0,0 +1,888 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include +#include + +#include "platform.h" + +#include "arrow/array.h" +#include "arrow/array/builder_binary.h" +#include "arrow/table.h" +#include "arrow/util/decimal.h" +#include "arrow/util/logging.h" + +#include "arrow/python/arrow_to_pandas.h" +#include "arrow/python/decimal.h" +#include "arrow/python/helpers.h" +#include "arrow/python/numpy_convert.h" +#include "arrow/python/numpy_interop.h" +#include "arrow/python/python_test.h" +#include "arrow/python/python_to_arrow.h" + +#define ASSERT_EQ(x, y) \ + { \ + auto&& _left = (x); \ + auto&& _right = (y); \ + if (_left != _right) { \ + return Status::Invalid("Expected equality between `", #x, "` and `", #y, \ + "`, but ", arrow::py::testing::ToString(_left), \ + " != ", arrow::py::testing::ToString(_right)); \ + } \ + } + +#define ASSERT_NE(x, y) \ + { \ + auto&& _left = (x); \ + auto&& _right = (y); \ + if (_left == _right) { \ + return Status::Invalid("Expected inequality between `", #x, "` and `", #y, \ + "`, but ", arrow::py::testing::ToString(_left), \ + " == ", arrow::py::testing::ToString(_right)); \ + } \ + } + +#define ASSERT_FALSE(v) \ + { \ + auto&& _v = (v); \ + if (!!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to false, but got ", \ + arrow::py::testing::ToString(_v)); \ + } \ + } + +#define ASSERT_TRUE(v) \ + { \ + auto&& _v = (v); \ + if (!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to true, but got ", \ + arrow::py::testing::ToString(_v)); \ + } \ + } + +#define ASSERT_FALSE_MSG(v, msg) \ + { \ + auto&& _v = (v); \ + if (!!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to false, but got ", \ + arrow::py::testing::ToString(_v), ": ", msg); \ + } \ + } + +#define ASSERT_TRUE_MSG(v, msg) \ + { \ + auto&& _v = (v); \ + if (!_v) { \ + return Status::Invalid("Expected `", #v, "` to evaluate to true, but got ", \ + arrow::py::testing::ToString(_v), ": ", msg); \ + } \ + } + +#define ASSERT_OK(expr) \ + { \ + for (::arrow::Status _st = ::arrow::internal::GenericToStatus((expr)); !_st.ok();) \ + return Status::Invalid("`", #expr, "` failed with ", _st.ToString()); \ + } + +#define ASSERT_RAISES(code, expr) \ + { \ + for (::arrow::Status _st_expr = ::arrow::internal::GenericToStatus((expr)); \ + !_st_expr.Is##code();) \ + return Status::Invalid("Expected `", #expr, "` to fail with ", #code, \ + ", but got ", _st_expr.ToString()); \ + } + +namespace arrow { + +using internal::checked_cast; + +namespace py { +namespace testing { + +// ARROW-17938: Some standard libraries have ambiguous operator<<(nullptr_t), +// work around it using a custom printer function. 
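+// Illustrative sketch (not part of the vendored sources) of the ambiguity
+// being worked around: with an affected standard library,
+//
+//   std::stringstream ss;
+//   ss << nullptr;  // ambiguous operator<< overload, fails to compile
+//
+// whereas the assertion macros above can always write
+// arrow::py::testing::ToString(value), which falls through to operator<< for
+// streamable types and is specialized below to return "nullptr" for
+// nullptr_t.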
+ +template +std::string ToString(const T& t) { + std::stringstream ss; + ss << t; + return ss.str(); +} + +template <> +std::string ToString(const std::nullptr_t&) { + return "nullptr"; +} + +namespace { + +Status TestOwnedRefMoves() { + std::vector vec; + PyObject *u, *v; + u = PyList_New(0); + v = PyList_New(0); + + { + OwnedRef ref(u); + vec.push_back(std::move(ref)); + ASSERT_EQ(ref.obj(), nullptr); + } + vec.emplace_back(v); + ASSERT_EQ(Py_REFCNT(u), 1); + ASSERT_EQ(Py_REFCNT(v), 1); + return Status::OK(); +} + +Status TestOwnedRefNoGILMoves() { + PyAcquireGIL lock; + lock.release(); + + { + std::vector vec; + PyObject *u, *v; + { + lock.acquire(); + u = PyList_New(0); + v = PyList_New(0); + lock.release(); + } + { + OwnedRefNoGIL ref(u); + vec.push_back(std::move(ref)); + ASSERT_EQ(ref.obj(), nullptr); + } + vec.emplace_back(v); + ASSERT_EQ(Py_REFCNT(u), 1); + ASSERT_EQ(Py_REFCNT(v), 1); + return Status::OK(); + } +} + +std::string FormatPythonException(const std::string& exc_class_name) { + std::stringstream ss; + ss << "Python exception: "; + ss << exc_class_name; + return ss.str(); +} + +Status TestCheckPyErrorStatus() { + Status st; + std::string expected_detail = ""; + + auto check_error = [](Status& st, const char* expected_message = "some error", + std::string expected_detail = "") { + st = CheckPyError(); + ASSERT_EQ(st.message(), expected_message); + ASSERT_FALSE(PyErr_Occurred()); + if (expected_detail.size() > 0) { + auto detail = st.detail(); + ASSERT_NE(detail, nullptr); + ASSERT_EQ(detail->ToString(), expected_detail); + } + return Status::OK(); + }; + + for (PyObject* exc_type : {PyExc_Exception, PyExc_SyntaxError}) { + PyErr_SetString(exc_type, "some error"); + ASSERT_OK(check_error(st)); + ASSERT_TRUE(st.IsUnknownError()); + } + + PyErr_SetString(PyExc_TypeError, "some error"); + ASSERT_OK(check_error(st, "some error", FormatPythonException("TypeError"))); + ASSERT_TRUE(st.IsTypeError()); + + PyErr_SetString(PyExc_ValueError, "some error"); + ASSERT_OK(check_error(st)); + ASSERT_TRUE(st.IsInvalid()); + + PyErr_SetString(PyExc_KeyError, "some error"); + ASSERT_OK(check_error(st, "'some error'")); + ASSERT_TRUE(st.IsKeyError()); + + for (PyObject* exc_type : {PyExc_OSError, PyExc_IOError}) { + PyErr_SetString(exc_type, "some error"); + ASSERT_OK(check_error(st)); + ASSERT_TRUE(st.IsIOError()); + } + + PyErr_SetString(PyExc_NotImplementedError, "some error"); + ASSERT_OK(check_error(st, "some error", FormatPythonException("NotImplementedError"))); + ASSERT_TRUE(st.IsNotImplemented()); + + // No override if a specific status code is given + PyErr_SetString(PyExc_TypeError, "some error"); + st = CheckPyError(StatusCode::SerializationError); + ASSERT_TRUE(st.IsSerializationError()); + ASSERT_EQ(st.message(), "some error"); + ASSERT_FALSE(PyErr_Occurred()); + + return Status::OK(); +} + +Status TestCheckPyErrorStatusNoGIL() { + PyAcquireGIL lock; + { + Status st; + PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); + st = ConvertPyError(); + ASSERT_FALSE(PyErr_Occurred()); + lock.release(); + ASSERT_TRUE(st.IsUnknownError()); + ASSERT_EQ(st.message(), "zzzt"); + ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError")); + return Status::OK(); + } +} + +Status TestRestorePyErrorBasics() { + PyErr_SetString(PyExc_ZeroDivisionError, "zzzt"); + auto st = ConvertPyError(); + ASSERT_FALSE(PyErr_Occurred()); + ASSERT_TRUE(st.IsUnknownError()); + ASSERT_EQ(st.message(), "zzzt"); + ASSERT_EQ(st.detail()->ToString(), FormatPythonException("ZeroDivisionError")); + 
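+  // The exception has now been fully converted: the Python error indicator is
+  // clear and the original exception object travels inside the Status detail.
+  // RestorePyError is the inverse mapping; conceptually (illustrative, not
+  // the vendored implementation) the round trip is
+  //
+  //   Status st = ConvertPyError();   // Python exception -> arrow::Status
+  //   RestorePyError(st);             // arrow::Status -> Python exception
+  //
+  // so the PyErr_Fetch checks below should observe the very same exception
+  // type and message that were originally raised.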
+ RestorePyError(st); + ASSERT_TRUE(PyErr_Occurred()); + PyObject* exc_type; + PyObject* exc_value; + PyObject* exc_traceback; + PyErr_Fetch(&exc_type, &exc_value, &exc_traceback); + ASSERT_TRUE(PyErr_GivenExceptionMatches(exc_type, PyExc_ZeroDivisionError)); + std::string py_message; + ASSERT_OK(internal::PyObject_StdStringStr(exc_value, &py_message)); + ASSERT_EQ(py_message, "zzzt"); + + return Status::OK(); +} + +Status TestPyBufferInvalidInputObject() { + std::shared_ptr res; + PyObject* input = Py_None; + auto old_refcnt = Py_REFCNT(input); + { + Status st = PyBuffer::FromPyObject(input).status(); + ASSERT_TRUE_MSG(IsPyError(st), st.ToString()); + ASSERT_FALSE(PyErr_Occurred()); + } + ASSERT_EQ(old_refcnt, Py_REFCNT(input)); + return Status::OK(); +} + +// Because of how it is declared, the Numpy C API instance initialized +// within libarrow_python.dll may not be visible in this test under Windows +// ("unresolved external symbol arrow_ARRAY_API referenced"). +#ifndef _WIN32 +Status TestPyBufferNumpyArray() { + npy_intp dims[1] = {10}; + + OwnedRef arr_ref(PyArray_SimpleNew(1, dims, NPY_FLOAT)); + PyObject* arr = arr_ref.obj(); + ASSERT_NE(arr, nullptr); + auto old_refcnt = Py_REFCNT(arr); + auto buf = std::move(PyBuffer::FromPyObject(arr)).ValueOrDie(); + + ASSERT_TRUE(buf->is_cpu()); + ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast(arr))); + ASSERT_TRUE(buf->is_mutable()); + ASSERT_EQ(buf->mutable_data(), buf->data()); + ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr)); + buf.reset(); + ASSERT_EQ(old_refcnt, Py_REFCNT(arr)); + + // Read-only + PyArray_CLEARFLAGS(reinterpret_cast(arr), NPY_ARRAY_WRITEABLE); + buf = std::move(PyBuffer::FromPyObject(arr)).ValueOrDie(); + ASSERT_TRUE(buf->is_cpu()); + ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast(arr))); + ASSERT_FALSE(buf->is_mutable()); + ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr)); + buf.reset(); + ASSERT_EQ(old_refcnt, Py_REFCNT(arr)); + + return Status::OK(); +} + +Status TestNumPyBufferNumpyArray() { + npy_intp dims[1] = {10}; + + OwnedRef arr_ref(PyArray_SimpleNew(1, dims, NPY_FLOAT)); + PyObject* arr = arr_ref.obj(); + ASSERT_NE(arr, nullptr); + auto old_refcnt = Py_REFCNT(arr); + + auto buf = std::make_shared(arr); + ASSERT_TRUE(buf->is_cpu()); + ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast(arr))); + ASSERT_TRUE(buf->is_mutable()); + ASSERT_EQ(buf->mutable_data(), buf->data()); + ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr)); + buf.reset(); + ASSERT_EQ(old_refcnt, Py_REFCNT(arr)); + + // Read-only + PyArray_CLEARFLAGS(reinterpret_cast(arr), NPY_ARRAY_WRITEABLE); + buf = std::make_shared(arr); + ASSERT_TRUE(buf->is_cpu()); + ASSERT_EQ(buf->data(), PyArray_DATA(reinterpret_cast(arr))); + ASSERT_FALSE(buf->is_mutable()); + ASSERT_EQ(old_refcnt + 1, Py_REFCNT(arr)); + buf.reset(); + ASSERT_EQ(old_refcnt, Py_REFCNT(arr)); + + return Status::OK(); +} +#endif + +Status TestPythonDecimalToString() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("-39402950693754869342983"); + PyObject* python_object = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + ASSERT_NE(python_object, nullptr); + + std::string string_result; + ASSERT_OK(internal::PythonDecimalToString(python_object, &string_result)); + + return Status::OK(); +} + +Status TestInferPrecisionAndScale() { + OwnedRef 
decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("-394029506937548693.42983"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + + const auto expected_precision = + static_cast(decimal_string.size() - 2); // 1 for -, 1 for . + const int32_t expected_scale = 5; + + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); + + return Status::OK(); +} + +Status TestInferPrecisionAndNegativeScale() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("-3.94042983E+10"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + + const auto expected_precision = 11; + const int32_t expected_scale = 0; + + ASSERT_EQ(expected_precision, metadata.precision()); + ASSERT_EQ(expected_scale, metadata.scale()); + + return Status::OK(); +} + +Status TestInferAllLeadingZeros() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("0.001"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(3, metadata.precision()); + ASSERT_EQ(3, metadata.scale()); + + return Status::OK(); +} + +Status TestInferAllLeadingZerosExponentialNotationPositive() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("0.01E5"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(4, metadata.precision()); + ASSERT_EQ(0, metadata.scale()); + + return Status::OK(); +} + +Status TestInferAllLeadingZerosExponentialNotationNegative() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("0.01E3"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(2, metadata.precision()); + ASSERT_EQ(0, metadata.scale()); + + return Status::OK(); +} + +Status TestObjectBlockWriteFails() { + StringBuilder builder; + const char value[] = {'\xf1', '\0'}; + + for (int i = 0; i < 1000; ++i) { + ASSERT_OK(builder.Append(value, static_cast(strlen(value)))); + } + 
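+  // "\xf1" is a lone UTF-8 lead byte (it announces continuation bytes that
+  // never follow), so every value appended above is invalid UTF-8. When the
+  // table is converted to pandas below, decoding these bytes into Python str
+  // objects fails, which is the error path this test exercises. Equivalent
+  // check (illustrative, not part of the vendored sources):
+  //
+  //   DCHECK(!arrow::util::ValidateUTF8(
+  //       reinterpret_cast<const uint8_t*>("\xf1"), 1));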
+ std::shared_ptr arr; + ASSERT_OK(builder.Finish(&arr)); + + auto f1 = field("f1", utf8()); + auto f2 = field("f2", utf8()); + auto f3 = field("f3", utf8()); + std::vector> fields = {f1, f2, f3}; + std::vector> cols = {arr, arr, arr}; + + auto schema = ::arrow::schema(fields); + auto table = Table::Make(schema, cols); + + Status st; + Py_BEGIN_ALLOW_THREADS; + PyObject* out; + PandasOptions options; + options.use_threads = true; + st = ConvertTableToPandas(options, table, &out); + Py_END_ALLOW_THREADS; + ASSERT_RAISES(UnknownError, st); + + return Status::OK(); +} + +Status TestMixedTypeFails() { + OwnedRef list_ref(PyList_New(3)); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + PyObject* str = PyUnicode_FromString("abc"); + ASSERT_NE(str, nullptr); + + PyObject* integer = PyLong_FromLong(1234L); + ASSERT_NE(integer, nullptr); + + PyObject* doub = PyFloat_FromDouble(123.0234); + ASSERT_NE(doub, nullptr); + + // This steals a reference to each object, so we don't need to decref them later + // just the list + ASSERT_EQ(PyList_SetItem(list, 0, str), 0); + ASSERT_EQ(PyList_SetItem(list, 1, integer), 0); + ASSERT_EQ(PyList_SetItem(list, 2, doub), 0); + + ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, {})); + + return Status::OK(); +} + +template +Status DecimalTestFromPythonDecimalRescale(std::shared_ptr type, + PyObject* python_decimal, + std::optional expected) { + DecimalValue value; + const auto& decimal_type = checked_cast(*type); + + if (expected.has_value()) { + ASSERT_OK(internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); + ASSERT_EQ(expected.value(), value); + + ASSERT_OK(internal::DecimalFromPyObject(python_decimal, decimal_type, &value)); + ASSERT_EQ(expected.value(), value); + } else { + ASSERT_RAISES(Invalid, internal::DecimalFromPythonDecimal(python_decimal, + decimal_type, &value)); + ASSERT_RAISES(Invalid, + internal::DecimalFromPyObject(python_decimal, decimal_type, &value)); + } + return Status::OK(); +} + +Status TestFromPythonDecimalRescaleNotTruncateable() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("1.001"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + // We fail when truncating values that would lose data if cast to a decimal type with + // lower scale + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal128(10, 2), + python_decimal, {})); + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal256(10, 2), + python_decimal, {})); + + return Status::OK(); +} + +Status TestFromPythonDecimalRescaleTruncateable() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("1.000"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + // We allow truncation of values that do not lose precision when dividing by 10 * the + // difference between the scales, e.g., 1.000 -> 1.00 + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal128(10, 2), + python_decimal, 100)); + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal256(10, 2), + python_decimal, 
100)); + + return Status::OK(); +} + +Status TestFromPythonNegativeDecimalRescale() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("-1.000"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal128(10, 9), + python_decimal, -1000000000)); + ASSERT_OK(DecimalTestFromPythonDecimalRescale(::arrow::decimal256(10, 9), + python_decimal, -1000000000)); + + return Status::OK(); +} + +Status TestDecimal128FromPythonInteger() { + Decimal128 value; + OwnedRef python_long(PyLong_FromLong(42)); + auto type = ::arrow::decimal128(10, 2); + const auto& decimal_type = checked_cast(*type); + ASSERT_OK(internal::DecimalFromPyObject(python_long.obj(), decimal_type, &value)); + ASSERT_EQ(4200, value); + return Status::OK(); +} + +Status TestDecimal256FromPythonInteger() { + Decimal256 value; + OwnedRef python_long(PyLong_FromLong(42)); + auto type = ::arrow::decimal256(10, 2); + const auto& decimal_type = checked_cast(*type); + ASSERT_OK(internal::DecimalFromPyObject(python_long.obj(), decimal_type, &value)); + ASSERT_EQ(4200, value); + return Status::OK(); +} + +Status TestDecimal128OverflowFails() { + Decimal128 value; + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("9999999999999999999999999999999999999.9"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(38, metadata.precision()); + ASSERT_EQ(1, metadata.scale()); + + auto type = ::arrow::decimal(38, 38); + const auto& decimal_type = checked_cast(*type); + ASSERT_RAISES(Invalid, + internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); + return Status::OK(); +} + +Status TestDecimal256OverflowFails() { + Decimal256 value; + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string( + "999999999999999999999999999999999999999999999999999999999999999999999999999.9"); + PyObject* python_decimal = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(python_decimal)); + ASSERT_EQ(76, metadata.precision()); + ASSERT_EQ(1, metadata.scale()); + + auto type = ::arrow::decimal(76, 76); + const auto& decimal_type = checked_cast(*type); + ASSERT_RAISES(Invalid, + internal::DecimalFromPythonDecimal(python_decimal, decimal_type, &value)); + return Status::OK(); +} + +Status TestNoneAndNaN() { + OwnedRef list_ref(PyList_New(4)); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + PyObject* constructor = 
decimal_constructor_.obj(); + PyObject* decimal_value = internal::DecimalFromString(constructor, "1.234"); + ASSERT_NE(decimal_value, nullptr); + + Py_INCREF(Py_None); + PyObject* missing_value1 = Py_None; + ASSERT_NE(missing_value1, nullptr); + + PyObject* missing_value2 = PyFloat_FromDouble(NPY_NAN); + ASSERT_NE(missing_value2, nullptr); + + PyObject* missing_value3 = internal::DecimalFromString(constructor, "nan"); + ASSERT_NE(missing_value3, nullptr); + + // This steals a reference to each object, so we don't need to decref them later, + // just the list + ASSERT_EQ(0, PyList_SetItem(list, 0, decimal_value)); + ASSERT_EQ(0, PyList_SetItem(list, 1, missing_value1)); + ASSERT_EQ(0, PyList_SetItem(list, 2, missing_value2)); + ASSERT_EQ(0, PyList_SetItem(list, 3, missing_value3)); + + PyConversionOptions options; + ASSERT_RAISES(TypeError, ConvertPySequence(list, nullptr, options)); + + options.from_pandas = true; + auto chunked = std::move(ConvertPySequence(list, nullptr, options)).ValueOrDie(); + ASSERT_EQ(chunked->num_chunks(), 1); + + auto arr = chunked->chunk(0); + ASSERT_TRUE(arr->IsValid(0)); + ASSERT_TRUE(arr->IsNull(1)); + ASSERT_TRUE(arr->IsNull(2)); + ASSERT_TRUE(arr->IsNull(3)); + + return Status::OK(); +} + +Status TestMixedPrecisionAndScale() { + std::vector strings{{"0.001", "1.01E5", "1.01E5"}}; + + OwnedRef list_ref(PyList_New(static_cast(strings.size()))); + PyObject* list = list_ref.obj(); + + ASSERT_NE(list, nullptr); + + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + // PyList_SetItem steals a reference to the item so we don't decref it later + PyObject* decimal_constructor = decimal_constructor_.obj(); + for (Py_ssize_t i = 0; i < static_cast(strings.size()); ++i) { + const int result = PyList_SetItem( + list, i, internal::DecimalFromString(decimal_constructor, strings.at(i))); + ASSERT_EQ(0, result); + } + + auto arr = std::move(ConvertPySequence(list, nullptr, {})).ValueOrDie(); + const auto& type = checked_cast(*arr->type()); + + int32_t expected_precision = 9; + int32_t expected_scale = 3; + ASSERT_EQ(expected_precision, type.precision()); + ASSERT_EQ(expected_scale, type.scale()); + + return Status::OK(); +} + +Status TestMixedPrecisionAndScaleSequenceConvert() { + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string_1("0.01"); + PyObject* value1 = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string_1); + ASSERT_NE(value1, nullptr); + + std::string decimal_string_2("0.001"); + PyObject* value2 = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string_2); + ASSERT_NE(value2, nullptr); + + OwnedRef list_ref(PyList_New(2)); + PyObject* list = list_ref.obj(); + + // This steals a reference to each object, so we don't need to decref them later + // just the list + ASSERT_EQ(PyList_SetItem(list, 0, value1), 0); + ASSERT_EQ(PyList_SetItem(list, 1, value2), 0); + + auto arr = std::move(ConvertPySequence(list, nullptr, {})).ValueOrDie(); + const auto& type = checked_cast(*arr->type()); + ASSERT_EQ(3, type.precision()); + ASSERT_EQ(3, type.scale()); + + return Status::OK(); +} + +Status TestSimpleInference() { + OwnedRef decimal_constructor_; + 
OwnedRef decimal_module; + + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + + std::string decimal_string("0.01"); + PyObject* value = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + ASSERT_NE(value, nullptr); + internal::DecimalMetadata metadata; + ASSERT_OK(metadata.Update(value)); + ASSERT_EQ(2, metadata.precision()); + ASSERT_EQ(2, metadata.scale()); + + return Status::OK(); +} + +Status TestUpdateWithNaN() { + internal::DecimalMetadata metadata; + OwnedRef decimal_constructor_; + OwnedRef decimal_module; + RETURN_NOT_OK(internal::ImportModule("decimal", &decimal_module)); + RETURN_NOT_OK( + internal::ImportFromModule(decimal_module.obj(), "Decimal", &decimal_constructor_)); + std::string decimal_string("nan"); + PyObject* nan_value = + internal::DecimalFromString(decimal_constructor_.obj(), decimal_string); + + ASSERT_OK(metadata.Update(nan_value)); + ASSERT_EQ(std::numeric_limits::min(), metadata.precision()); + ASSERT_EQ(std::numeric_limits::min(), metadata.scale()); + + return Status::OK(); +} + +} // namespace + +std::vector GetCppTestCases() { + return { + {"test_owned_ref_moves", TestOwnedRefMoves}, + {"test_owned_ref_nogil_moves", TestOwnedRefNoGILMoves}, + {"test_check_pyerror_status", TestCheckPyErrorStatus}, + {"test_check_pyerror_status_nogil", TestCheckPyErrorStatusNoGIL}, + {"test_restore_pyerror_basics", TestRestorePyErrorBasics}, + {"test_pybuffer_invalid_input_object", TestPyBufferInvalidInputObject}, +#ifndef _WIN32 + {"test_pybuffer_numpy_array", TestPyBufferNumpyArray}, + {"test_numpybuffer_numpy_array", TestNumPyBufferNumpyArray}, +#endif + {"test_python_decimal_to_string", TestPythonDecimalToString}, + {"test_infer_precision_and_scale", TestInferPrecisionAndScale}, + {"test_infer_precision_and_negative_scale", TestInferPrecisionAndNegativeScale}, + {"test_infer_all_leading_zeros", TestInferAllLeadingZeros}, + {"test_infer_all_leading_zeros_exponential_notation_positive", + TestInferAllLeadingZerosExponentialNotationPositive}, + {"test_infer_all_leading_zeros_exponential_notation_negative", + TestInferAllLeadingZerosExponentialNotationNegative}, + {"test_object_block_write_fails", TestObjectBlockWriteFails}, + {"test_mixed_type_fails", TestMixedTypeFails}, + {"test_from_python_decimal_rescale_not_truncateable", + TestFromPythonDecimalRescaleNotTruncateable}, + {"test_from_python_decimal_rescale_truncateable", + TestFromPythonDecimalRescaleTruncateable}, + {"test_from_python_negative_decimal_rescale", TestFromPythonNegativeDecimalRescale}, + {"test_decimal128_from_python_integer", TestDecimal128FromPythonInteger}, + {"test_decimal256_from_python_integer", TestDecimal256FromPythonInteger}, + {"test_decimal128_overflow_fails", TestDecimal128OverflowFails}, + {"test_decimal256_overflow_fails", TestDecimal256OverflowFails}, + {"test_none_and_nan", TestNoneAndNaN}, + {"test_mixed_precision_and_scale", TestMixedPrecisionAndScale}, + {"test_mixed_precision_and_scale_sequence_convert", + TestMixedPrecisionAndScaleSequenceConvert}, + {"test_simple_inference", TestSimpleInference}, + {"test_update_with_nan", TestUpdateWithNaN}, + }; +} + +} // namespace testing +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.cc new file mode 100644 index 0000000..486bd84 --- /dev/null +++ 
b/src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.cc @@ -0,0 +1,1240 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/python/python_to_arrow.h" +#include "arrow/python/numpy_interop.h" + +#include + +#include +#include +#include +#include +#include +#include + +#include "arrow/array.h" +#include "arrow/array/builder_base.h" +#include "arrow/array/builder_binary.h" +#include "arrow/array/builder_decimal.h" +#include "arrow/array/builder_dict.h" +#include "arrow/array/builder_nested.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/array/builder_time.h" +#include "arrow/chunked_array.h" +#include "arrow/result.h" +#include "arrow/scalar.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/converter.h" +#include "arrow/util/decimal.h" +#include "arrow/util/int_util_overflow.h" +#include "arrow/util/logging.h" + +#include "arrow/python/datetime.h" +#include "arrow/python/decimal.h" +#include "arrow/python/helpers.h" +#include "arrow/python/inference.h" +#include "arrow/python/iterators.h" +#include "arrow/python/numpy_convert.h" +#include "arrow/python/type_traits.h" +#include "arrow/visit_type_inline.h" + +namespace arrow { + +using internal::checked_cast; +using internal::checked_pointer_cast; + +using internal::Converter; +using internal::DictionaryConverter; +using internal::ListConverter; +using internal::PrimitiveConverter; +using internal::StructConverter; + +using internal::MakeChunker; +using internal::MakeConverter; + +namespace py { + +namespace { +enum class MonthDayNanoField { kMonths, kWeeksAndDays, kDaysOnly, kNanoseconds }; + +template +struct MonthDayNanoTraits; + +struct MonthDayNanoAttrData { + const char* name; + const int64_t multiplier; +}; + +template <> +struct MonthDayNanoTraits { + using c_type = int32_t; + static const MonthDayNanoAttrData attrs[]; +}; + +const MonthDayNanoAttrData MonthDayNanoTraits::attrs[] = { + {"years", 1}, {"months", /*months_in_year=*/12}, {nullptr, 0}}; + +template <> +struct MonthDayNanoTraits { + using c_type = int32_t; + static const MonthDayNanoAttrData attrs[]; +}; + +const MonthDayNanoAttrData MonthDayNanoTraits::attrs[] = + {{"weeks", 1}, {"days", /*days_in_week=*/7}, {nullptr, 0}}; + +template <> +struct MonthDayNanoTraits { + using c_type = int32_t; + static const MonthDayNanoAttrData attrs[]; +}; + +const MonthDayNanoAttrData MonthDayNanoTraits::attrs[] = { + {"days", 1}, {nullptr, 0}}; + +template <> +struct MonthDayNanoTraits { + using c_type = int64_t; + static const MonthDayNanoAttrData attrs[]; +}; + +const MonthDayNanoAttrData MonthDayNanoTraits::attrs[] = + {{"hours", 1}, + {"minutes", /*minutes_in_hours=*/60}, + {"seconds", 
/*seconds_in_minute=*/60}, + {"milliseconds", /*milliseconds_in_seconds*/ 1000}, + {"microseconds", /*microseconds_in_millseconds=*/1000}, + {"nanoseconds", /*nanoseconds_in_microseconds=*/1000}, + {nullptr, 0}}; + +template +struct PopulateMonthDayNano { + using Traits = MonthDayNanoTraits; + using field_c_type = typename Traits::c_type; + + static Status Field(PyObject* obj, field_c_type* out, bool* found_attrs) { + *out = 0; + for (const MonthDayNanoAttrData* attr = &Traits::attrs[0]; attr->multiplier != 0; + ++attr) { + if (attr->multiplier != 1 && + ::arrow::internal::MultiplyWithOverflow( + static_cast(attr->multiplier), *out, out)) { + return Status::Invalid("Overflow on: ", (attr - 1)->name, + " for: ", internal::PyObject_StdStringRepr(obj)); + } + + OwnedRef field_value(PyObject_GetAttrString(obj, attr->name)); + if (field_value.obj() == nullptr) { + // No attribute present, skip to the next one. + PyErr_Clear(); + continue; + } + RETURN_IF_PYERROR(); + *found_attrs = true; + field_c_type value; + RETURN_NOT_OK(internal::CIntFromPython(field_value.obj(), &value, attr->name)); + if (::arrow::internal::AddWithOverflow(*out, value, out)) { + return Status::Invalid("Overflow on: ", attr->name, + " for: ", internal::PyObject_StdStringRepr(obj)); + } + } + + return Status::OK(); + } +}; + +// Utility for converting single python objects to their intermediate C representations +// which can be fed to the typed builders +class PyValue { + public: + // Type aliases for shorter signature definitions + using I = PyObject*; + using O = PyConversionOptions; + + // Used for null checking before actually converting the values + static bool IsNull(const O& options, I obj) { + if (options.from_pandas) { + return internal::PandasObjectIsNull(obj); + } else { + return obj == Py_None; + } + } + + // Used for post-conversion numpy NaT sentinel checking + static bool IsNaT(const TimestampType*, int64_t value) { + return internal::npy_traits::isnull(value); + } + + // Used for post-conversion numpy NaT sentinel checking + static bool IsNaT(const DurationType*, int64_t value) { + return internal::npy_traits::isnull(value); + } + + static Result Convert(const NullType*, const O&, I obj) { + if (obj == Py_None) { + return nullptr; + } else { + return Status::Invalid("Invalid null value"); + } + } + + static Result Convert(const BooleanType*, const O&, I obj) { + if (obj == Py_True) { + return true; + } else if (obj == Py_False) { + return false; + } else if (PyArray_IsScalar(obj, Bool)) { + return reinterpret_cast(obj)->obval == NPY_TRUE; + } else { + return internal::InvalidValue(obj, "tried to convert to boolean"); + } + } + + template + static enable_if_integer> Convert(const T* type, const O&, + I obj) { + typename T::c_type value; + auto status = internal::CIntFromPython(obj, &value); + if (ARROW_PREDICT_TRUE(status.ok())) { + return value; + } else if (!internal::PyIntScalar_Check(obj)) { + std::stringstream ss; + ss << "tried to convert to " << type->ToString(); + return internal::InvalidValue(obj, ss.str()); + } else { + return status; + } + } + + static Result Convert(const HalfFloatType*, const O&, I obj) { + uint16_t value; + RETURN_NOT_OK(PyFloat_AsHalf(obj, &value)); + return value; + } + + static Result Convert(const FloatType*, const O&, I obj) { + float value; + if (internal::PyFloatScalar_Check(obj)) { + value = static_cast(PyFloat_AsDouble(obj)); + RETURN_IF_PYERROR(); + } else if (internal::PyIntScalar_Check(obj)) { + RETURN_NOT_OK(internal::IntegerScalarToFloat32Safe(obj, &value)); + } 
else { + return internal::InvalidValue(obj, "tried to convert to float32"); + } + return value; + } + + static Result Convert(const DoubleType*, const O&, I obj) { + double value; + if (PyFloat_Check(obj)) { + value = PyFloat_AS_DOUBLE(obj); + } else if (internal::PyFloatScalar_Check(obj)) { + // Other kinds of float-y things + value = PyFloat_AsDouble(obj); + RETURN_IF_PYERROR(); + } else if (internal::PyIntScalar_Check(obj)) { + RETURN_NOT_OK(internal::IntegerScalarToDoubleSafe(obj, &value)); + } else { + return internal::InvalidValue(obj, "tried to convert to double"); + } + return value; + } + + static Result Convert(const Decimal128Type* type, const O&, I obj) { + Decimal128 value; + RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); + return value; + } + + static Result Convert(const Decimal256Type* type, const O&, I obj) { + Decimal256 value; + RETURN_NOT_OK(internal::DecimalFromPyObject(obj, *type, &value)); + return value; + } + + static Result Convert(const Date32Type*, const O&, I obj) { + int32_t value; + if (PyDate_Check(obj)) { + auto pydate = reinterpret_cast(obj); + value = static_cast(internal::PyDate_to_days(pydate)); + } else { + RETURN_NOT_OK( + internal::CIntFromPython(obj, &value, "Integer too large for date32")); + } + return value; + } + + static Result Convert(const Date64Type*, const O&, I obj) { + int64_t value; + if (PyDateTime_Check(obj)) { + auto pydate = reinterpret_cast(obj); + value = internal::PyDateTime_to_ms(pydate); + // Truncate any intraday milliseconds + // TODO: introduce an option for this + value -= value % 86400000LL; + } else if (PyDate_Check(obj)) { + auto pydate = reinterpret_cast(obj); + value = internal::PyDate_to_ms(pydate); + } else { + RETURN_NOT_OK( + internal::CIntFromPython(obj, &value, "Integer too large for date64")); + } + return value; + } + + static Result Convert(const Time32Type* type, const O&, I obj) { + int32_t value; + if (PyTime_Check(obj)) { + switch (type->unit()) { + case TimeUnit::SECOND: + value = static_cast(internal::PyTime_to_s(obj)); + break; + case TimeUnit::MILLI: + value = static_cast(internal::PyTime_to_ms(obj)); + break; + default: + return Status::UnknownError("Invalid time unit"); + } + } else { + RETURN_NOT_OK(internal::CIntFromPython(obj, &value, "Integer too large for int32")); + } + return value; + } + + static Result Convert(const Time64Type* type, const O&, I obj) { + int64_t value; + if (PyTime_Check(obj)) { + switch (type->unit()) { + case TimeUnit::MICRO: + value = internal::PyTime_to_us(obj); + break; + case TimeUnit::NANO: + value = internal::PyTime_to_ns(obj); + break; + default: + return Status::UnknownError("Invalid time unit"); + } + } else { + RETURN_NOT_OK(internal::CIntFromPython(obj, &value, "Integer too large for int64")); + } + return value; + } + + static Result Convert(const TimestampType* type, const O& options, I obj) { + int64_t value, offset; + if (PyDateTime_Check(obj)) { + if (ARROW_PREDICT_FALSE(options.ignore_timezone)) { + offset = 0; + } else { + ARROW_ASSIGN_OR_RAISE(offset, internal::PyDateTime_utcoffset_s(obj)); + } + auto dt = reinterpret_cast(obj); + switch (type->unit()) { + case TimeUnit::SECOND: + value = internal::PyDateTime_to_s(dt) - offset; + break; + case TimeUnit::MILLI: + value = internal::PyDateTime_to_ms(dt) - offset * 1000LL; + break; + case TimeUnit::MICRO: + value = internal::PyDateTime_to_us(dt) - offset * 1000000LL; + break; + case TimeUnit::NANO: + if (internal::IsPandasTimestamp(obj)) { + // pd.Timestamp value attribute contains the 
offset from unix epoch
+            // so no adjustment for timezone is needed.
+            OwnedRef nanos(PyObject_GetAttrString(obj, "value"));
+            RETURN_IF_PYERROR();
+            RETURN_NOT_OK(internal::CIntFromPython(nanos.obj(), &value));
+          } else {
+            // Conversion to nanoseconds can overflow -> check multiply of microseconds
+            value = internal::PyDateTime_to_us(dt);
+            if (arrow::internal::MultiplyWithOverflow(value, 1000LL, &value)) {
+              return internal::InvalidValue(obj,
+                                            "out of bounds for nanosecond resolution");
+            }
+
+            // Adjust with offset and check for overflow
+            if (arrow::internal::SubtractWithOverflow(value, offset * 1000000000LL,
+                                                      &value)) {
+              return internal::InvalidValue(obj,
+                                            "out of bounds for nanosecond resolution");
+            }
+          }
+          break;
+        default:
+          return Status::UnknownError("Invalid time unit");
+      }
+    } else if (PyArray_CheckAnyScalarExact(obj)) {
+      // validate that the numpy scalar has np.datetime64 dtype
+      std::shared_ptr<DataType> numpy_type;
+      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &numpy_type));
+      if (!numpy_type->Equals(*type)) {
+        return Status::NotImplemented("Expected np.datetime64 but got: ",
+                                      numpy_type->ToString());
+      }
+      return reinterpret_cast<PyDatetimeScalarObject*>(obj)->obval;
+    } else {
+      RETURN_NOT_OK(internal::CIntFromPython(obj, &value));
+    }
+    return value;
+  }
+
+  static Result<MonthDayNanoIntervalType::MonthDayNanos> Convert(
+      const MonthDayNanoIntervalType* /*type*/, const O& /*options*/, I obj) {
+    MonthDayNanoIntervalType::MonthDayNanos output;
+    bool found_attrs = false;
+    RETURN_NOT_OK(PopulateMonthDayNano<MonthDayNanoField::kMonths>::Field(
+        obj, &output.months, &found_attrs));
+    // On relativedelta, weeks is a property calculated from days; on
+    // DateOffset it is a field of its own. timedelta doesn't have a weeks
+    // attribute.
+    PyObject* pandas_date_offset_type = internal::BorrowPandasDataOffsetType();
+    bool is_date_offset = pandas_date_offset_type == (PyObject*)Py_TYPE(obj);
+    if (!is_date_offset) {
+      RETURN_NOT_OK(PopulateMonthDayNano<MonthDayNanoField::kWeeksAndDays>::Field(
+          obj, &output.days, &found_attrs));
+    } else {
+      RETURN_NOT_OK(PopulateMonthDayNano<MonthDayNanoField::kDaysOnly>::Field(
+          obj, &output.days, &found_attrs));
+    }
+    RETURN_NOT_OK(PopulateMonthDayNano<MonthDayNanoField::kNanoseconds>::Field(
+        obj, &output.nanoseconds, &found_attrs));
+
+    // date_offset can have zero fields.
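+    // Illustrative inputs this path accepts (a sketch, not exhaustive):
+    // pandas.DateOffset(months=1), dateutil relativedelta(weeks=2, days=1),
+    // datetime.timedelta(days=3), or a plain (months, days, nanoseconds)
+    // tuple such as (1, 2, 3000000000) via the tuple fallback below.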
+    if (found_attrs || is_date_offset) {
+      return output;
+    }
+    if (PyTuple_Check(obj) && PyTuple_Size(obj) == 3) {
+      RETURN_NOT_OK(internal::CIntFromPython(PyTuple_GET_ITEM(obj, 0), &output.months,
+                                             "Months (tuple item #0) too large"));
+      RETURN_NOT_OK(internal::CIntFromPython(PyTuple_GET_ITEM(obj, 1), &output.days,
+                                             "Days (tuple item #1) too large"));
+      RETURN_NOT_OK(internal::CIntFromPython(PyTuple_GET_ITEM(obj, 2),
+                                             &output.nanoseconds,
+                                             "Nanoseconds (tuple item #2) too large"));
+      return output;
+    }
+    return Status::TypeError("No temporal attributes found on object.");
+  }
+
+  static Result<int64_t> Convert(const DurationType* type, const O&, I obj) {
+    int64_t value;
+    if (PyDelta_Check(obj)) {
+      auto dt = reinterpret_cast<PyDateTime_Delta*>(obj);
+      switch (type->unit()) {
+        case TimeUnit::SECOND:
+          value = internal::PyDelta_to_s(dt);
+          break;
+        case TimeUnit::MILLI:
+          value = internal::PyDelta_to_ms(dt);
+          break;
+        case TimeUnit::MICRO: {
+          ARROW_ASSIGN_OR_RAISE(value, internal::PyDelta_to_us(dt));
+          break;
+        }
+        case TimeUnit::NANO:
+          if (internal::IsPandasTimedelta(obj)) {
+            OwnedRef nanos(PyObject_GetAttrString(obj, "value"));
+            RETURN_IF_PYERROR();
+            RETURN_NOT_OK(internal::CIntFromPython(nanos.obj(), &value));
+          } else {
+            ARROW_ASSIGN_OR_RAISE(value, internal::PyDelta_to_ns(dt));
+          }
+          break;
+        default:
+          return Status::UnknownError("Invalid time unit");
+      }
+    } else if (PyArray_CheckAnyScalarExact(obj)) {
+      // validate that the numpy scalar has np.timedelta64 dtype
+      std::shared_ptr<DataType> numpy_type;
+      RETURN_NOT_OK(NumPyDtypeToArrow(PyArray_DescrFromScalar(obj), &numpy_type));
+      if (!numpy_type->Equals(*type)) {
+        return Status::NotImplemented("Expected np.timedelta64 but got: ",
+                                      numpy_type->ToString());
+      }
+      return reinterpret_cast<PyTimedeltaScalarObject*>(obj)->obval;
+    } else {
+      RETURN_NOT_OK(internal::CIntFromPython(obj, &value));
+    }
+    return value;
+  }
+
+  // The binary-like intermediate representation is PyBytesView because it keeps temporary
+  // python objects alive (non-contiguous memoryview) and stores whether the original
+  // object was unicode encoded or not, which is used for unicode -> bytes coercion if
+  // there is a non-unicode object observed.
+
+  static Status Convert(const BaseBinaryType*, const O&, I obj, PyBytesView& view) {
+    return view.ParseString(obj);
+  }
+
+  static Status Convert(const FixedSizeBinaryType* type, const O&, I obj,
+                        PyBytesView& view) {
+    ARROW_RETURN_NOT_OK(view.ParseString(obj));
+    if (view.size != type->byte_width()) {
+      std::stringstream ss;
+      ss << "expected to be length " << type->byte_width() << " was " << view.size;
+      return internal::InvalidValue(obj, ss.str());
+    } else {
+      return Status::OK();
+    }
+  }
+
+  template <typename T>
+  static enable_if_string<T, Status> Convert(const T*, const O& options, I obj,
+                                             PyBytesView& view) {
+    if (options.strict) {
+      // Strict conversion, force output to be unicode / utf8 and validate that
+      // any binary values are utf8
+      ARROW_RETURN_NOT_OK(view.ParseString(obj, true));
+      if (!view.is_utf8) {
+        return internal::InvalidValue(obj, "was not a utf8 string");
+      }
+      return Status::OK();
+    } else {
+      // Non-strict conversion; keep track of whether values are unicode or bytes
+      return view.ParseString(obj);
+    }
+  }
+
+  static Result Convert(const DataType* type, const O&, I obj) {
+    return Status::NotImplemented("PyValue::Convert is not implemented for type ", type);
+  }
+};
+
+// The base Converter class is a mixin with predefined behavior and constructors.
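+// A sketch of the flow these classes implement (all names appear below):
+// ConvertPySequence() infers or takes a type, builds a typed converter via
+// MakeConverter, then calls converter->Extend(seq, size), which reserves
+// capacity once and visits each element, dispatching to the type-specific
+// Append(item) overloads; converter->ToChunkedArray() produces the result.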
+class PyConverter : public Converter { + public: + // Iterate over the input values and defer the conversion to the Append method + Status Extend(PyObject* values, int64_t size, int64_t offset = 0) override { + DCHECK_GE(size, offset); + /// Ensure we've allocated enough space + RETURN_NOT_OK(this->Reserve(size - offset)); + // Iterate over the items adding each one + return internal::VisitSequence( + values, offset, + [this](PyObject* item, bool* /* unused */) { return this->Append(item); }); + } + + // Convert and append a sequence of values masked with a numpy array + Status ExtendMasked(PyObject* values, PyObject* mask, int64_t size, + int64_t offset = 0) override { + DCHECK_GE(size, offset); + /// Ensure we've allocated enough space + RETURN_NOT_OK(this->Reserve(size - offset)); + // Iterate over the items adding each one + return internal::VisitSequenceMasked( + values, mask, offset, [this](PyObject* item, bool is_masked, bool* /* unused */) { + if (is_masked) { + return this->AppendNull(); + } else { + // This will also apply the null-checking convention in the event + // that the value is not masked + return this->Append(item); // perhaps use AppendValue instead? + } + }); + } +}; + +template +class PyPrimitiveConverter; + +template +class PyListConverter; + +template +class PyDictionaryConverter; + +class PyStructConverter; + +template +struct PyConverterTrait; + +template +struct PyConverterTrait< + T, enable_if_t<(!is_nested_type::value && !is_interval_type::value && + !is_extension_type::value) || + std::is_same::value>> { + using type = PyPrimitiveConverter; +}; + +template +struct PyConverterTrait> { + using type = PyListConverter; +}; + +template <> +struct PyConverterTrait { + using type = PyStructConverter; +}; + +template <> +struct PyConverterTrait { + template + using dictionary_type = PyDictionaryConverter; +}; + +template +class PyPrimitiveConverter> + : public PrimitiveConverter { + public: + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + return this->primitive_builder_->AppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + if (scalar->is_valid) { + return Status::Invalid("Cannot append scalar of type ", scalar->type->ToString(), + " to builder for type null"); + } else { + return this->primitive_builder_->AppendNull(); + } + } else { + ARROW_ASSIGN_OR_RAISE( + auto converted, PyValue::Convert(this->primitive_type_, this->options_, value)); + return this->primitive_builder_->Append(converted); + } + } +}; + +template +class PyPrimitiveConverter< + T, enable_if_t::value || is_number_type::value || + is_decimal_type::value || is_date_type::value || + is_time_type::value || + std::is_same::value>> + : public PrimitiveConverter { + public: + Status Append(PyObject* value) override { + // Since the required space has been already allocated in the Extend functions we can + // rely on the Unsafe builder API which improves the performance. 
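+    // A minimal sketch of the same contract on a bare builder (illustrative
+    // only):
+    //   Int64Builder b;
+    //   RETURN_NOT_OK(b.Reserve(2));  // one up-front capacity check
+    //   b.UnsafeAppend(1);            // no per-value capacity check
+    //   b.UnsafeAppendNull();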
+ if (PyValue::IsNull(this->options_, value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar)); + } else { + ARROW_ASSIGN_OR_RAISE( + auto converted, PyValue::Convert(this->primitive_type_, this->options_, value)); + this->primitive_builder_->UnsafeAppend(converted); + } + return Status::OK(); + } +}; + +template +class PyPrimitiveConverter< + T, enable_if_t::value || is_duration_type::value>> + : public PrimitiveConverter { + public: + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar)); + } else { + ARROW_ASSIGN_OR_RAISE( + auto converted, PyValue::Convert(this->primitive_type_, this->options_, value)); + // Numpy NaT sentinels can be checked after the conversion + if (PyArray_CheckAnyScalarExact(value) && + PyValue::IsNaT(this->primitive_type_, converted)) { + this->primitive_builder_->UnsafeAppendNull(); + } else { + this->primitive_builder_->UnsafeAppend(converted); + } + } + return Status::OK(); + } +}; + +template +class PyPrimitiveConverter::value>> + : public PrimitiveConverter { + public: + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar)); + } else { + ARROW_RETURN_NOT_OK( + PyValue::Convert(this->primitive_type_, this->options_, value, view_)); + ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); + this->primitive_builder_->UnsafeAppend(view_.bytes); + } + return Status::OK(); + } + + protected: + PyBytesView view_; +}; + +template +class PyPrimitiveConverter> + : public PrimitiveConverter { + public: + using OffsetType = typename T::offset_type; + + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + ARROW_RETURN_NOT_OK(this->primitive_builder_->AppendScalar(*scalar)); + } else { + ARROW_RETURN_NOT_OK( + PyValue::Convert(this->primitive_type_, this->options_, value, view_)); + if (!view_.is_utf8) { + // observed binary value + observed_binary_ = true; + } + // Since we don't know the varying length input size in advance, we need to + // reserve space in the value builder one by one. ReserveData raises CapacityError + // if the value would not fit into the array. 
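+      // Note: only the offsets were pre-reserved by Extend; the variable-length
+      // data buffer still grows value by value, and the CapacityError that
+      // ReserveData raises here is what the chunker in ConvertPySequence
+      // (below) uses as the signal to close the current chunk and start a
+      // new one.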
+ ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); + this->primitive_builder_->UnsafeAppend(view_.bytes, + static_cast(view_.size)); + } + return Status::OK(); + } + + Result> ToArray() override { + ARROW_ASSIGN_OR_RAISE(auto array, (PrimitiveConverter::ToArray())); + if (observed_binary_) { + // if we saw any non-unicode, cast results to BinaryArray + auto binary_type = TypeTraits::type_singleton(); + return array->View(binary_type); + } else { + return array; + } + } + + protected: + PyBytesView view_; + bool observed_binary_ = false; +}; + +template +class PyDictionaryConverter> + : public DictionaryConverter { + public: + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + return this->value_builder_->AppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + return this->value_builder_->AppendScalar(*scalar, 1); + } else { + ARROW_ASSIGN_OR_RAISE(auto converted, + PyValue::Convert(this->value_type_, this->options_, value)); + return this->value_builder_->Append(converted); + } + } +}; + +template +class PyDictionaryConverter> + : public DictionaryConverter { + public: + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + return this->value_builder_->AppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + return this->value_builder_->AppendScalar(*scalar, 1); + } else { + ARROW_RETURN_NOT_OK( + PyValue::Convert(this->value_type_, this->options_, value, view_)); + return this->value_builder_->Append(view_.bytes, static_cast(view_.size)); + } + } + + protected: + PyBytesView view_; +}; + +template +class PyListConverter : public ListConverter { + public: + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + return this->list_builder_->AppendNull(); + } + + RETURN_NOT_OK(this->list_builder_->Append()); + if (PyArray_Check(value)) { + RETURN_NOT_OK(AppendNdarray(value)); + } else if (PySequence_Check(value)) { + RETURN_NOT_OK(AppendSequence(value)); + } else if (PySet_Check(value) || (Py_TYPE(value) == &PyDictValues_Type)) { + RETURN_NOT_OK(AppendIterable(value)); + } else if (PyDict_Check(value) && this->type()->id() == Type::MAP) { + // Branch to support Python Dict with `map` DataType. 
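+      // e.g. a Python dict {"a": 1, "b": 2} is flattened by PyDict_Items into
+      // the pair sequence [("a", 1), ("b", 2)] and appended as map entries.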
+ auto items = PyDict_Items(value); + OwnedRef item_ref(items); + RETURN_NOT_OK(AppendSequence(items)); + } else { + return internal::InvalidType( + value, "was not a sequence or recognized null for conversion to list type"); + } + + return ValidateBuilder(this->list_type_); + } + + protected: + Status ValidateBuilder(const MapType*) { + if (this->list_builder_->key_builder()->null_count() > 0) { + return Status::Invalid("Invalid Map: key field can not contain null values"); + } else { + return Status::OK(); + } + } + + Status ValidateBuilder(const BaseListType*) { return Status::OK(); } + + Status AppendSequence(PyObject* value) { + int64_t size = static_cast(PySequence_Size(value)); + RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); + return this->value_converter_->Extend(value, size); + } + + Status AppendIterable(PyObject* value) { + PyObject* iterator = PyObject_GetIter(value); + OwnedRef iter_ref(iterator); + while (PyObject* item = PyIter_Next(iterator)) { + OwnedRef item_ref(item); + RETURN_NOT_OK(this->value_converter_->Reserve(1)); + RETURN_NOT_OK(this->value_converter_->Append(item)); + } + return Status::OK(); + } + + Status AppendNdarray(PyObject* value) { + PyArrayObject* ndarray = reinterpret_cast(value); + if (PyArray_NDIM(ndarray) != 1) { + return Status::Invalid("Can only convert 1-dimensional array values"); + } + const int64_t size = PyArray_SIZE(ndarray); + RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); + + const auto value_type = this->value_converter_->builder()->type(); + switch (value_type->id()) { +// If the value type does not match the expected NumPy dtype, then fall through +// to a slower PySequence-based path +#define LIST_FAST_CASE(TYPE_ID, TYPE, NUMPY_TYPE) \ + case Type::TYPE_ID: { \ + if (PyArray_DESCR(ndarray)->type_num != NUMPY_TYPE) { \ + return this->value_converter_->Extend(value, size); \ + } \ + return AppendNdarrayTyped(ndarray); \ + } + LIST_FAST_CASE(BOOL, BooleanType, NPY_BOOL) + LIST_FAST_CASE(UINT8, UInt8Type, NPY_UINT8) + LIST_FAST_CASE(INT8, Int8Type, NPY_INT8) + LIST_FAST_CASE(UINT16, UInt16Type, NPY_UINT16) + LIST_FAST_CASE(INT16, Int16Type, NPY_INT16) + LIST_FAST_CASE(UINT32, UInt32Type, NPY_UINT32) + LIST_FAST_CASE(INT32, Int32Type, NPY_INT32) + LIST_FAST_CASE(UINT64, UInt64Type, NPY_UINT64) + LIST_FAST_CASE(INT64, Int64Type, NPY_INT64) + LIST_FAST_CASE(HALF_FLOAT, HalfFloatType, NPY_FLOAT16) + LIST_FAST_CASE(FLOAT, FloatType, NPY_FLOAT) + LIST_FAST_CASE(DOUBLE, DoubleType, NPY_DOUBLE) + LIST_FAST_CASE(TIMESTAMP, TimestampType, NPY_DATETIME) + LIST_FAST_CASE(DURATION, DurationType, NPY_TIMEDELTA) +#undef LIST_FAST_CASE + default: { + return this->value_converter_->Extend(value, size); + } + } + } + + template + Status AppendNdarrayTyped(PyArrayObject* ndarray) { + // no need to go through the conversion + using NumpyTrait = internal::npy_traits; + using NumpyType = typename NumpyTrait::value_type; + using ValueBuilderType = typename TypeTraits::BuilderType; + + const bool null_sentinels_possible = + // Always treat Numpy's NaT as null + NUMPY_TYPE == NPY_DATETIME || NUMPY_TYPE == NPY_TIMEDELTA || + // Observing pandas's null sentinels + (this->options_.from_pandas && NumpyTrait::supports_nulls); + + auto value_builder = + checked_cast(this->value_converter_->builder().get()); + + Ndarray1DIndexer values(ndarray); + if (null_sentinels_possible) { + for (int64_t i = 0; i < values.size(); ++i) { + if (NumpyTrait::isnull(values[i])) { + RETURN_NOT_OK(value_builder->AppendNull()); + } else { + 
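+          // Not a null sentinel: append the raw value directly. (For
+          // NPY_DATETIME / NPY_TIMEDELTA the sentinel caught above is NaT,
+          // numpy's minimum-int64 marker.)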
RETURN_NOT_OK(value_builder->Append(values[i])); + } + } + } else if (!values.is_strided()) { + RETURN_NOT_OK(value_builder->AppendValues(values.data(), values.size())); + } else { + for (int64_t i = 0; i < values.size(); ++i) { + RETURN_NOT_OK(value_builder->Append(values[i])); + } + } + return Status::OK(); + } +}; + +class PyStructConverter : public StructConverter { + public: + Status Append(PyObject* value) override { + if (PyValue::IsNull(this->options_, value)) { + return this->struct_builder_->AppendNull(); + } else if (arrow::py::is_scalar(value)) { + ARROW_ASSIGN_OR_RAISE(std::shared_ptr scalar, + arrow::py::unwrap_scalar(value)); + return this->struct_builder_->AppendScalar(*scalar); + } + switch (input_kind_) { + case InputKind::DICT: + RETURN_NOT_OK(this->struct_builder_->Append()); + return AppendDict(value); + case InputKind::TUPLE: + RETURN_NOT_OK(this->struct_builder_->Append()); + return AppendTuple(value); + case InputKind::ITEMS: + RETURN_NOT_OK(this->struct_builder_->Append()); + return AppendItems(value); + default: + RETURN_NOT_OK(InferInputKind(value)); + return Append(value); + } + } + + protected: + Status Init(MemoryPool* pool) override { + RETURN_NOT_OK((StructConverter::Init(pool))); + + // Store the field names as a PyObjects for dict matching + num_fields_ = this->struct_type_->num_fields(); + bytes_field_names_.reset(PyList_New(num_fields_)); + unicode_field_names_.reset(PyList_New(num_fields_)); + RETURN_IF_PYERROR(); + + for (int i = 0; i < num_fields_; i++) { + const auto& field_name = this->struct_type_->field(i)->name(); + PyObject* bytes = PyBytes_FromStringAndSize(field_name.c_str(), field_name.size()); + PyObject* unicode = + PyUnicode_FromStringAndSize(field_name.c_str(), field_name.size()); + RETURN_IF_PYERROR(); + PyList_SET_ITEM(bytes_field_names_.obj(), i, bytes); + PyList_SET_ITEM(unicode_field_names_.obj(), i, unicode); + } + return Status::OK(); + } + + Status InferInputKind(PyObject* value) { + // Infer input object's type, note that heterogeneous sequences are not allowed + if (PyDict_Check(value)) { + input_kind_ = InputKind::DICT; + } else if (PyTuple_Check(value)) { + input_kind_ = InputKind::TUPLE; + } else if (PySequence_Check(value)) { + input_kind_ = InputKind::ITEMS; + } else { + return internal::InvalidType(value, + "was not a dict, tuple, or recognized null value " + "for conversion to struct type"); + } + return Status::OK(); + } + + Status InferKeyKind(PyObject* items) { + for (int i = 0; i < PySequence_Length(items); i++) { + // retrieve the key from the passed key-value pairs + ARROW_ASSIGN_OR_RAISE(auto pair, GetKeyValuePair(items, i)); + + // check key exists between the unicode field names + bool do_contain = PySequence_Contains(unicode_field_names_.obj(), pair.first); + RETURN_IF_PYERROR(); + if (do_contain) { + key_kind_ = KeyKind::UNICODE; + return Status::OK(); + } + + // check key exists between the bytes field names + do_contain = PySequence_Contains(bytes_field_names_.obj(), pair.first); + RETURN_IF_PYERROR(); + if (do_contain) { + key_kind_ = KeyKind::BYTES; + return Status::OK(); + } + } + return Status::OK(); + } + + Status AppendEmpty() { + for (int i = 0; i < num_fields_; i++) { + RETURN_NOT_OK(this->children_[i]->Append(Py_None)); + } + return Status::OK(); + } + + Status AppendTuple(PyObject* tuple) { + if (!PyTuple_Check(tuple)) { + return internal::InvalidType(tuple, "was expecting a tuple"); + } + if (PyTuple_GET_SIZE(tuple) != num_fields_) { + return Status::Invalid("Tuple size must be equal to number of 
struct fields"); + } + for (int i = 0; i < num_fields_; i++) { + PyObject* value = PyTuple_GET_ITEM(tuple, i); + RETURN_NOT_OK(this->children_[i]->Append(value)); + } + return Status::OK(); + } + + Status AppendDict(PyObject* dict) { + if (!PyDict_Check(dict)) { + return internal::InvalidType(dict, "was expecting a dict"); + } + switch (key_kind_) { + case KeyKind::UNICODE: + return AppendDict(dict, unicode_field_names_.obj()); + case KeyKind::BYTES: + return AppendDict(dict, bytes_field_names_.obj()); + default: + RETURN_NOT_OK(InferKeyKind(PyDict_Items(dict))); + if (key_kind_ == KeyKind::UNKNOWN) { + // was unable to infer the type which means that all keys are absent + return AppendEmpty(); + } else { + return AppendDict(dict); + } + } + } + + Status AppendItems(PyObject* items) { + if (!PySequence_Check(items)) { + return internal::InvalidType(items, "was expecting a sequence of key-value items"); + } + switch (key_kind_) { + case KeyKind::UNICODE: + return AppendItems(items, unicode_field_names_.obj()); + case KeyKind::BYTES: + return AppendItems(items, bytes_field_names_.obj()); + default: + RETURN_NOT_OK(InferKeyKind(items)); + if (key_kind_ == KeyKind::UNKNOWN) { + // was unable to infer the type which means that all keys are absent + return AppendEmpty(); + } else { + return AppendItems(items); + } + } + } + + Status AppendDict(PyObject* dict, PyObject* field_names) { + // NOTE we're ignoring any extraneous dict items + for (int i = 0; i < num_fields_; i++) { + PyObject* name = PyList_GET_ITEM(field_names, i); // borrowed + PyObject* value = PyDict_GetItem(dict, name); // borrowed + if (value == NULL) { + RETURN_IF_PYERROR(); + } + RETURN_NOT_OK(this->children_[i]->Append(value ? value : Py_None)); + } + return Status::OK(); + } + + Result> GetKeyValuePair(PyObject* seq, int index) { + PyObject* pair = PySequence_GetItem(seq, index); + RETURN_IF_PYERROR(); + if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) { + return internal::InvalidType(pair, "was expecting tuple of (key, value) pair"); + } + PyObject* key = PyTuple_GetItem(pair, 0); + RETURN_IF_PYERROR(); + PyObject* value = PyTuple_GetItem(pair, 1); + RETURN_IF_PYERROR(); + return std::make_pair(key, value); + } + + Status AppendItems(PyObject* items, PyObject* field_names) { + auto length = static_cast(PySequence_Size(items)); + RETURN_IF_PYERROR(); + + // append the values for the defined fields + for (int i = 0; i < std::min(num_fields_, length); i++) { + // retrieve the key-value pair + ARROW_ASSIGN_OR_RAISE(auto pair, GetKeyValuePair(items, i)); + + // validate that the key and the field name are equal + PyObject* name = PyList_GET_ITEM(field_names, i); + bool are_equal = PyObject_RichCompareBool(pair.first, name, Py_EQ); + RETURN_IF_PYERROR(); + + // finally append to the respective child builder + if (are_equal) { + RETURN_NOT_OK(this->children_[i]->Append(pair.second)); + } else { + ARROW_ASSIGN_OR_RAISE(auto key_view, PyBytesView::FromString(pair.first)); + ARROW_ASSIGN_OR_RAISE(auto name_view, PyBytesView::FromString(name)); + return Status::Invalid("The expected field name is `", name_view.bytes, "` but `", + key_view.bytes, "` was given"); + } + } + // insert null values for missing fields + for (int i = length; i < num_fields_; i++) { + RETURN_NOT_OK(this->children_[i]->AppendNull()); + } + return Status::OK(); + } + + // Whether we're converting from a sequence of dicts or tuples or list of pairs + enum class InputKind { UNKNOWN, DICT, TUPLE, ITEMS } input_kind_ = InputKind::UNKNOWN; + // Whether the input 
dictionary keys' type is python bytes or unicode + enum class KeyKind { UNKNOWN, BYTES, UNICODE } key_kind_ = KeyKind::UNKNOWN; + // Store the field names as a PyObjects for dict matching + OwnedRef bytes_field_names_; + OwnedRef unicode_field_names_; + // Store the number of fields for later reuse + int num_fields_; +}; + +// Convert *obj* to a sequence if necessary +// Fill *size* to its length. If >= 0 on entry, *size* is an upper size +// bound that may lead to truncation. +Status ConvertToSequenceAndInferSize(PyObject* obj, PyObject** seq, int64_t* size) { + if (PySequence_Check(obj)) { + // obj is already a sequence + int64_t real_size = static_cast(PySequence_Size(obj)); + RETURN_IF_PYERROR(); + if (*size < 0) { + *size = real_size; + } else { + *size = std::min(real_size, *size); + } + Py_INCREF(obj); + *seq = obj; + } else if (*size < 0) { + // unknown size, exhaust iterator + *seq = PySequence_List(obj); + RETURN_IF_PYERROR(); + *size = static_cast(PyList_GET_SIZE(*seq)); + } else { + // size is known but iterator could be infinite + Py_ssize_t i, n = *size; + PyObject* iter = PyObject_GetIter(obj); + RETURN_IF_PYERROR(); + OwnedRef iter_ref(iter); + PyObject* lst = PyList_New(n); + RETURN_IF_PYERROR(); + for (i = 0; i < n; i++) { + PyObject* item = PyIter_Next(iter); + if (!item) { + // either an error occurred or the iterator ended + RETURN_IF_PYERROR(); + break; + } + PyList_SET_ITEM(lst, i, item); + } + // Shrink list if len(iterator) < size + if (i < n && PyList_SetSlice(lst, i, n, NULL)) { + Py_DECREF(lst); + RETURN_IF_PYERROR(); + } + *seq = lst; + *size = std::min(i, *size); + } + return Status::OK(); +} + +} // namespace + +Result> ConvertPySequence(PyObject* obj, PyObject* mask, + PyConversionOptions options, + MemoryPool* pool) { + PyAcquireGIL lock; + + PyObject* seq = nullptr; + OwnedRef tmp_seq_nanny; + + ARROW_ASSIGN_OR_RAISE(auto is_pandas_imported, internal::IsModuleImported("pandas")); + if (is_pandas_imported) { + // If pandas has been already imported initialize the static pandas objects to + // support converting from pd.Timedelta and pd.Timestamp objects + internal::InitPandasStaticData(); + } + + int64_t size = options.size; + RETURN_NOT_OK(ConvertToSequenceAndInferSize(obj, &seq, &size)); + tmp_seq_nanny.reset(seq); + + // In some cases, type inference may be "loose", like strings. If the user + // passed pa.string(), then we will error if we encounter any non-UTF8 + // value. If not, then we will allow the result to be a BinaryArray + if (options.type == nullptr) { + ARROW_ASSIGN_OR_RAISE(options.type, InferArrowType(seq, mask, options.from_pandas)); + options.strict = false; + } else { + options.strict = true; + } + DCHECK_GE(size, 0); + + ARROW_ASSIGN_OR_RAISE(auto converter, (MakeConverter( + options.type, options, pool))); + if (converter->may_overflow()) { + // The converter hierarchy contains binary- or list-like builders which can overflow + // depending on the input values. Wrap the converter with a chunker which detects + // the overflow and automatically creates new chunks. 
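+    // Illustrative consequence (a sketch, assuming binary-like input): a
+    // sequence of strings whose combined size exceeds the int32 offset range
+    // of a single BinaryArray cannot fit in one chunk, so the chunker
+    // finishes the current array when CapacityError is raised and continues
+    // in a fresh one; the caller then sees a ChunkedArray with
+    // num_chunks() > 1 instead of an error.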
+ ARROW_ASSIGN_OR_RAISE(auto chunked_converter, MakeChunker(std::move(converter))); + if (mask != nullptr && mask != Py_None) { + RETURN_NOT_OK(chunked_converter->ExtendMasked(seq, mask, size)); + } else { + RETURN_NOT_OK(chunked_converter->Extend(seq, size)); + } + return chunked_converter->ToChunkedArray(); + } else { + // If the converter can't overflow spare the capacity error checking on the hot-path, + // this improves the performance roughly by ~10% for primitive types. + if (mask != nullptr && mask != Py_None) { + RETURN_NOT_OK(converter->ExtendMasked(seq, mask, size)); + } else { + RETURN_NOT_OK(converter->Extend(seq, size)); + } + return converter->ToChunkedArray(); + } +} + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.h b/src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.h index d737047..d167996 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/python_to_arrow.h @@ -25,9 +25,9 @@ #include #include +#include "arrow/python/visibility.h" #include "arrow/type.h" #include "arrow/util/macros.h" -#include "arrow/python/visibility.h" #include "arrow/python/common.h" diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/serialize.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/serialize.cc new file mode 100644 index 0000000..ad079cb --- /dev/null +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/serialize.cc @@ -0,0 +1,798 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/python/serialize.h" +#include "arrow/python/numpy_interop.h" + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "arrow/array.h" +#include "arrow/array/builder_binary.h" +#include "arrow/array/builder_nested.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/array/builder_union.h" +#include "arrow/io/interfaces.h" +#include "arrow/io/memory.h" +#include "arrow/ipc/util.h" +#include "arrow/ipc/writer.h" +#include "arrow/record_batch.h" +#include "arrow/result.h" +#include "arrow/tensor.h" +#include "arrow/util/logging.h" + +#include "arrow/python/common.h" +#include "arrow/python/datetime.h" +#include "arrow/python/helpers.h" +#include "arrow/python/iterators.h" +#include "arrow/python/numpy_convert.h" +#include "arrow/python/platform.h" +#include "arrow/python/pyarrow.h" + +constexpr int32_t kMaxRecursionDepth = 100; + +namespace arrow { + +using internal::checked_cast; + +namespace py { + +class SequenceBuilder; +class DictBuilder; + +Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, + int32_t recursion_depth, SerializedPyObject* blobs_out); + +// A Sequence is a heterogeneous collections of elements. 
It can contain +// scalar Python types, lists, tuples, dictionaries, tensors and sparse tensors. +class SequenceBuilder { + public: + explicit SequenceBuilder(MemoryPool* pool = default_memory_pool()) + : pool_(pool), + types_(::arrow::int8(), pool), + offsets_(::arrow::int32(), pool), + type_map_(PythonType::NUM_PYTHON_TYPES, -1) { + auto null_builder = std::make_shared(pool); + auto initial_ty = dense_union({field("0", null())}); + builder_.reset(new DenseUnionBuilder(pool, {null_builder}, initial_ty)); + } + + // Appending a none to the sequence + Status AppendNone() { return builder_->AppendNull(); } + + template + Status CreateAndUpdate(std::shared_ptr* child_builder, int8_t tag, + MakeBuilderFn make_builder) { + if (!*child_builder) { + child_builder->reset(make_builder()); + std::ostringstream convert; + convert.imbue(std::locale::classic()); + convert << static_cast(tag); + type_map_[tag] = builder_->AppendChild(*child_builder, convert.str()); + } + return builder_->Append(type_map_[tag]); + } + + template + Status AppendPrimitive(std::shared_ptr* child_builder, const T val, + int8_t tag) { + RETURN_NOT_OK( + CreateAndUpdate(child_builder, tag, [this]() { return new BuilderType(pool_); })); + return (*child_builder)->Append(val); + } + + // Appending a boolean to the sequence + Status AppendBool(const bool data) { + return AppendPrimitive(&bools_, data, PythonType::BOOL); + } + + // Appending an int64_t to the sequence + Status AppendInt64(const int64_t data) { + return AppendPrimitive(&ints_, data, PythonType::INT); + } + + // Append a list of bytes to the sequence + Status AppendBytes(const uint8_t* data, int32_t length) { + RETURN_NOT_OK(CreateAndUpdate(&bytes_, PythonType::BYTES, + [this]() { return new BinaryBuilder(pool_); })); + return bytes_->Append(data, length); + } + + // Appending a string to the sequence + Status AppendString(const char* data, int32_t length) { + RETURN_NOT_OK(CreateAndUpdate(&strings_, PythonType::STRING, + [this]() { return new StringBuilder(pool_); })); + return strings_->Append(data, length); + } + + // Appending a half_float to the sequence + Status AppendHalfFloat(const npy_half data) { + return AppendPrimitive(&half_floats_, data, PythonType::HALF_FLOAT); + } + + // Appending a float to the sequence + Status AppendFloat(const float data) { + return AppendPrimitive(&floats_, data, PythonType::FLOAT); + } + + // Appending a double to the sequence + Status AppendDouble(const double data) { + return AppendPrimitive(&doubles_, data, PythonType::DOUBLE); + } + + // Appending a Date64 timestamp to the sequence + Status AppendDate64(const int64_t timestamp) { + return AppendPrimitive(&date64s_, timestamp, PythonType::DATE64); + } + + // Appending a tensor to the sequence + // + // \param tensor_index Index of the tensor in the object. + Status AppendTensor(const int32_t tensor_index) { + RETURN_NOT_OK(CreateAndUpdate(&tensor_indices_, PythonType::TENSOR, + [this]() { return new Int32Builder(pool_); })); + return tensor_indices_->Append(tensor_index); + } + + // Appending a sparse coo tensor to the sequence + // + // \param sparse_coo_tensor_index Index of the sparse coo tensor in the object. 
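+  //
+  // As with AppendTensor above, only an int32 index is appended inline; the
+  // tensor payload itself is carried out-of-band in SerializedPyObject
+  // (Append pushes it onto blobs_out->sparse_tensors), which keeps the
+  // union array small.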
+ Status AppendSparseCOOTensor(const int32_t sparse_coo_tensor_index) { + RETURN_NOT_OK(CreateAndUpdate(&sparse_coo_tensor_indices_, + PythonType::SPARSECOOTENSOR, + [this]() { return new Int32Builder(pool_); })); + return sparse_coo_tensor_indices_->Append(sparse_coo_tensor_index); + } + + // Appending a sparse csr matrix to the sequence + // + // \param sparse_csr_matrix_index Index of the sparse csr matrix in the object. + Status AppendSparseCSRMatrix(const int32_t sparse_csr_matrix_index) { + RETURN_NOT_OK(CreateAndUpdate(&sparse_csr_matrix_indices_, + PythonType::SPARSECSRMATRIX, + [this]() { return new Int32Builder(pool_); })); + return sparse_csr_matrix_indices_->Append(sparse_csr_matrix_index); + } + + // Appending a sparse csc matrix to the sequence + // + // \param sparse_csc_matrix_index Index of the sparse csc matrix in the object. + Status AppendSparseCSCMatrix(const int32_t sparse_csc_matrix_index) { + RETURN_NOT_OK(CreateAndUpdate(&sparse_csc_matrix_indices_, + PythonType::SPARSECSCMATRIX, + [this]() { return new Int32Builder(pool_); })); + return sparse_csc_matrix_indices_->Append(sparse_csc_matrix_index); + } + + // Appending a sparse csf tensor to the sequence + // + // \param sparse_csf_tensor_index Index of the sparse csf tensor in the object. + Status AppendSparseCSFTensor(const int32_t sparse_csf_tensor_index) { + RETURN_NOT_OK(CreateAndUpdate(&sparse_csf_tensor_indices_, + PythonType::SPARSECSFTENSOR, + [this]() { return new Int32Builder(pool_); })); + return sparse_csf_tensor_indices_->Append(sparse_csf_tensor_index); + } + + // Appending a numpy ndarray to the sequence + // + // \param tensor_index Index of the tensor in the object. + Status AppendNdarray(const int32_t ndarray_index) { + RETURN_NOT_OK(CreateAndUpdate(&ndarray_indices_, PythonType::NDARRAY, + [this]() { return new Int32Builder(pool_); })); + return ndarray_indices_->Append(ndarray_index); + } + + // Appending a buffer to the sequence + // + // \param buffer_index Index of the buffer in the object. + Status AppendBuffer(const int32_t buffer_index) { + RETURN_NOT_OK(CreateAndUpdate(&buffer_indices_, PythonType::BUFFER, + [this]() { return new Int32Builder(pool_); })); + return buffer_indices_->Append(buffer_index); + } + + Status AppendSequence(PyObject* context, PyObject* sequence, int8_t tag, + std::shared_ptr& target_sequence, + std::unique_ptr& values, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + if (recursion_depth >= kMaxRecursionDepth) { + return Status::NotImplemented( + "This object exceeds the maximum recursion depth. 
It may contain itself " + "recursively."); + } + RETURN_NOT_OK(CreateAndUpdate(&target_sequence, tag, [this, &values]() { + values.reset(new SequenceBuilder(pool_)); + return new ListBuilder(pool_, values->builder()); + })); + RETURN_NOT_OK(target_sequence->Append()); + return internal::VisitIterable( + sequence, [&](PyObject* obj, bool* keep_going /* unused */) { + return Append(context, obj, values.get(), recursion_depth, blobs_out); + }); + } + + Status AppendList(PyObject* context, PyObject* list, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + return AppendSequence(context, list, PythonType::LIST, lists_, list_values_, + recursion_depth + 1, blobs_out); + } + + Status AppendTuple(PyObject* context, PyObject* tuple, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + return AppendSequence(context, tuple, PythonType::TUPLE, tuples_, tuple_values_, + recursion_depth + 1, blobs_out); + } + + Status AppendSet(PyObject* context, PyObject* set, int32_t recursion_depth, + SerializedPyObject* blobs_out) { + return AppendSequence(context, set, PythonType::SET, sets_, set_values_, + recursion_depth + 1, blobs_out); + } + + Status AppendDict(PyObject* context, PyObject* dict, int32_t recursion_depth, + SerializedPyObject* blobs_out); + + // Finish building the sequence and return the result. + // Input arrays may be nullptr + Status Finish(std::shared_ptr* out) { return builder_->Finish(out); } + + std::shared_ptr builder() { return builder_; } + + private: + MemoryPool* pool_; + + Int8Builder types_; + Int32Builder offsets_; + + /// Mapping from PythonType to child index + std::vector type_map_; + + std::shared_ptr bools_; + std::shared_ptr ints_; + std::shared_ptr bytes_; + std::shared_ptr strings_; + std::shared_ptr half_floats_; + std::shared_ptr floats_; + std::shared_ptr doubles_; + std::shared_ptr date64s_; + + std::unique_ptr list_values_; + std::shared_ptr lists_; + std::unique_ptr dict_values_; + std::shared_ptr dicts_; + std::unique_ptr tuple_values_; + std::shared_ptr tuples_; + std::unique_ptr set_values_; + std::shared_ptr sets_; + + std::shared_ptr tensor_indices_; + std::shared_ptr sparse_coo_tensor_indices_; + std::shared_ptr sparse_csr_matrix_indices_; + std::shared_ptr sparse_csc_matrix_indices_; + std::shared_ptr sparse_csf_tensor_indices_; + std::shared_ptr ndarray_indices_; + std::shared_ptr buffer_indices_; + + std::shared_ptr builder_; +}; + +// Constructing dictionaries of key/value pairs. Sequences of +// keys and values are built separately using a pair of +// SequenceBuilders. The resulting Arrow representation +// can be obtained via the Finish method. +class DictBuilder { + public: + explicit DictBuilder(MemoryPool* pool = nullptr) : keys_(pool), vals_(pool) { + builder_.reset(new StructBuilder(struct_({field("keys", dense_union(FieldVector{})), + field("vals", dense_union(FieldVector{}))}), + pool, {keys_.builder(), vals_.builder()})); + } + + // Builder for the keys of the dictionary + SequenceBuilder& keys() { return keys_; } + // Builder for the values of the dictionary + SequenceBuilder& vals() { return vals_; } + + // Construct an Arrow StructArray representing the dictionary. + // Contains a field "keys" for the keys and "vals" for the values. 
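+  //
+  // Illustration (shape inferred from the builders above): a Python dict
+  // such as {"k": 1} becomes one entry of a
+  // list<struct<keys: dense_union<...>, vals: dense_union<...>>>, each
+  // union growing a child per Python type actually observed.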
+ Status Finish(std::shared_ptr* out) { return builder_->Finish(out); } + + std::shared_ptr builder() { return builder_; } + + private: + SequenceBuilder keys_; + SequenceBuilder vals_; + std::shared_ptr builder_; +}; + +Status SequenceBuilder::AppendDict(PyObject* context, PyObject* dict, + int32_t recursion_depth, + SerializedPyObject* blobs_out) { + if (recursion_depth >= kMaxRecursionDepth) { + return Status::NotImplemented( + "This object exceeds the maximum recursion depth. It may contain itself " + "recursively."); + } + RETURN_NOT_OK(CreateAndUpdate(&dicts_, PythonType::DICT, [this]() { + dict_values_.reset(new DictBuilder(pool_)); + return new ListBuilder(pool_, dict_values_->builder()); + })); + RETURN_NOT_OK(dicts_->Append()); + PyObject* key; + PyObject* value; + Py_ssize_t pos = 0; + while (PyDict_Next(dict, &pos, &key, &value)) { + RETURN_NOT_OK(dict_values_->builder()->Append()); + RETURN_NOT_OK( + Append(context, key, &dict_values_->keys(), recursion_depth + 1, blobs_out)); + RETURN_NOT_OK( + Append(context, value, &dict_values_->vals(), recursion_depth + 1, blobs_out)); + } + + // This block is used to decrement the reference counts of the results + // returned by the serialization callback, which is called in AppendArray, + // in DeserializeDict and in Append + static PyObject* py_type = PyUnicode_FromString("_pytype_"); + if (PyDict_Contains(dict, py_type)) { + // If the dictionary contains the key "_pytype_", then the user has to + // have registered a callback. + if (context == Py_None) { + return Status::Invalid("No serialization callback set"); + } + Py_XDECREF(dict); + } + return Status::OK(); +} + +Status CallCustomCallback(PyObject* context, PyObject* method_name, PyObject* elem, + PyObject** result) { + if (context == Py_None) { + *result = NULL; + return Status::SerializationError("error while calling callback on ", + internal::PyObject_StdStringRepr(elem), + ": handler not registered"); + } else { + *result = PyObject_CallMethodObjArgs(context, method_name, elem, NULL); + return CheckPyError(); + } +} + +Status CallSerializeCallback(PyObject* context, PyObject* value, + PyObject** serialized_object) { + OwnedRef method_name(PyUnicode_FromString("_serialize_callback")); + RETURN_NOT_OK(CallCustomCallback(context, method_name.obj(), value, serialized_object)); + if (!PyDict_Check(*serialized_object)) { + return Status::TypeError("serialization callback must return a valid dictionary"); + } + return Status::OK(); +} + +Status CallDeserializeCallback(PyObject* context, PyObject* value, + PyObject** deserialized_object) { + OwnedRef method_name(PyUnicode_FromString("_deserialize_callback")); + return CallCustomCallback(context, method_name.obj(), value, deserialized_object); +} + +Status AppendArray(PyObject* context, PyArrayObject* array, SequenceBuilder* builder, + int32_t recursion_depth, SerializedPyObject* blobs_out); + +template +Status AppendIntegerScalar(PyObject* obj, SequenceBuilder* builder) { + int64_t value = reinterpret_cast(obj)->obval; + return builder->AppendInt64(value); +} + +// Append a potentially 64-bit wide unsigned Numpy scalar. +// Must check for overflow as we reinterpret it as signed int64. 
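+// For example, np.uint64(2**63) reinterprets to 9223372036854775808, one
+// past INT64_MAX (9223372036854775807), so it is rejected here, while
+// np.uint64(2**63 - 1) still round-trips through AppendInt64.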
+template +Status AppendLargeUnsignedScalar(PyObject* obj, SequenceBuilder* builder) { + constexpr uint64_t max_value = std::numeric_limits::max(); + + uint64_t value = reinterpret_cast(obj)->obval; + if (value > max_value) { + return Status::Invalid("cannot serialize Numpy uint64 scalar >= 2**63"); + } + return builder->AppendInt64(static_cast(value)); +} + +Status AppendScalar(PyObject* obj, SequenceBuilder* builder) { + if (PyArray_IsScalar(obj, Bool)) { + return builder->AppendBool(reinterpret_cast(obj)->obval != 0); + } else if (PyArray_IsScalar(obj, Half)) { + return builder->AppendHalfFloat(reinterpret_cast(obj)->obval); + } else if (PyArray_IsScalar(obj, Float)) { + return builder->AppendFloat(reinterpret_cast(obj)->obval); + } else if (PyArray_IsScalar(obj, Double)) { + return builder->AppendDouble(reinterpret_cast(obj)->obval); + } + if (PyArray_IsScalar(obj, Byte)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, Short)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, Int)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, Long)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, LongLong)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, Int64)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, UByte)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, UShort)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, UInt)) { + return AppendIntegerScalar(obj, builder); + } else if (PyArray_IsScalar(obj, ULong)) { + return AppendLargeUnsignedScalar(obj, builder); + } else if (PyArray_IsScalar(obj, ULongLong)) { + return AppendLargeUnsignedScalar(obj, builder); + } else if (PyArray_IsScalar(obj, UInt64)) { + return AppendLargeUnsignedScalar(obj, builder); + } + return Status::NotImplemented("Numpy scalar type not recognized"); +} + +Status Append(PyObject* context, PyObject* elem, SequenceBuilder* builder, + int32_t recursion_depth, SerializedPyObject* blobs_out) { + // The bool case must precede the int case (PyInt_Check passes for bools) + if (PyBool_Check(elem)) { + RETURN_NOT_OK(builder->AppendBool(elem == Py_True)); + } else if (PyArray_DescrFromScalar(elem)->type_num == NPY_HALF) { + npy_half halffloat = reinterpret_cast(elem)->obval; + RETURN_NOT_OK(builder->AppendHalfFloat(halffloat)); + } else if (PyFloat_Check(elem)) { + RETURN_NOT_OK(builder->AppendDouble(PyFloat_AS_DOUBLE(elem))); + } else if (PyLong_Check(elem)) { + int overflow = 0; + int64_t data = PyLong_AsLongLongAndOverflow(elem, &overflow); + if (!overflow) { + RETURN_NOT_OK(builder->AppendInt64(data)); + } else { + // Attempt to serialize the object using the custom callback. 
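+      // This branch handles integers outside int64 range, e.g. 2**100:
+      // PyLong_AsLongLongAndOverflow sets the overflow flag instead of
+      // raising, and the value is routed through the context's
+      // _serialize_callback.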
+      PyObject* serialized_object;
+      // The reference count of serialized_object will be decremented in SerializeDict
+      RETURN_NOT_OK(CallSerializeCallback(context, elem, &serialized_object));
+      RETURN_NOT_OK(
+          builder->AppendDict(context, serialized_object, recursion_depth, blobs_out));
+    }
+  } else if (PyBytes_Check(elem)) {
+    auto data = reinterpret_cast<uint8_t*>(PyBytes_AS_STRING(elem));
+    int32_t size = -1;
+    RETURN_NOT_OK(internal::CastSize(PyBytes_GET_SIZE(elem), &size));
+    RETURN_NOT_OK(builder->AppendBytes(data, size));
+  } else if (PyUnicode_Check(elem)) {
+    ARROW_ASSIGN_OR_RAISE(auto view, PyBytesView::FromUnicode(elem));
+    int32_t size = -1;
+    RETURN_NOT_OK(internal::CastSize(view.size, &size));
+    RETURN_NOT_OK(builder->AppendString(view.bytes, size));
+  } else if (PyList_CheckExact(elem)) {
+    RETURN_NOT_OK(builder->AppendList(context, elem, recursion_depth, blobs_out));
+  } else if (PyDict_CheckExact(elem)) {
+    RETURN_NOT_OK(builder->AppendDict(context, elem, recursion_depth, blobs_out));
+  } else if (PyTuple_CheckExact(elem)) {
+    RETURN_NOT_OK(builder->AppendTuple(context, elem, recursion_depth, blobs_out));
+  } else if (PySet_Check(elem)) {
+    RETURN_NOT_OK(builder->AppendSet(context, elem, recursion_depth, blobs_out));
+  } else if (PyArray_IsScalar(elem, Generic)) {
+    RETURN_NOT_OK(AppendScalar(elem, builder));
+  } else if (PyArray_CheckExact(elem)) {
+    RETURN_NOT_OK(AppendArray(context, reinterpret_cast<PyArrayObject*>(elem), builder,
+                              recursion_depth, blobs_out));
+  } else if (elem == Py_None) {
+    RETURN_NOT_OK(builder->AppendNone());
+  } else if (PyDateTime_Check(elem)) {
+    PyDateTime_DateTime* datetime = reinterpret_cast<PyDateTime_DateTime*>(elem);
+    RETURN_NOT_OK(builder->AppendDate64(internal::PyDateTime_to_us(datetime)));
+  } else if (is_buffer(elem)) {
+    RETURN_NOT_OK(builder->AppendBuffer(static_cast<int32_t>(blobs_out->buffers.size())));
+    ARROW_ASSIGN_OR_RAISE(auto buffer, unwrap_buffer(elem));
+    blobs_out->buffers.push_back(buffer);
+  } else if (is_tensor(elem)) {
+    RETURN_NOT_OK(builder->AppendTensor(static_cast<int32_t>(blobs_out->tensors.size())));
+    ARROW_ASSIGN_OR_RAISE(auto tensor, unwrap_tensor(elem));
+    blobs_out->tensors.push_back(tensor);
+  } else if (is_sparse_coo_tensor(elem)) {
+    RETURN_NOT_OK(builder->AppendSparseCOOTensor(
+        static_cast<int32_t>(blobs_out->sparse_tensors.size())));
+    ARROW_ASSIGN_OR_RAISE(auto tensor, unwrap_sparse_coo_tensor(elem));
+    blobs_out->sparse_tensors.push_back(tensor);
+  } else if (is_sparse_csr_matrix(elem)) {
+    RETURN_NOT_OK(builder->AppendSparseCSRMatrix(
+        static_cast<int32_t>(blobs_out->sparse_tensors.size())));
+    ARROW_ASSIGN_OR_RAISE(auto matrix, unwrap_sparse_csr_matrix(elem));
+    blobs_out->sparse_tensors.push_back(matrix);
+  } else if (is_sparse_csc_matrix(elem)) {
+    RETURN_NOT_OK(builder->AppendSparseCSCMatrix(
+        static_cast<int32_t>(blobs_out->sparse_tensors.size())));
+    ARROW_ASSIGN_OR_RAISE(auto matrix, unwrap_sparse_csc_matrix(elem));
+    blobs_out->sparse_tensors.push_back(matrix);
+  } else if (is_sparse_csf_tensor(elem)) {
+    RETURN_NOT_OK(builder->AppendSparseCSFTensor(
+        static_cast<int32_t>(blobs_out->sparse_tensors.size())));
+    ARROW_ASSIGN_OR_RAISE(auto tensor, unwrap_sparse_csf_tensor(elem));
+    blobs_out->sparse_tensors.push_back(tensor);
+  } else {
+    // Attempt to serialize the object using the custom callback.
+    PyObject* serialized_object;
+    // The reference count of serialized_object will be decremented in SerializeDict
+    RETURN_NOT_OK(CallSerializeCallback(context, elem, &serialized_object));
+    RETURN_NOT_OK(
+        builder->AppendDict(context, serialized_object, recursion_depth, blobs_out));
+  }
+  return Status::OK();
+}
+
+Status AppendArray(PyObject* context, PyArrayObject* array, SequenceBuilder* builder,
+                   int32_t recursion_depth, SerializedPyObject* blobs_out) {
+  int dtype = PyArray_TYPE(array);
+  switch (dtype) {
+    case NPY_UINT8:
+    case NPY_INT8:
+    case NPY_UINT16:
+    case NPY_INT16:
+    case NPY_UINT32:
+    case NPY_INT32:
+    case NPY_UINT64:
+    case NPY_INT64:
+    case NPY_HALF:
+    case NPY_FLOAT:
+    case NPY_DOUBLE: {
+      RETURN_NOT_OK(
+          builder->AppendNdarray(static_cast<int32_t>(blobs_out->ndarrays.size())));
+      std::shared_ptr<Tensor> tensor;
+      RETURN_NOT_OK(NdarrayToTensor(default_memory_pool(),
+                                    reinterpret_cast<PyObject*>(array), {}, &tensor));
+      blobs_out->ndarrays.push_back(tensor);
+    } break;
+    default: {
+      PyObject* serialized_object;
+      // The reference count of serialized_object will be decremented in SerializeDict
+      RETURN_NOT_OK(CallSerializeCallback(context, reinterpret_cast<PyObject*>(array),
+                                          &serialized_object));
+      RETURN_NOT_OK(builder->AppendDict(context, serialized_object, recursion_depth + 1,
+                                        blobs_out));
+    }
+  }
+  return Status::OK();
+}
+
+std::shared_ptr<RecordBatch> MakeBatch(std::shared_ptr<Array> data) {
+  auto field = std::make_shared<Field>("list", data->type());
+  auto schema = ::arrow::schema({field});
+  return RecordBatch::Make(schema, data->length(), {data});
+}
+
+Status SerializeObject(PyObject* context, PyObject* sequence, SerializedPyObject* out) {
+  PyAcquireGIL lock;
+  SequenceBuilder builder;
+  RETURN_NOT_OK(internal::VisitIterable(
+      sequence, [&](PyObject* obj, bool* keep_going /* unused */) {
+        return Append(context, obj, &builder, 0, out);
+      }));
+  std::shared_ptr<Array> array;
+  RETURN_NOT_OK(builder.Finish(&array));
+  out->batch = MakeBatch(array);
+  return Status::OK();
+}
+
+Status SerializeNdarray(std::shared_ptr<Tensor> tensor, SerializedPyObject* out) {
+  std::shared_ptr<Array> array;
+  SequenceBuilder builder;
+  RETURN_NOT_OK(builder.AppendNdarray(static_cast<int32_t>(out->ndarrays.size())));
+  out->ndarrays.push_back(tensor);
+  RETURN_NOT_OK(builder.Finish(&array));
+  out->batch = MakeBatch(array);
+  return Status::OK();
+}
+
+Status WriteNdarrayHeader(std::shared_ptr<DataType> dtype,
+                          const std::vector<int64_t>& shape, int64_t tensor_num_bytes,
+                          io::OutputStream* dst) {
+  auto empty_tensor = std::make_shared<Tensor>(
+      dtype, std::make_shared<Buffer>(nullptr, tensor_num_bytes), shape);
+  SerializedPyObject serialized_tensor;
+  RETURN_NOT_OK(SerializeNdarray(empty_tensor, &serialized_tensor));
+  return serialized_tensor.WriteTo(dst);
+}
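+// Rough layout of the stream produced by WriteTo below (alignment padding
+// elided):
+//
+//   int32 num_tensors | int32 num_sparse_tensors | int32 num_ndarrays |
+//   int32 num_buffers | record batch (8-byte aligned) |
+//   tensors, sparse tensors and ndarrays (each 64-byte aligned) |
+//   buffers, each prefixed by its int64 size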
+SerializedPyObject::SerializedPyObject()
+    : ipc_options(ipc::IpcWriteOptions::Defaults()) {}
+
+Status SerializedPyObject::WriteTo(io::OutputStream* dst) {
+  int32_t num_tensors = static_cast<int32_t>(this->tensors.size());
+  int32_t num_sparse_tensors = static_cast<int32_t>(this->sparse_tensors.size());
+  int32_t num_ndarrays = static_cast<int32_t>(this->ndarrays.size());
+  int32_t num_buffers = static_cast<int32_t>(this->buffers.size());
+  RETURN_NOT_OK(
+      dst->Write(reinterpret_cast<const uint8_t*>(&num_tensors), sizeof(int32_t)));
+  RETURN_NOT_OK(
+      dst->Write(reinterpret_cast<const uint8_t*>(&num_sparse_tensors), sizeof(int32_t)));
+  RETURN_NOT_OK(
+      dst->Write(reinterpret_cast<const uint8_t*>(&num_ndarrays), sizeof(int32_t)));
+  RETURN_NOT_OK(
+      dst->Write(reinterpret_cast<const uint8_t*>(&num_buffers), sizeof(int32_t)));
+
+  // Align stream to 8-byte offset
+  RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kArrowIpcAlignment));
+  RETURN_NOT_OK(ipc::WriteRecordBatchStream({this->batch}, this->ipc_options, dst));
+
+  // Align stream to 64-byte offset so tensor bodies are 64-byte aligned
+  RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
+
+  int32_t metadata_length;
+  int64_t body_length;
+  for (const auto& tensor : this->tensors) {
+    RETURN_NOT_OK(ipc::WriteTensor(*tensor, dst, &metadata_length, &body_length));
+    RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
+  }
+
+  for (const auto& sparse_tensor : this->sparse_tensors) {
+    RETURN_NOT_OK(
+        ipc::WriteSparseTensor(*sparse_tensor, dst, &metadata_length, &body_length));
+    RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
+  }
+
+  for (const auto& tensor : this->ndarrays) {
+    RETURN_NOT_OK(ipc::WriteTensor(*tensor, dst, &metadata_length, &body_length));
+    RETURN_NOT_OK(ipc::AlignStream(dst, ipc::kTensorAlignment));
+  }
+
+  for (const auto& buffer : this->buffers) {
+    int64_t size = buffer->size();
+    RETURN_NOT_OK(dst->Write(reinterpret_cast<const uint8_t*>(&size), sizeof(int64_t)));
+    RETURN_NOT_OK(dst->Write(buffer->data(), size));
+  }
+
+  return Status::OK();
+}
+
+namespace {
+
+Status CountSparseTensors(
+    const std::vector<std::shared_ptr<SparseTensor>>& sparse_tensors, PyObject** out) {
+  OwnedRef num_sparse_tensors(PyDict_New());
+  size_t num_coo = 0;
+  size_t num_csr = 0;
+  size_t num_csc = 0;
+  size_t num_csf = 0;
+  size_t ndim_csf = 0;
+
+  for (const auto& sparse_tensor : sparse_tensors) {
+    switch (sparse_tensor->format_id()) {
+      case SparseTensorFormat::COO:
+        ++num_coo;
+        break;
+      case SparseTensorFormat::CSR:
+        ++num_csr;
+        break;
+      case SparseTensorFormat::CSC:
+        ++num_csc;
+        break;
+      case SparseTensorFormat::CSF:
+        ++num_csf;
+        ndim_csf += sparse_tensor->ndim();
+        break;
+    }
+  }
+
+  PyDict_SetItemString(num_sparse_tensors.obj(), "coo", PyLong_FromSize_t(num_coo));
+  PyDict_SetItemString(num_sparse_tensors.obj(), "csr", PyLong_FromSize_t(num_csr));
+  PyDict_SetItemString(num_sparse_tensors.obj(), "csc", PyLong_FromSize_t(num_csc));
+  PyDict_SetItemString(num_sparse_tensors.obj(), "csf", PyLong_FromSize_t(num_csf));
+  PyDict_SetItemString(num_sparse_tensors.obj(), "ndim_csf", PyLong_FromSize_t(ndim_csf));
+  RETURN_IF_PYERROR();
+
+  *out = num_sparse_tensors.detach();
+  return Status::OK();
+}
+
+}  // namespace
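+// GetComponents (below) exposes the same payload to Python piecewise rather
+// than as one stream: a dict holding the counts ("num_tensors",
+// "num_sparse_tensors", "num_ndarrays", "num_buffers") and "data", a list of
+// pyarrow-wrapped buffers (IPC metadata and bodies) built via PushBuffer.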
+Status SerializedPyObject::GetComponents(MemoryPool* memory_pool, PyObject** out) {
+  PyAcquireGIL py_gil;
+
+  OwnedRef result(PyDict_New());
+  PyObject* buffers = PyList_New(0);
+  PyObject* num_sparse_tensors = nullptr;
+
+  // TODO(wesm): Not sure how pedantic we need to be about checking the return
+  // values of these functions. There are other places where we do not check
+  // PyDict_SetItem/SetItemString return value, but these failures would be
+  // quite esoteric
+  PyDict_SetItemString(result.obj(), "num_tensors",
+                       PyLong_FromSize_t(this->tensors.size()));
+  RETURN_NOT_OK(CountSparseTensors(this->sparse_tensors, &num_sparse_tensors));
+  PyDict_SetItemString(result.obj(), "num_sparse_tensors", num_sparse_tensors);
+  PyDict_SetItemString(result.obj(), "ndim_csf", num_sparse_tensors);
+  PyDict_SetItemString(result.obj(), "num_ndarrays",
+                       PyLong_FromSize_t(this->ndarrays.size()));
+  PyDict_SetItemString(result.obj(), "num_buffers",
+                       PyLong_FromSize_t(this->buffers.size()));
+  PyDict_SetItemString(result.obj(), "data", buffers);
+  RETURN_IF_PYERROR();
+
+  Py_DECREF(buffers);
+
+  auto PushBuffer = [&buffers](const std::shared_ptr<Buffer>& buffer) {
+    PyObject* wrapped_buffer = wrap_buffer(buffer);
+    RETURN_IF_PYERROR();
+    if (PyList_Append(buffers, wrapped_buffer) < 0) {
+      Py_DECREF(wrapped_buffer);
+      RETURN_IF_PYERROR();
+    }
+    Py_DECREF(wrapped_buffer);
+    return Status::OK();
+  };
+
+  constexpr int64_t kInitialCapacity = 1024;
+
+  // Write the record batch describing the object structure
+  py_gil.release();
+  ARROW_ASSIGN_OR_RAISE(auto stream,
+                        io::BufferOutputStream::Create(kInitialCapacity, memory_pool));
+  RETURN_NOT_OK(
+      ipc::WriteRecordBatchStream({this->batch}, this->ipc_options, stream.get()));
+  ARROW_ASSIGN_OR_RAISE(auto buffer, stream->Finish());
+  py_gil.acquire();
+
+  RETURN_NOT_OK(PushBuffer(buffer));
+
+  // For each tensor, get a metadata buffer and a buffer for the body
+  for (const auto& tensor : this->tensors) {
+    ARROW_ASSIGN_OR_RAISE(std::unique_ptr<ipc::Message> message,
+                          ipc::GetTensorMessage(*tensor, memory_pool));
+    RETURN_NOT_OK(PushBuffer(message->metadata()));
+    RETURN_NOT_OK(PushBuffer(message->body()));
+  }
+
+  // For each sparse tensor, get a metadata buffer and buffers containing index and data
+  for (const auto& sparse_tensor : this->sparse_tensors) {
+    ipc::IpcPayload payload;
+    RETURN_NOT_OK(ipc::GetSparseTensorPayload(*sparse_tensor, memory_pool, &payload));
+    RETURN_NOT_OK(PushBuffer(payload.metadata));
+    for (const auto& body : payload.body_buffers) {
+      RETURN_NOT_OK(PushBuffer(body));
+    }
+  }
+
+  // For each ndarray, get a metadata buffer and a buffer for the body
+  for (const auto& ndarray : this->ndarrays) {
+    ARROW_ASSIGN_OR_RAISE(std::unique_ptr<ipc::Message> message,
+                          ipc::GetTensorMessage(*ndarray, memory_pool));
+    RETURN_NOT_OK(PushBuffer(message->metadata()));
+    RETURN_NOT_OK(PushBuffer(message->body()));
+  }
+
+  for (const auto& buf : this->buffers) {
+    RETURN_NOT_OK(PushBuffer(buf));
+  }
+
+  *out = result.detach();
+  return Status::OK();
+}
+
+}  // namespace py
+}  // namespace arrow
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/serialize.h b/src/vendored/apache-arrow-12.0.1/arrow/python/serialize.h
index e9fd843..fd207d3 100644
--- a/src/vendored/apache-arrow-12.0.1/arrow/python/serialize.h
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/serialize.h
@@ -21,9 +21,9 @@
 #include <memory>
 
 #include "arrow/ipc/options.h"
+#include "arrow/python/visibility.h"
 #include "arrow/sparse_tensor.h"
 #include "arrow/status.h"
-#include "arrow/python/visibility.h"
 
 // Forward declaring PyObject, see
 // https://mail.python.org/pipermail/python-dev/2003-August/037601.html
diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/udf.cc b/src/vendored/apache-arrow-12.0.1/arrow/python/udf.cc
new file mode 100644
index 0000000..435c89f
--- /dev/null
+++ b/src/vendored/apache-arrow-12.0.1/arrow/python/udf.cc
@@ -0,0 +1,736 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/python/udf.h"
+
+#include "arrow/array/builder_base.h"
+#include "arrow/buffer_builder.h"
+#include "arrow/compute/api_aggregate.h"
+#include "arrow/compute/api_vector.h"
+#include "arrow/compute/function.h"
+#include "arrow/compute/kernel.h"
+#include "arrow/compute/row/grouper.h"
+#include "arrow/python/common.h"
+#include "arrow/table.h"
+#include "arrow/util/checked_cast.h"
+#include "arrow/util/logging.h"
+
+namespace arrow {
+using compute::ExecSpan;
+using compute::Grouper;
+using compute::KernelContext;
+using compute::KernelState;
+using internal::checked_cast;
+
+namespace py {
+namespace {
+
+struct PythonUdfKernelState : public compute::KernelState {
+  explicit PythonUdfKernelState(std::shared_ptr<OwnedRefNoGIL> function)
+      : function(function) {
+    Py_INCREF(function->obj());
+  }
+
+  // function needs to be destroyed at process exit
+  // and Python may no longer be initialized.
+  ~PythonUdfKernelState() {
+    if (_Py_IsFinalizing()) {
+      function->detach();
+    }
+  }
+
+  std::shared_ptr<OwnedRefNoGIL> function;
+};
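+// Note on the destructor above: when the interpreter is already finalizing,
+// detach() deliberately leaks the reference, since calling Py_DECREF at that
+// point could touch a dead interpreter. The init and aggregator structs
+// below repeat the same pattern.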
+
+struct PythonUdfKernelInit {
+  explicit PythonUdfKernelInit(std::shared_ptr<OwnedRefNoGIL> function)
+      : function(function) {
+    Py_INCREF(function->obj());
+  }
+
+  // function needs to be destroyed at process exit
+  // and Python may no longer be initialized.
+  ~PythonUdfKernelInit() {
+    if (_Py_IsFinalizing()) {
+      function->detach();
+    }
+  }
+
+  Result<std::unique_ptr<compute::KernelState>> operator()(
+      compute::KernelContext*, const compute::KernelInitArgs&) {
+    return std::make_unique<PythonUdfKernelState>(function);
+  }
+
+  std::shared_ptr<OwnedRefNoGIL> function;
+};
+
+struct ScalarUdfAggregator : public compute::KernelState {
+  virtual Status Consume(compute::KernelContext* ctx, const compute::ExecSpan& batch) = 0;
+  virtual Status MergeFrom(compute::KernelContext* ctx, compute::KernelState&& src) = 0;
+  virtual Status Finalize(compute::KernelContext* ctx, Datum* out) = 0;
+};
+
+struct HashUdfAggregator : public compute::KernelState {
+  virtual Status Resize(KernelContext* ctx, int64_t size) = 0;
+  virtual Status Consume(KernelContext* ctx, const ExecSpan& batch) = 0;
+  virtual Status Merge(KernelContext* ct, KernelState&& other, const ArrayData&) = 0;
+  virtual Status Finalize(KernelContext* ctx, Datum* out) = 0;
+};
+
+arrow::Status AggregateUdfConsume(compute::KernelContext* ctx,
+                                  const compute::ExecSpan& batch) {
+  return checked_cast<ScalarUdfAggregator*>(ctx->state())->Consume(ctx, batch);
+}
+
+arrow::Status AggregateUdfMerge(compute::KernelContext* ctx, compute::KernelState&& src,
+                                compute::KernelState* dst) {
+  return checked_cast<ScalarUdfAggregator*>(dst)->MergeFrom(ctx, std::move(src));
+}
+
+arrow::Status AggregateUdfFinalize(compute::KernelContext* ctx, arrow::Datum* out) {
+  return checked_cast<ScalarUdfAggregator*>(ctx->state())->Finalize(ctx, out);
+}
+
+arrow::Status HashAggregateUdfResize(KernelContext* ctx, int64_t size) {
+  return checked_cast<HashUdfAggregator*>(ctx->state())->Resize(ctx, size);
+}
+
+arrow::Status HashAggregateUdfConsume(KernelContext* ctx, const ExecSpan& batch) {
+  return checked_cast<HashUdfAggregator*>(ctx->state())->Consume(ctx, batch);
+}
+
+arrow::Status HashAggregateUdfMerge(KernelContext* ctx, KernelState&& src,
+                                    const ArrayData& group_id_mapping) {
+  return checked_cast<HashUdfAggregator*>(ctx->state())
+      ->Merge(ctx, std::move(src), group_id_mapping);
+}
+
+arrow::Status HashAggregateUdfFinalize(KernelContext* ctx, Datum* out) {
+  return checked_cast<HashUdfAggregator*>(ctx->state())->Finalize(ctx, out);
+}
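+// These thin adapters are what the aggregate kernels below actually store:
+// compute::ScalarAggregateKernel and compute::HashAggregateKernel accept
+// plain consume/merge/finalize (plus resize for the hash variant) callbacks,
+// so each adapter just downcasts ctx->state() to the UDF aggregator and
+// forwards the call.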
+
+struct PythonTableUdfKernelInit {
+  PythonTableUdfKernelInit(std::shared_ptr<OwnedRefNoGIL> function_maker,
+                           UdfWrapperCallback cb)
+      : function_maker(function_maker), cb(cb) {
+    Py_INCREF(function_maker->obj());
+  }
+
+  // function needs to be destroyed at process exit
+  // and Python may no longer be initialized.
+  ~PythonTableUdfKernelInit() {
+    if (_Py_IsFinalizing()) {
+      function_maker->detach();
+    }
+  }
+
+  Result<std::unique_ptr<compute::KernelState>> operator()(
+      compute::KernelContext* ctx, const compute::KernelInitArgs&) {
+    UdfContext udf_context{ctx->memory_pool(), /*batch_length=*/0};
+    std::unique_ptr<OwnedRefNoGIL> function;
+    RETURN_NOT_OK(SafeCallIntoPython([this, &udf_context, &function] {
+      OwnedRef empty_tuple(PyTuple_New(0));
+      function = std::make_unique<OwnedRefNoGIL>(
+          cb(function_maker->obj(), udf_context, empty_tuple.obj()));
+      RETURN_NOT_OK(CheckPyError());
+      return Status::OK();
+    }));
+    if (!PyCallable_Check(function->obj())) {
+      return Status::TypeError("Expected a callable Python object.");
+    }
+    return std::make_unique<PythonUdfKernelState>(std::move(function));
+  }
+
+  std::shared_ptr<OwnedRefNoGIL> function_maker;
+  UdfWrapperCallback cb;
+};
+
+struct PythonUdfScalarAggregatorImpl : public ScalarUdfAggregator {
+  PythonUdfScalarAggregatorImpl(std::shared_ptr<OwnedRefNoGIL> function,
+                                UdfWrapperCallback cb,
+                                std::vector<std::shared_ptr<DataType>> input_types,
+                                std::shared_ptr<DataType> output_type)
+      : function(function), cb(std::move(cb)), output_type(std::move(output_type)) {
+    Py_INCREF(function->obj());
+    std::vector<std::shared_ptr<Field>> fields;
+    for (size_t i = 0; i < input_types.size(); i++) {
+      fields.push_back(field("", input_types[i]));
+    }
+    input_schema = schema(std::move(fields));
+  };
+
+  ~PythonUdfScalarAggregatorImpl() override {
+    if (_Py_IsFinalizing()) {
+      function->detach();
+    }
+  }
+
+  Status Consume(compute::KernelContext* ctx, const compute::ExecSpan& batch) override {
+    ARROW_ASSIGN_OR_RAISE(
+        auto rb, batch.ToExecBatch().ToRecordBatch(input_schema, ctx->memory_pool()));
+    values.push_back(std::move(rb));
+    return Status::OK();
+  }
+
+  Status MergeFrom(compute::KernelContext* ctx, compute::KernelState&& src) override {
+    auto& other_values = checked_cast<PythonUdfScalarAggregatorImpl&>(src).values;
+    values.insert(values.end(), std::make_move_iterator(other_values.begin()),
+                  std::make_move_iterator(other_values.end()));
+
+    other_values.erase(other_values.begin(), other_values.end());
+    return Status::OK();
+  }
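+  // In effect this aggregator is non-decomposable: Consume and MergeFrom only
+  // buffer record batches, and the Python callable runs exactly once, over
+  // the concatenated input, in Finalize below.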
+
+  Status Finalize(compute::KernelContext* ctx, Datum* out) override {
+    auto state =
+        arrow::internal::checked_cast<PythonUdfScalarAggregatorImpl*>(ctx->state());
+    const int num_args = input_schema->num_fields();
+
+    // Note: The way that batches are concatenated together
+    // would result in using double amount of the memory.
+    // This is OK for now because non decomposable aggregate
+    // UDF is supposed to be used with segmented aggregation
+    // where the size of the segment is more or less constant
+    // so doubling that is not a big deal. This can be also
+    // improved in the future to use more efficient way to
+    // concatenate.
+    ARROW_ASSIGN_OR_RAISE(auto table,
+                          arrow::Table::FromRecordBatches(input_schema, values));
+    ARROW_ASSIGN_OR_RAISE(table, table->CombineChunks(ctx->memory_pool()));
+    UdfContext udf_context{ctx->memory_pool(), table->num_rows()};
+
+    if (table->num_rows() == 0) {
+      return Status::Invalid("Finalized is called with empty inputs");
+    }
+
+    RETURN_NOT_OK(SafeCallIntoPython([&] {
+      std::unique_ptr<OwnedRef> result;
+      OwnedRef arg_tuple(PyTuple_New(num_args));
+      RETURN_NOT_OK(CheckPyError());
+
+      for (int arg_id = 0; arg_id < num_args; arg_id++) {
+        // Since we combined chunks there is only one chunk
+        std::shared_ptr<Array> c_data = table->column(arg_id)->chunk(0);
+        PyObject* data = wrap_array(c_data);
+        PyTuple_SetItem(arg_tuple.obj(), arg_id, data);
+      }
+      result =
+          std::make_unique<OwnedRef>(cb(function->obj(), udf_context, arg_tuple.obj()));
+      RETURN_NOT_OK(CheckPyError());
+      // unwrapping the output for expected output type
+      if (is_scalar(result->obj())) {
+        ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> val, unwrap_scalar(result->obj()));
+        if (*output_type != *val->type) {
+          return Status::TypeError("Expected output datatype ", output_type->ToString(),
+                                   ", but function returned datatype ",
+                                   val->type->ToString());
+        }
+        out->value = std::move(val);
+        return Status::OK();
+      }
+      return Status::TypeError("Unexpected output type: ",
+                               Py_TYPE(result->obj())->tp_name, " (expected Scalar)");
+    }));
+    return Status::OK();
+  }
+
+  std::shared_ptr<OwnedRefNoGIL> function;
+  UdfWrapperCallback cb;
+  std::vector<std::shared_ptr<RecordBatch>> values;
+  std::shared_ptr<Schema> input_schema;
+  std::shared_ptr<DataType> output_type;
+};
+
+struct PythonUdfHashAggregatorImpl : public HashUdfAggregator {
+  PythonUdfHashAggregatorImpl(std::shared_ptr<OwnedRefNoGIL> function,
+                              UdfWrapperCallback cb,
+                              std::vector<std::shared_ptr<DataType>> input_types,
+                              std::shared_ptr<DataType> output_type)
+      : function(function), cb(std::move(cb)), output_type(std::move(output_type)) {
+    Py_INCREF(function->obj());
+    std::vector<std::shared_ptr<Field>> fields;
+    fields.reserve(input_types.size());
+    for (size_t i = 0; i < input_types.size(); i++) {
+      fields.push_back(field("", input_types[i]));
+    }
+    input_schema = schema(std::move(fields));
+  };
+
+  ~PythonUdfHashAggregatorImpl() override {
+    if (_Py_IsFinalizing()) {
+      function->detach();
+    }
+  }
+
+  // same as ApplyGroupings in partition.cc
+  // replicated the code here to avoid complicating the dependencies
+  static Result<RecordBatchVector> ApplyGroupings(
+      const ListArray& groupings, const std::shared_ptr<RecordBatch>& batch) {
+    ARROW_ASSIGN_OR_RAISE(Datum sorted,
+                          compute::Take(batch, groupings.data()->child_data[0]));
+
+    const auto& sorted_batch = *sorted.record_batch();
+
+    RecordBatchVector out(static_cast<size_t>(groupings.length()));
+    for (size_t i = 0; i < out.size(); ++i) {
+      out[i] = sorted_batch.Slice(groupings.value_offset(i), groupings.value_length(i));
+    }
+
+    return out;
+  }
+
+  Status Resize(KernelContext* ctx, int64_t new_num_groups) {
+    // We only need to change num_groups in resize
+    // similar to other hash aggregate kernels
+    num_groups = new_num_groups;
+    return Status::OK();
+  }
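+  // Note: Grouper::MakeGroupings (used in Finalize below) produces a
+  // ListArray whose i-th slot holds the row indices of group i;
+  // ApplyGroupings above then gathers those rows with compute::Take and
+  // slices out one sub-batch per group.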
+
+  Status Consume(KernelContext* ctx, const ExecSpan& batch) {
+    ARROW_ASSIGN_OR_RAISE(
+        std::shared_ptr<RecordBatch> rb,
+        batch.ToExecBatch().ToRecordBatch(input_schema, ctx->memory_pool()));
+
+    // This is similar to GroupedListImpl
+    // last array is the group id
+    const ArraySpan& groups_array_data = batch[batch.num_values() - 1].array;
+    DCHECK_EQ(groups_array_data.offset, 0);
+    int64_t batch_num_values = groups_array_data.length;
+    const auto* batch_groups = groups_array_data.GetValues<uint32_t>(1);
+    RETURN_NOT_OK(groups.Append(batch_groups, batch_num_values));
+    values.push_back(std::move(rb));
+    num_values += batch_num_values;
+    return Status::OK();
+  }
+  Status Merge(KernelContext* ctx, KernelState&& other_state,
+               const ArrayData& group_id_mapping) {
+    // This is similar to GroupedListImpl
+    auto& other = checked_cast<PythonUdfHashAggregatorImpl&>(other_state);
+    auto& other_values = other.values;
+    const uint32_t* other_raw_groups = other.groups.data();
+    values.insert(values.end(), std::make_move_iterator(other_values.begin()),
+                  std::make_move_iterator(other_values.end()));
+
+    auto g = group_id_mapping.GetValues<uint32_t>(1);
+    for (uint32_t other_g = 0; static_cast<int64_t>(other_g) < other.num_values;
+         ++other_g) {
+      // Different state can have different group_id mappings, so we
+      // need to translate the ids
+      RETURN_NOT_OK(groups.Append(g[other_raw_groups[other_g]]));
+    }
+
+    num_values += other.num_values;
+    return Status::OK();
+  }
+
+  Status Finalize(KernelContext* ctx, Datum* out) {
+    // Exclude the last column which is the group id
+    const int num_args = input_schema->num_fields() - 1;
+
+    ARROW_ASSIGN_OR_RAISE(auto groups_buffer, groups.Finish());
+    ARROW_ASSIGN_OR_RAISE(auto groupings,
+                          Grouper::MakeGroupings(UInt32Array(num_values, groups_buffer),
+                                                 static_cast<uint32_t>(num_groups)));
+
+    ARROW_ASSIGN_OR_RAISE(auto table,
+                          arrow::Table::FromRecordBatches(input_schema, values));
+    ARROW_ASSIGN_OR_RAISE(auto rb, table->CombineChunksToBatch(ctx->memory_pool()));
+    UdfContext udf_context{ctx->memory_pool(), table->num_rows()};
+
+    if (rb->num_rows() == 0) {
+      *out = Datum();
+      return Status::OK();
+    }
+
+    ARROW_ASSIGN_OR_RAISE(RecordBatchVector rbs, ApplyGroupings(*groupings, rb));
+
+    return SafeCallIntoPython([&] {
+      ARROW_ASSIGN_OR_RAISE(std::unique_ptr<ArrayBuilder> builder,
+                            MakeBuilder(output_type, ctx->memory_pool()));
+      for (auto& group_rb : rbs) {
+        std::unique_ptr<OwnedRef> result;
+        OwnedRef arg_tuple(PyTuple_New(num_args));
+        RETURN_NOT_OK(CheckPyError());
+
+        for (int arg_id = 0; arg_id < num_args; arg_id++) {
+          // Since we combined chunks there is only one chunk
+          std::shared_ptr<Array> c_data = group_rb->column(arg_id);
+          PyObject* data = wrap_array(c_data);
+          PyTuple_SetItem(arg_tuple.obj(), arg_id, data);
+        }
+
+        result =
+            std::make_unique<OwnedRef>(cb(function->obj(), udf_context, arg_tuple.obj()));
+        RETURN_NOT_OK(CheckPyError());
+
+        // unwrapping the output for expected output type
+        if (is_scalar(result->obj())) {
+          ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Scalar> val,
+                                unwrap_scalar(result->obj()));
+          if (*output_type != *val->type) {
+            return Status::TypeError("Expected output datatype ", output_type->ToString(),
+                                     ", but function returned datatype ",
+                                     val->type->ToString());
+          }
+          ARROW_RETURN_NOT_OK(builder->AppendScalar(std::move(*val)));
+        } else {
+          return Status::TypeError("Unexpected output type: ",
+                                   Py_TYPE(result->obj())->tp_name, " (expected Scalar)");
+        }
+      }
+      ARROW_ASSIGN_OR_RAISE(auto result, builder->Finish());
+      out->value = std::move(result->data());
+      return Status::OK();
+    });
+  }
+
+  std::shared_ptr<OwnedRefNoGIL> function;
+  UdfWrapperCallback cb;
+  // Accumulated input batches
+  std::vector<std::shared_ptr<RecordBatch>> values;
+  // Group ids - extracted from the last column from the batch
+  TypedBufferBuilder<uint32_t> groups;
+  int64_t num_groups = 0;
+  int64_t num_values = 0;
+  std::shared_ptr<Schema> input_schema;
+  std::shared_ptr<DataType> output_type;
+};
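+// Taken together, the hash-aggregate Finalize path is: stitch the buffered
+// batches into one record batch, regroup its rows via MakeGroupings and
+// ApplyGroupings, invoke the Python callable once per group, and append each
+// returned Scalar to an ArrayBuilder whose Finish() becomes the output array.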
+
+struct PythonUdf : public PythonUdfKernelState {
+  PythonUdf(std::shared_ptr<OwnedRefNoGIL> function, UdfWrapperCallback cb,
+            std::vector<TypeHolder> input_types, compute::OutputType output_type)
+      : PythonUdfKernelState(function),
+        cb(cb),
+        input_types(input_types),
+        output_type(output_type) {}
+
+  UdfWrapperCallback cb;
+  std::vector<TypeHolder> input_types;
+  compute::OutputType output_type;
+  TypeHolder resolved_type;
+
+  Result<TypeHolder> ResolveType(compute::KernelContext* ctx,
+                                 const std::vector<TypeHolder>& types) {
+    if (input_types == types) {
+      if (!resolved_type) {
+        ARROW_ASSIGN_OR_RAISE(resolved_type, output_type.Resolve(ctx, input_types));
+      }
+      return resolved_type;
+    }
+    return output_type.Resolve(ctx, types);
+  }
+
+  Status Exec(compute::KernelContext* ctx, const compute::ExecSpan& batch,
+              compute::ExecResult* out) {
+    auto state = arrow::internal::checked_cast<PythonUdfKernelState*>(ctx->state());
+    std::shared_ptr<OwnedRefNoGIL>& function = state->function;
+    const int num_args = batch.num_values();
+    UdfContext udf_context{ctx->memory_pool(), batch.length};
+
+    OwnedRef arg_tuple(PyTuple_New(num_args));
+    RETURN_NOT_OK(CheckPyError());
+    for (int arg_id = 0; arg_id < num_args; arg_id++) {
+      if (batch[arg_id].is_scalar()) {
+        std::shared_ptr<Scalar> c_data = batch[arg_id].scalar->GetSharedPtr();
+        PyObject* data = wrap_scalar(c_data);
+        PyTuple_SetItem(arg_tuple.obj(), arg_id, data);
+      } else {
+        std::shared_ptr<Array> c_data = batch[arg_id].array.ToArray();
+        PyObject* data = wrap_array(c_data);
+        PyTuple_SetItem(arg_tuple.obj(), arg_id, data);
+      }
+    }
+
+    OwnedRef result(cb(function->obj(), udf_context, arg_tuple.obj()));
+    RETURN_NOT_OK(CheckPyError());
+    // unwrapping the output for expected output type
+    if (is_array(result.obj())) {
+      ARROW_ASSIGN_OR_RAISE(std::shared_ptr<Array> val, unwrap_array(result.obj()));
+      ARROW_ASSIGN_OR_RAISE(TypeHolder type, ResolveType(ctx, batch.GetTypes()));
+      if (type.type == NULLPTR) {
+        return Status::TypeError("expected output datatype is null");
+      }
+      if (*type.type != *val->type()) {
+        return Status::TypeError("Expected output datatype ", type.type->ToString(),
+                                 ", but function returned datatype ",
+                                 val->type()->ToString());
+      }
+      out->value = std::move(val->data());
+      return Status::OK();
+    } else {
+      return Status::TypeError("Unexpected output type: ", Py_TYPE(result.obj())->tp_name,
+                               " (expected Array)");
+    }
+    return Status::OK();
+  }
+};
+
+Status PythonUdfExec(compute::KernelContext* ctx, const compute::ExecSpan& batch,
+                     compute::ExecResult* out) {
+  auto udf = static_cast<PythonUdf*>(ctx->kernel()->data.get());
+  return SafeCallIntoPython([&]() -> Status { return udf->Exec(ctx, batch, out); });
+}
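+// PythonUdfExec is the function pointer stored in the scalar kernel: it
+// recovers the PythonUdf instance from kernel->data and re-enters the
+// interpreter via SafeCallIntoPython, which ensures the GIL is held for the
+// duration of the callback.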
+
+Status RegisterUdf(PyObject* user_function, compute::KernelInit kernel_init,
+                   UdfWrapperCallback wrapper, const UdfOptions& options,
+                   compute::FunctionRegistry* registry) {
+  if (!PyCallable_Check(user_function)) {
+    return Status::TypeError("Expected a callable Python object.");
+  }
+  auto scalar_func = std::make_shared<compute::ScalarFunction>(
+      options.func_name, options.arity, options.func_doc);
+  Py_INCREF(user_function);
+  std::vector<compute::InputType> input_types;
+  for (const auto& in_dtype : options.input_types) {
+    input_types.emplace_back(in_dtype);
+  }
+  compute::OutputType output_type(options.output_type);
+  auto udf_data = std::make_shared<PythonUdf>(
+      std::make_shared<OwnedRefNoGIL>(user_function), wrapper,
+      TypeHolder::FromTypes(options.input_types), options.output_type);
+  compute::ScalarKernel kernel(
+      compute::KernelSignature::Make(std::move(input_types), std::move(output_type),
+                                     options.arity.is_varargs),
+      PythonUdfExec, kernel_init);
+  kernel.data = std::move(udf_data);
+
+  kernel.mem_allocation = compute::MemAllocation::NO_PREALLOCATE;
+  kernel.null_handling = compute::NullHandling::COMPUTED_NO_PREALLOCATE;
+  RETURN_NOT_OK(scalar_func->AddKernel(std::move(kernel)));
+  if (registry == NULLPTR) {
+    registry = compute::GetFunctionRegistry();
+  }
+  RETURN_NOT_OK(registry->AddFunction(std::move(scalar_func)));
+  return Status::OK();
+}
+
+}  // namespace
+
+Status RegisterScalarFunction(PyObject* function, UdfWrapperCallback cb,
+                              const UdfOptions& options,
+                              compute::FunctionRegistry* registry) {
+  return RegisterUdf(function,
+                     PythonUdfKernelInit{std::make_shared<OwnedRefNoGIL>(function)}, cb,
+                     options, registry);
+}
+
+Status RegisterTabularFunction(PyObject* function, UdfWrapperCallback cb,
+                               const UdfOptions& options,
+                               compute::FunctionRegistry* registry) {
+  if (options.arity.num_args != 0 || options.arity.is_varargs) {
+    return Status::NotImplemented("tabular function of non-null arity");
+  }
+  if (options.output_type->id() != Type::type::STRUCT) {
+    return Status::Invalid("tabular function with non-struct output");
+  }
+  return RegisterUdf(
+      function, PythonTableUdfKernelInit{std::make_shared<OwnedRefNoGIL>(function), cb},
+      cb, options, registry);
+}
+
+Status RegisterScalarAggregateFunction(PyObject* function, UdfWrapperCallback cb,
+                                       const UdfOptions& options,
+                                       compute::FunctionRegistry* registry) {
+  if (!PyCallable_Check(function)) {
+    return Status::TypeError("Expected a callable Python object.");
+  }
+
+  if (registry == NULLPTR) {
+    registry = compute::GetFunctionRegistry();
+  }
+
+  // Py_INCREF here so that once a function is registered
+  // its refcount gets increased by 1 and doesn't get gced
+  // if all existing refs are gone
+  Py_INCREF(function);
+
+  static auto default_scalar_aggregate_options =
+      compute::ScalarAggregateOptions::Defaults();
+  auto aggregate_func = std::make_shared<compute::ScalarAggregateFunction>(
+      options.func_name, options.arity, options.func_doc,
+      &default_scalar_aggregate_options);
+
+  std::vector<compute::InputType> input_types;
+  for (const auto& in_dtype : options.input_types) {
+    input_types.emplace_back(in_dtype);
+  }
+  compute::OutputType output_type(options.output_type);
+
+  compute::KernelInit init = [cb, function, options](compute::KernelContext* ctx,
+                                                     const compute::KernelInitArgs& args)
+      -> Result<std::unique_ptr<compute::KernelState>> {
+    return std::make_unique<PythonUdfScalarAggregatorImpl>(
+        std::make_shared<OwnedRefNoGIL>(function), cb, options.input_types,
+        options.output_type);
+  };
+
+  auto sig = compute::KernelSignature::Make(
+      std::move(input_types), std::move(output_type), options.arity.is_varargs);
+  compute::ScalarAggregateKernel kernel(std::move(sig), std::move(init),
+                                        AggregateUdfConsume, AggregateUdfMerge,
+                                        AggregateUdfFinalize, /*ordered=*/false);
+  RETURN_NOT_OK(aggregate_func->AddKernel(std::move(kernel)));
+  RETURN_NOT_OK(registry->AddFunction(std::move(aggregate_func)));
+  return Status::OK();
+}
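+// Note: the kernel init lambda above captures the raw PyObject* by value;
+// together with the Py_INCREF it keeps the Python callable alive as long as
+// the registry references the function, and every kernel instantiation takes
+// its own OwnedRefNoGIL reference.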
+
+/// \brief Create a new UdfOptions with adjustment for hash kernel
+/// \param options User provided udf options
+UdfOptions AdjustForHashAggregate(const UdfOptions& options) {
+  UdfOptions hash_options;
+  // Append hash_ before the function name to separate it from the scalar
+  // version
+  hash_options.func_name = "hash_" + options.func_name;
+  // Extend input types with group id. Group id is appended by the group
+  // aggregation node. Here we change both arity and input types
+  if (options.arity.is_varargs) {
+    hash_options.arity = options.arity;
+  } else {
+    hash_options.arity = compute::Arity(options.arity.num_args + 1, false);
+  }
+  // Changing the function doc shouldn't be necessary because group id
+  // is not user visible, however, this is currently needed to pass the
+  // function validation. The name group_id_array is consistent with
+  // hash kernels in hash_aggregate.cc
+  hash_options.func_doc = options.func_doc;
+  hash_options.func_doc.arg_names.emplace_back("group_id_array");
+  std::vector<std::shared_ptr<DataType>> input_dtypes = options.input_types;
+  input_dtypes.emplace_back(uint32());
+  hash_options.input_types = std::move(input_dtypes);
+  hash_options.output_type = options.output_type;
+  return hash_options;
+}
+
+Status RegisterHashAggregateFunction(PyObject* function, UdfWrapperCallback cb,
+                                     const UdfOptions& options,
+                                     compute::FunctionRegistry* registry) {
+  if (!PyCallable_Check(function)) {
+    return Status::TypeError("Expected a callable Python object.");
+  }
+
+  if (registry == NULLPTR) {
+    registry = compute::GetFunctionRegistry();
+  }
+
+  // Py_INCREF here so that once a function is registered
+  // its refcount gets increased by 1 and doesn't get gced
+  // if all existing refs are gone
+  Py_INCREF(function);
+  UdfOptions hash_options = AdjustForHashAggregate(options);
+
+  std::vector<compute::InputType> input_types;
+  for (const auto& in_dtype : hash_options.input_types) {
+    input_types.emplace_back(in_dtype);
+  }
+  compute::OutputType output_type(hash_options.output_type);
+
+  static auto default_hash_aggregate_options =
+      compute::ScalarAggregateOptions::Defaults();
+  auto hash_aggregate_func = std::make_shared<compute::HashAggregateFunction>(
+      hash_options.func_name, hash_options.arity, hash_options.func_doc,
+      &default_hash_aggregate_options);
+
+  compute::KernelInit init = [function, cb, hash_options](
+                                 compute::KernelContext* ctx,
+                                 const compute::KernelInitArgs& args)
+      -> Result<std::unique_ptr<compute::KernelState>> {
+    return std::make_unique<PythonUdfHashAggregatorImpl>(
+        std::make_shared<OwnedRefNoGIL>(function), cb, hash_options.input_types,
+        hash_options.output_type);
+  };
+
+  auto sig = compute::KernelSignature::Make(
+      std::move(input_types), std::move(output_type), hash_options.arity.is_varargs);
+
+  compute::HashAggregateKernel kernel(
+      std::move(sig), std::move(init), HashAggregateUdfResize, HashAggregateUdfConsume,
+      HashAggregateUdfMerge, HashAggregateUdfFinalize, /*ordered=*/false);
+  RETURN_NOT_OK(hash_aggregate_func->AddKernel(std::move(kernel)));
+  RETURN_NOT_OK(registry->AddFunction(std::move(hash_aggregate_func)));
+  return Status::OK();
+}
+
+Status RegisterAggregateFunction(PyObject* function, UdfWrapperCallback cb,
+                                 const UdfOptions& options,
+                                 compute::FunctionRegistry* registry) {
+  RETURN_NOT_OK(RegisterScalarAggregateFunction(function, cb, options, registry));
+  RETURN_NOT_OK(RegisterHashAggregateFunction(function, cb, options, registry));
+
+  return Status::OK();
+}
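+// RegisterAggregateFunction thus registers two compute functions per UDF:
+// "<name>" as a scalar aggregate and "hash_<name>" (with the extra
+// group_id_array argument) for grouped aggregation, mirroring the built-in
+// aggregate kernels.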
Status::Invalid("tabular kernel of non-fixed kind"); + } + auto datatype = out_type.type(); + if (datatype->id() != Type::type::STRUCT) { + return Status::Invalid("tabular kernel with non-struct output"); + } + auto struct_type = arrow::internal::checked_cast(datatype.get()); + auto schema = ::arrow::schema(struct_type->fields()); + std::vector in_types; + ARROW_ASSIGN_OR_RAISE(auto func_exec, + GetFunctionExecutor(func_name, in_types, NULLPTR, registry)); + auto next_func = [schema, func_exec = std::move( + func_exec)]() -> Result> { + std::vector args; + // passed_length of -1 or 0 with args.size() of 0 leads to an empty ExecSpanIterator + // in exec.cc and to never invoking the source function, so 1 is passed instead + // TODO: GH-33612: Support batch size in user-defined tabular functions + ARROW_ASSIGN_OR_RAISE(auto datum, func_exec->Execute(args, /*passed_length=*/1)); + if (!datum.is_array()) { + return Status::Invalid("UDF result of non-array kind"); + } + std::shared_ptr array = datum.make_array(); + if (array->length() == 0) { + return IterationTraits>::End(); + } + ARROW_ASSIGN_OR_RAISE(auto batch, RecordBatch::FromStructArray(std::move(array))); + if (!schema->Equals(batch->schema())) { + return Status::Invalid("UDF result with shape not conforming to schema"); + } + return std::move(batch); + }; + return RecordBatchReader::MakeFromIterator(MakeFunctionIterator(std::move(next_func)), + schema); +} + +} // namespace py +} // namespace arrow diff --git a/src/vendored/apache-arrow-12.0.1/arrow/python/udf.h b/src/vendored/apache-arrow-12.0.1/arrow/python/udf.h index cde97d9..682cbb2 100644 --- a/src/vendored/apache-arrow-12.0.1/arrow/python/udf.h +++ b/src/vendored/apache-arrow-12.0.1/arrow/python/udf.h @@ -43,29 +43,34 @@ struct ARROW_PYTHON_EXPORT UdfOptions { std::shared_ptr output_type; }; -/// \brief A context passed as the first argument of scalar UDF functions. -struct ARROW_PYTHON_EXPORT ScalarUdfContext { +/// \brief A context passed as the first argument of UDF functions. 
+struct ARROW_PYTHON_EXPORT UdfContext {
   MemoryPool* pool;
   int64_t batch_length;
 };
 
 using UdfWrapperCallback = std::function<PyObject*(
-    PyObject* user_function, const ScalarUdfContext& context, PyObject* inputs)>;
+    PyObject* user_function, const UdfContext& context, PyObject* inputs)>;
 
 /// \brief register a Scalar user-defined-function from Python
 Status ARROW_PYTHON_EXPORT RegisterScalarFunction(
-    PyObject* user_function, UdfWrapperCallback wrapper,
-    const UdfOptions& options, compute::FunctionRegistry* registry = NULLPTR);
+    PyObject* user_function, UdfWrapperCallback wrapper, const UdfOptions& options,
+    compute::FunctionRegistry* registry = NULLPTR);
 
 /// \brief register a Table user-defined-function from Python
 Status ARROW_PYTHON_EXPORT RegisterTabularFunction(
-    PyObject* user_function, UdfWrapperCallback wrapper,
-    const UdfOptions& options, compute::FunctionRegistry* registry = NULLPTR);
+    PyObject* user_function, UdfWrapperCallback wrapper, const UdfOptions& options,
+    compute::FunctionRegistry* registry = NULLPTR);
 
-Result<std::shared_ptr<RecordBatchReader>> ARROW_PYTHON_EXPORT CallTabularFunction(
-    const std::string& func_name, const std::vector<Datum>& args,
+/// \brief register an Aggregate user-defined-function from Python
+Status ARROW_PYTHON_EXPORT RegisterAggregateFunction(
+    PyObject* user_function, UdfWrapperCallback wrapper, const UdfOptions& options,
     compute::FunctionRegistry* registry = NULLPTR);
 
+Result<std::shared_ptr<RecordBatchReader>> ARROW_PYTHON_EXPORT
+CallTabularFunction(const std::string& func_name, const std::vector<Datum>& args,
+                    compute::FunctionRegistry* registry = NULLPTR);
+
 }  // namespace py
 }  // namespace arrow
diff --git a/vcpkg.json b/vcpkg.json
index 40d4fdf..eec37b5 100644
--- a/vcpkg.json
+++ b/vcpkg.json
@@ -4,4 +4,4 @@
   "dependencies": [
     "arrow"
   ]
-}
\ No newline at end of file
+}