From 8cce4c6f6a06c889655c571e87b65aa4b10552f6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 12 Sep 2019 22:57:00 +0200 Subject: [PATCH] ENH: Add IntegerArray.__arrow_array__ for custom conversion to Arrow (#28368) * ENH: Add IntegerArray.__arrow_array__ for custom conversion to Arrow * simplify pyarrow version check in tests * add whatsnew --- doc/source/whatsnew/v1.0.0.rst | 10 +++++++--- pandas/core/arrays/integer.py | 8 ++++++++ pandas/tests/arrays/test_integer.py | 12 ++++++++++++ pandas/tests/io/test_parquet.py | 12 ++++++++++++ 4 files changed, 39 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9998a9a8476431..bc77553924dfab 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -20,15 +20,19 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ -- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) -- + .. _whatsnew_1000.enhancements.other: Other enhancements ^^^^^^^^^^^^^^^^^^ -- +- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) +- The :ref:`integer dtype <integer_na>` with support for missing values can now be converted to + ``pyarrow`` (>= 0.15.0), which means that it is supported in writing to the Parquet file format + when using the ``pyarrow`` engine. It is currently not yet supported when converting back to + pandas (so it will become an integer or float dtype depending on the presence of missing data). + (:issue:`28368`) - .. _whatsnew_1000.api_breaking: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 069d661e6af34d..7b03bf35faf252 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -367,6 +367,14 @@ def __array__(self, dtype=None): """ return self._coerce_to_ndarray() + def __arrow_array__(self, type=None): + """ + Convert myself into a pyarrow Array. 
+ """ + import pyarrow as pa + + return pa.array(self._data, mask=self._mask, type=type) + _HANDLED_TYPES = (np.ndarray, numbers.Number) def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py index 31a9a0483081ed..55e25caafc4ee4 100644 --- a/pandas/tests/arrays/test_integer.py +++ b/pandas/tests/arrays/test_integer.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.generic import ABCIndexClass import pandas as pd @@ -817,6 +819,16 @@ def test_ufunc_reduce_raises(values): np.add.reduce(a) +@td.skip_if_no("pyarrow", min_version="0.14.1.dev") +def test_arrow_array(data): + # protocol added in 0.15.0 + import pyarrow as pa + + arr = pa.array(data) + expected = pa.array(list(data), type=data.dtype.name.lower(), from_pandas=True) + assert arr.equals(expected) + + # TODO(jreback) - these need testing / are broken # shift diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9573ac15dc45fa..efc2b6d6c5b3d7 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -488,6 +488,18 @@ def test_empty_dataframe(self, pa): df = pd.DataFrame() check_round_trip(df, pa) + @td.skip_if_no("pyarrow", min_version="0.14.1.dev") + def test_nullable_integer(self, pa): + df = pd.DataFrame({"a": pd.Series([1, 2, 3], dtype="Int64")}) + # currently de-serialized as plain int + expected = df.assign(a=df.a.astype("int64")) + check_round_trip(df, pa, expected=expected) + + df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")}) + # if missing values currently de-serialized as float + expected = df.assign(a=df.a.astype("float64")) + check_round_trip(df, pa, expected=expected) + class TestParquetFastParquet(Base): @td.skip_if_no("fastparquet", min_version="0.2.1")