Skip to content

Commit

Permalink
ENH: Add IntegerArray.__arrow_array__ for custom conversion to Arrow (p…
Browse files Browse the repository at this point in the history
…andas-dev#28368)

* ENH: Add IntegerArray.__arrow_array__ for custom conversion to Arrow

* simplify pyarrow version check in tests

* add whatsnew
  • Loading branch information
jorisvandenbossche authored and proost committed Dec 19, 2019
1 parent 9b2f171 commit 8cce4c6
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 3 deletions.
10 changes: 7 additions & 3 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,19 @@ including other versions of pandas.

Enhancements
~~~~~~~~~~~~
- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
-


.. _whatsnew_1000.enhancements.other:

Other enhancements
^^^^^^^^^^^^^^^^^^

-
- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
- The :ref:`integer dtype <integer_na>` with support for missing values can now be converted to
``pyarrow`` (>= 0.15.0), which means that it is supported in writing to the Parquet file format
when using the ``pyarrow`` engine. The nullable dtype is not yet restored when converting back to
pandas (the result is a plain integer or float dtype, depending on the presence of missing data).
(:issue:`28368`)
-

.. _whatsnew_1000.api_breaking:
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,14 @@ def __array__(self, dtype=None):
"""
return self._coerce_to_ndarray()

def __arrow_array__(self, type=None):
    """
    Build a pyarrow Array from this array's values and mask.

    Parameters
    ----------
    type : optional
        Forwarded unchanged as the ``type`` argument of ``pyarrow.array``.
        (The name shadows the builtin, but it is part of this method's
        existing signature and is kept as-is.)

    Returns
    -------
    The result of ``pyarrow.array`` called on ``self._data`` with
    ``self._mask`` passed as ``mask``.
    """
    # pyarrow is imported inside the method rather than at module level.
    import pyarrow

    values, missing = self._data, self._mask
    return pyarrow.array(values, mask=missing, type=type)

_HANDLED_TYPES = (np.ndarray, numbers.Number)

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/arrays/test_integer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.generic import ABCIndexClass

import pandas as pd
Expand Down Expand Up @@ -817,6 +819,16 @@ def test_ufunc_reduce_raises(values):
np.add.reduce(a)


@td.skip_if_no("pyarrow", min_version="0.14.1.dev")
def test_arrow_array(data):
    """
    Check that ``pyarrow.array(data)`` equals an explicitly typed conversion.

    The reference array is built from ``list(data)`` with the lower-cased
    dtype name as the arrow type and ``from_pandas=True``.
    """
    # protocol added in 0.15.0
    import pyarrow

    converted = pyarrow.array(data)

    arrow_type_name = data.dtype.name.lower()
    reference = pyarrow.array(list(data), type=arrow_type_name, from_pandas=True)

    assert converted.equals(reference)


# TODO(jreback) - these need testing / are broken

# shift
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,18 @@ def test_empty_dataframe(self, pa):
df = pd.DataFrame()
check_round_trip(df, pa)

@td.skip_if_no("pyarrow", min_version="0.14.1.dev")
def test_nullable_integer(self, pa):
    """
    Round-trip an ``Int64`` column, with and without missing values.

    ``pa`` is passed straight through to ``check_round_trip``
    (presumably the pyarrow engine fixture; confirm against the fixture
    definition). The expected frame uses the dtype the column currently
    comes back as, not the original nullable dtype.
    """
    # Each case: (column values, dtype the column is expected to come back as)
    cases = (
        # currently de-serialized as plain int
        ([1, 2, 3], "int64"),
        # if missing values currently de-serialized as float
        ([1, 2, 3, None], "float64"),
    )
    for values, roundtrip_dtype in cases:
        df = pd.DataFrame({"a": pd.Series(values, dtype="Int64")})
        expected = df.assign(a=df.a.astype(roundtrip_dtype))
        check_round_trip(df, pa, expected=expected)


class TestParquetFastParquet(Base):
@td.skip_if_no("fastparquet", min_version="0.2.1")
Expand Down

0 comments on commit 8cce4c6

Please sign in to comment.