diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 2d15a7e5ccadd..1e03d9df82dd7 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -20,15 +20,19 @@ including other versions of pandas.
 Enhancements
 ~~~~~~~~~~~~
 
-- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
--
+
 
 .. _whatsnew_1000.enhancements.other:
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
--
+- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
+- The :ref:`integer dtype <integer_na>` with support for missing values can now be converted to
+  ``pyarrow`` (>= 0.15.0), which means that it is supported in writing to the Parquet file format
+  when using the ``pyarrow`` engine. It is currently not yet supported when converting back to
+  pandas (so it will become an integer or float dtype depending on the presence of missing data).
+  (:issue:`28368`)
 -
 
 .. _whatsnew_1000.api_breaking:
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 069d661e6af34..7b03bf35faf25 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -367,6 +367,14 @@ def __array__(self, dtype=None):
         """
         return self._coerce_to_ndarray()
 
+    def __arrow_array__(self, type=None):
+        """
+        Convert myself into a pyarrow Array.
+        """
+        import pyarrow as pa
+
+        return pa.array(self._data, mask=self._mask, type=type)
+
     _HANDLED_TYPES = (np.ndarray, numbers.Number)
 
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index 31a9a0483081e..55e25caafc4ee 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -1,6 +1,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 from pandas.core.dtypes.generic import ABCIndexClass
 
 import pandas as pd
@@ -817,6 +819,16 @@ def test_ufunc_reduce_raises(values):
         np.add.reduce(a)
 
 
+@td.skip_if_no("pyarrow", min_version="0.14.1.dev")
+def test_arrow_array(data):
+    # protocol added in 0.15.0
+    import pyarrow as pa
+
+    arr = pa.array(data)
+    expected = pa.array(list(data), type=data.dtype.name.lower(), from_pandas=True)
+    assert arr.equals(expected)
+
+
 # TODO(jreback) - these need testing / are broken
 
 # shift
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index d634859e72d7b..ab0daee2d4b3c 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -478,6 +478,18 @@ def test_empty_dataframe(self, pa):
         df = pd.DataFrame()
         check_round_trip(df, pa)
 
+    @td.skip_if_no("pyarrow", min_version="0.14.1.dev")
+    def test_nullable_integer(self, pa):
+        df = pd.DataFrame({"a": pd.Series([1, 2, 3], dtype="Int64")})
+        # currently de-serialized as plain int
+        expected = df.assign(a=df.a.astype("int64"))
+        check_round_trip(df, pa, expected=expected)
+
+        df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")})
+        # if missing values currently de-serialized as float
+        expected = df.assign(a=df.a.astype("float64"))
+        check_round_trip(df, pa, expected=expected)
+
 
 class TestParquetFastParquet(Base):
     @td.skip_if_no("fastparquet", min_version="0.2.1")
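
For context, a minimal sketch of how the new ``__arrow_array__`` protocol is exercised from user code (assuming ``pyarrow`` >= 0.15.0 is installed; the file name is only illustrative). The round-trip dtypes noted in the comments follow the whatsnew entry and the parquet test above:

import pandas as pd
import pyarrow as pa

# Nullable integer data with a missing value
ser = pd.Series([1, 2, None], dtype="Int64")

# pa.array() picks up IntegerArray.__arrow_array__, so the mask is passed
# through and the missing value becomes a proper Arrow null
arr = pa.array(ser.array)
print(arr.type)        # int64
print(arr.null_count)  # 1

# Writing to Parquet with the pyarrow engine therefore preserves the values;
# converting back to pandas is not yet dtype-preserving, so a column with
# missing data comes back as float64 (and as int64 when nothing is missing)
df = pd.DataFrame({"a": ser})
df.to_parquet("nullable_int.parquet", engine="pyarrow")
result = pd.read_parquet("nullable_int.parquet")
print(result["a"].dtype)  # float64

The point of the protocol is that pyarrow can consume the integer data and its mask directly, instead of going through an object-dtype ndarray first.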