Skip to content

Commit

Permalink
API (string): str.center with pyarrow-backed string dtype (#59624)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Sep 2, 2024
1 parent 2c76740 commit db1b8ab
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 9 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v2.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,8 @@ Conversion
Strings
^^^^^^^
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
-
- Bug in the ``center`` method on :class:`Series` and :class:`Index` object ``str`` accessors with pyarrow-backed dtype not matching the python behavior in corner cases with an odd number of fill characters (:issue:`54792`)


Interval
^^^^^^^^
Expand Down
20 changes: 18 additions & 2 deletions pandas/core/arrays/_arrow_string_mixins.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
from __future__ import annotations

from functools import partial
from typing import (
TYPE_CHECKING,
Literal,
)

import numpy as np

from pandas.compat import pa_version_under10p1
from pandas.compat import (
pa_version_under10p1,
pa_version_under17p0,
)

from pandas.core.dtypes.missing import isna

Expand Down Expand Up @@ -49,7 +53,19 @@ def _str_pad(
elif side == "right":
pa_pad = pc.utf8_rpad
elif side == "both":
pa_pad = pc.utf8_center
if pa_version_under17p0:
# GH#59624 fall back to object dtype
from pandas import array

obj_arr = self.astype(object, copy=False) # type: ignore[attr-defined]
obj = array(obj_arr, dtype=object)
result = obj._str_pad(width, side, fillchar) # type: ignore[attr-defined]
return type(self)._from_sequence(result, dtype=self.dtype) # type: ignore[attr-defined]
else:
# GH#54792
# https://github.com/apache/arrow/issues/15053#issuecomment-2317032347
lean_left = (width % 2) == 0
pa_pad = partial(pc.utf8_center, lean_left_on_odd_padding=lean_left)
else:
raise ValueError(
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,6 +284,7 @@ def astype(self, dtype, copy: bool = True):
_str_map = BaseStringArray._str_map
_str_startswith = ArrowStringArrayMixin._str_startswith
_str_endswith = ArrowStringArrayMixin._str_endswith
_str_pad = ArrowStringArrayMixin._str_pad

def _str_contains(
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
Expand Down Expand Up @@ -546,7 +547,6 @@ class ArrowStringArrayNumpySemantics(ArrowStringArray):
_str_get = ArrowStringArrayMixin._str_get
_str_removesuffix = ArrowStringArrayMixin._str_removesuffix
_str_capitalize = ArrowStringArrayMixin._str_capitalize
_str_pad = ArrowStringArrayMixin._str_pad
_str_title = ArrowStringArrayMixin._str_title
_str_swapcase = ArrowStringArrayMixin._str_swapcase
_str_slice_replace = ArrowStringArrayMixin._str_slice_replace
6 changes: 1 addition & 5 deletions pandas/tests/strings/test_case_justify.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,11 +291,7 @@ def test_center_ljust_rjust_mixed_object():


def test_center_ljust_rjust_fillchar(any_string_dtype):
if any_string_dtype == "string[pyarrow_numpy]":
pytest.skip(
"Arrow logic is different, "
"see https://github.com/pandas-dev/pandas/pull/54533/files#r1299808126",
)
# GH#54533, GH#54792
s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)

result = s.str.center(5, fillchar="X")
Expand Down

0 comments on commit db1b8ab

Please sign in to comment.