REF (string): Move StringArrayNumpySemantics methods to base class #59514

Merged: 3 commits, Aug 14, 2024
56 changes: 23 additions & 33 deletions pandas/core/arrays/string_.py
@@ -746,6 +746,12 @@ def _reduce(
         axis: AxisInt | None = 0,
         **kwargs,
     ):
+        if self.dtype.na_value is np.nan and name in ["any", "all"]:
+            if name == "any":
+                return nanops.nanany(self._ndarray, skipna=skipna)
+            else:
+                return nanops.nanall(self._ndarray, skipna=skipna)
+
         if name in ["min", "max"]:
             result = getattr(self, name)(skipna=skipna, axis=axis)
             if keepdims:
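
For reviewers skimming the hunk, a minimal usage sketch of what this branch enables, assuming a pandas build that exposes the NaN-backed string dtype (here spelled `pd.StringDtype(storage="python", na_value=np.nan)`; that constructor spelling is an assumption from the surrounding PDEP-14 work, not something this diff defines):

```python
import numpy as np
import pandas as pd

# Assumed spelling of the NaN-backed string dtype; not defined in this diff.
nan_dtype = pd.StringDtype(storage="python", na_value=np.nan)

s = pd.Series(["a", "", None], dtype=nan_dtype)

# Series reductions delegate to StringArray._reduce, which now routes
# "any"/"all" through nanops.nanany/nanall on the underlying object ndarray.
print(s.any())  # True  -- "a" is truthy; the missing value is skipped
print(s.all())  # False -- "" is falsy
```

With the NA-backed dtype the condition `self.dtype.na_value is np.nan` is False, so `any`/`all` still fall through to the TypeError at the end of `_reduce`; only the NaN-semantics arrays pick up the nanops path.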
@@ -754,6 +760,12 @@ def _reduce(
 
         raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
 
+    def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
+        if self.dtype.na_value is np.nan and result is libmissing.NA:
+            # the masked_reductions use pd.NA -> convert to np.nan
+            return np.nan
+        return super()._wrap_reduction_result(axis, result)
+
     def min(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
         nv.validate_min((), kwargs)
         result = masked_reductions.min(
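
Likewise for `_wrap_reduction_result`: the masked reductions report a fully-missing result as `pd.NA`, and the hoisted override maps that back to `np.nan` when the dtype uses NaN semantics. A hedged sketch, reusing the assumed dtype spelling from above and relying on `min` routing its result through `_wrap_reduction_result` (the return line is outside this hunk):

```python
import numpy as np
import pandas as pd

nan_dtype = pd.StringDtype(storage="python", na_value=np.nan)  # assumed spelling
na_dtype = pd.StringDtype(storage="python")                    # pd.NA-backed

all_missing_nan = pd.Series([None, None], dtype=nan_dtype)
all_missing_na = pd.Series([None, None], dtype=na_dtype)

# masked_reductions.min returns pd.NA for an all-missing array; the override
# converts it to np.nan only when dtype.na_value is np.nan.
print(all_missing_nan.min())  # nan
print(all_missing_na.min())   # <NA>
```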
@@ -771,8 +783,11 @@ def max(self, axis=None, skipna: bool = True, **kwargs) -> Scalar:
     def value_counts(self, dropna: bool = True) -> Series:
         from pandas.core.algorithms import value_counts_internal as value_counts
 
-        result = value_counts(self._ndarray, sort=False, dropna=dropna).astype("Int64")
+        result = value_counts(self._ndarray, sort=False, dropna=dropna)
         result.index = result.index.astype(self.dtype)
+
+        if self.dtype.na_value is libmissing.NA:
+            result = result.astype("Int64")
         return result
 
     def memory_usage(self, deep: bool = False) -> int:
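
The effect of the `value_counts` consolidation, sketched under the same assumed dtype spellings: only the NA-backed variant casts the counts to the nullable `Int64` dtype, while the NaN-backed variant keeps plain numpy `int64` counts.

```python
import numpy as np
import pandas as pd

na_arr = pd.array(["a", "a", None], dtype=pd.StringDtype(storage="python"))
nan_arr = pd.array(
    ["a", "a", None],
    dtype=pd.StringDtype(storage="python", na_value=np.nan),  # assumed spelling
)

# Counts are indexed by the string dtype in both cases; only the values dtype differs.
print(na_arr.value_counts().dtype)   # Int64 (nullable)
print(nan_arr.value_counts().dtype)  # int64 (numpy)
```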
@@ -823,7 +838,13 @@ def _cmp_method(self, other, op):
             # logical
             result = np.zeros(len(self._ndarray), dtype="bool")
             result[valid] = op(self._ndarray[valid], other)
-            return BooleanArray(result, mask)
+            res_arr = BooleanArray(result, mask)
+            if self.dtype.na_value is np.nan:
+                if op == operator.ne:
+                    return res_arr.to_numpy(np.bool_, na_value=True)
+                else:
+                    return res_arr.to_numpy(np.bool_, na_value=False)
+            return res_arr
 
     _arith_method = _cmp_method
 
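
And the comparison behavior the folded-in branch preserves: under NaN semantics the result is a plain numpy boolean array, with missing entries comparing False (True only for `!=`), while the NA-backed array keeps returning a masked `BooleanArray`. A sketch under the same assumed dtype spelling:

```python
import numpy as np
import pandas as pd

nan_arr = pd.array(
    ["a", None, "c"],
    dtype=pd.StringDtype(storage="python", na_value=np.nan),  # assumed spelling
)
na_arr = pd.array(["a", None, "c"], dtype=pd.StringDtype(storage="python"))

# NaN-backed: numpy bool ndarray, missing -> False (True for !=)
print(nan_arr == "a")  # [ True False False]
print(nan_arr != "a")  # [False  True  True]

# NA-backed: BooleanArray that propagates pd.NA
print(na_arr == "a")   # <BooleanArray> [True, <NA>, False]
```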
@@ -864,37 +885,6 @@ def _from_backing_data(self, arr: np.ndarray) -> StringArrayNumpySemantics:
         # we always preserve the dtype
         return NDArrayBacked._from_backing_data(self, arr)
 
-    def _reduce(
-        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
-    ):
-        if name in ["any", "all"]:
-            if name == "any":
-                return nanops.nanany(self._ndarray, skipna=skipna)
-            else:
-                return nanops.nanall(self._ndarray, skipna=skipna)
-        else:
-            return super()._reduce(name, skipna=skipna, keepdims=keepdims, **kwargs)
-
-    def _wrap_reduction_result(self, axis: AxisInt | None, result) -> Any:
-        # the masked_reductions use pd.NA
-        if result is libmissing.NA:
-            return np.nan
-        return super()._wrap_reduction_result(axis, result)
-
-    def _cmp_method(self, other, op):
-        result = super()._cmp_method(other, op)
-        if op == operator.ne:
-            return result.to_numpy(np.bool_, na_value=True)
-        else:
-            return result.to_numpy(np.bool_, na_value=False)
-
-    def value_counts(self, dropna: bool = True) -> Series:
-        from pandas.core.algorithms import value_counts_internal as value_counts
-
-        result = value_counts(self._ndarray, sort=False, dropna=dropna)
-        result.index = result.index.astype(self.dtype)
-        return result
-
     # ------------------------------------------------------------------------
     # String methods interface
     _str_na_value = np.nan