Skip to content

Commit

Permalink
BUG: permit str dtype -> IntegerDtype conversions
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexreg committed Oct 11, 2021
1 parent c021d33 commit ef7888c
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 16 deletions.
27 changes: 18 additions & 9 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

from typing import overload
from typing import (
Optional,
overload,
)
import warnings

import numpy as np
Expand Down Expand Up @@ -32,6 +35,7 @@
is_integer_dtype,
is_list_like,
is_object_dtype,
is_string_dtype,
pandas_dtype,
)
from pandas.core.dtypes.missing import isna
Expand Down Expand Up @@ -119,17 +123,20 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
return None


def safe_cast(values, dtype, copy: bool):
def safe_cast(values, dtype, inferred_type: str | None, copy: bool):
"""
Safely cast the values to the dtype if they
are equivalent, meaning floats must be equivalent to the
ints.
"""
if inferred_type in ("string", "unicode"):
# casts from str are always safe since they raise
# a ValueError if the str cannot be parsed into an int
return values.astype(dtype, copy=copy)

try:
return values.astype(dtype, casting="safe", copy=copy)
except TypeError as err:

casted = values.astype(dtype, copy=copy)
if (casted == values).all():
return casted
Expand All @@ -143,7 +150,7 @@ def coerce_to_array(
values, dtype, mask=None, copy: bool = False
) -> tuple[np.ndarray, np.ndarray]:
"""
Coerce the input values array to numpy arrays with a mask
Coerce the input values array to numpy arrays with a mask.
Parameters
----------
Expand Down Expand Up @@ -187,7 +194,9 @@ def coerce_to_array(
return values, mask

values = np.array(values, copy=copy)
if is_object_dtype(values):
inferred_type = None
# note that `is_string_dtype` subsumes `is_object_dtype`
if is_string_dtype(values):
inferred_type = lib.infer_dtype(values, skipna=True)
if inferred_type == "empty":
values = np.empty(len(values))
Expand All @@ -198,6 +207,8 @@ def coerce_to_array(
"mixed-integer",
"integer-na",
"mixed-integer-float",
"string",
"unicode",
]:
raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")

Expand Down Expand Up @@ -230,9 +241,7 @@ def coerce_to_array(
if mask.any():
values = values.copy()
values[mask] = 1
values = safe_cast(values, dtype, copy=False)
else:
values = safe_cast(values, dtype, copy=False)
values = safe_cast(values, dtype, inferred_type, copy=False)

return values, mask

Expand Down
14 changes: 7 additions & 7 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,14 +143,14 @@ def ensure_python_int(value: int | np.integer) -> int:


def classes(*klasses) -> Callable:
"""evaluate if the tipo is a subclass of the klasses"""
"""Evaluate if the tipo is a subclass of the klasses."""
return lambda tipo: issubclass(tipo, klasses)


def classes_and_not_datetimelike(*klasses) -> Callable:
"""
evaluate if the tipo is a subclass of the klasses
and not a datetimelike
Evaluate if the tipo is a subclass of the klasses
and not a datetimelike.
"""
return lambda tipo: (
issubclass(tipo, klasses)
Expand Down Expand Up @@ -674,7 +674,7 @@ def is_integer_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of an integer dtype.
Unlike in `in_any_int_dtype`, timedelta64 instances will return False.
Unlike in `is_any_int_dtype`, timedelta64 instances will return False.
The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
as integer by this function.
Expand Down Expand Up @@ -726,7 +726,7 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool:
"""
Check whether the provided array or dtype is of a signed integer dtype.
Unlike in `in_any_int_dtype`, timedelta64 instances will return False.
Unlike in `is_any_int_dtype`, timedelta64 instances will return False.
The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered
as integer by this function.
Expand Down Expand Up @@ -1521,7 +1521,7 @@ def is_complex_dtype(arr_or_dtype) -> bool:

def _is_dtype(arr_or_dtype, condition) -> bool:
"""
Return a boolean if the condition is satisfied for the arr_or_dtype.
Return true if the condition is satisfied for the arr_or_dtype.
Parameters
----------
Expand Down Expand Up @@ -1580,7 +1580,7 @@ def get_dtype(arr_or_dtype) -> DtypeObj:

def _is_dtype_type(arr_or_dtype, condition) -> bool:
"""
Return a boolean if the condition is satisfied for the arr_or_dtype.
Return true if the condition is satisfied for the arr_or_dtype.
Parameters
----------
Expand Down

0 comments on commit ef7888c

Please sign in to comment.