From 9e19ecea8f8cb7c87785c1ec202456e37b8440c6 Mon Sep 17 00:00:00 2001 From: Alexander Regueiro Date: Sun, 10 Oct 2021 00:22:08 +0100 Subject: [PATCH] BUG: permit str dtype -> IntegerDtype conversions Resolves #25472, resolves #25288. --- pandas/core/arrays/integer.py | 18 +++++++++++------- pandas/core/dtypes/common.py | 14 +++++++------- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 078adeb11d3fb..b973fa83dd547 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -32,6 +32,7 @@ is_integer_dtype, is_list_like, is_object_dtype, + is_string_dtype, pandas_dtype, ) from pandas.core.dtypes.missing import isna @@ -124,12 +125,15 @@ def safe_cast(values, dtype, copy: bool): Safely cast the values to the dtype if they are equivalent, meaning floats must be equivalent to the ints. - """ + if is_string_dtype(values): + # casts from str are always safe since they raise + # a ValueError if the str cannot be parsed into an int + return values.astype(dtype, copy=copy) + try: return values.astype(dtype, casting="safe", copy=copy) except TypeError as err: - casted = values.astype(dtype, copy=copy) if (casted == values).all(): return casted @@ -143,7 +147,7 @@ def coerce_to_array( values, dtype, mask=None, copy: bool = False ) -> tuple[np.ndarray, np.ndarray]: """ - Coerce the input values array to numpy arrays with a mask + Coerce the input values array to numpy arrays with a mask. Parameters ---------- @@ -204,7 +208,9 @@ def coerce_to_array( elif is_bool_dtype(values) and is_integer_dtype(dtype): values = np.array(values, dtype=int, copy=copy) - elif not (is_integer_dtype(values) or is_float_dtype(values)): + elif not ( + is_integer_dtype(values) or is_float_dtype(values) or is_string_dtype(values) + ): raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") if mask is None: @@ -230,9 +236,7 @@ def coerce_to_array( if mask.any(): values = values.copy() values[mask] = 1 - values = safe_cast(values, dtype, copy=False) - else: - values = safe_cast(values, dtype, copy=False) + values = safe_cast(values, dtype, copy=False) return values, mask diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index a9c2b31849425..2e8641c281661 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -143,14 +143,14 @@ def ensure_python_int(value: int | np.integer) -> int: def classes(*klasses) -> Callable: - """evaluate if the tipo is a subclass of the klasses""" + """Evaluate if the tipo is a subclass of the klasses.""" return lambda tipo: issubclass(tipo, klasses) def classes_and_not_datetimelike(*klasses) -> Callable: """ - evaluate if the tipo is a subclass of the klasses - and not a datetimelike + Evaluate if the tipo is a subclass of the klasses + and not a datetimelike. """ return lambda tipo: ( issubclass(tipo, klasses) @@ -674,7 +674,7 @@ def is_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of an integer dtype. - Unlike in `in_any_int_dtype`, timedelta64 instances will return False. + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered as integer by this function. @@ -726,7 +726,7 @@ def is_signed_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or dtype is of a signed integer dtype. - Unlike in `in_any_int_dtype`, timedelta64 instances will return False. + Unlike in `is_any_int_dtype`, timedelta64 instances will return False. The nullable Integer dtypes (e.g. pandas.Int64Dtype) are also considered as integer by this function. @@ -1521,7 +1521,7 @@ def is_complex_dtype(arr_or_dtype) -> bool: def _is_dtype(arr_or_dtype, condition) -> bool: """ - Return a boolean if the condition is satisfied for the arr_or_dtype. + Return true if the condition is satisfied for the arr_or_dtype. Parameters ---------- @@ -1580,7 +1580,7 @@ def get_dtype(arr_or_dtype) -> DtypeObj: def _is_dtype_type(arr_or_dtype, condition) -> bool: """ - Return a boolean if the condition is satisfied for the arr_or_dtype. + Return true if the condition is satisfied for the arr_or_dtype. Parameters ----------