From fcadfb849258a714d2cee9b68577f8d62acaeffc Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Thu, 5 Aug 2021 23:01:58 +0100 Subject: [PATCH] ENH: NumericIndex for any numpy int/uint/float dtype (#41153) --- pandas/_libs/join.pyx | 3 + pandas/_testing/__init__.py | 3 +- pandas/conftest.py | 15 +- pandas/core/api.py | 1 + pandas/core/dtypes/generic.py | 1 + pandas/core/indexes/base.py | 17 ++ pandas/core/indexes/category.py | 25 +++ pandas/core/indexes/numeric.py | 51 ++++- pandas/core/indexes/range.py | 1 + pandas/tests/base/test_unique.py | 9 +- pandas/tests/indexes/common.py | 44 ++-- pandas/tests/indexes/numeric/test_numeric.py | 217 +++++++++++++------ pandas/tests/indexes/test_any_index.py | 15 +- pandas/tests/indexes/test_base.py | 7 + pandas/tests/indexes/test_common.py | 7 +- pandas/tests/indexes/test_numpy_compat.py | 8 +- 16 files changed, 321 insertions(+), 103 deletions(-) diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index eefa16d23f576..b6acf8914c0a6 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -265,6 +265,9 @@ ctypedef fused join_t: int16_t int32_t int64_t + uint8_t + uint16_t + uint32_t uint64_t diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index fc6c7f4c17ea0..97e07a76b9149 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -105,6 +105,7 @@ use_numexpr, with_csv_dialect, ) +from pandas.core.api import NumericIndex from pandas.core.arrays import ( DatetimeArray, PandasArray, @@ -314,7 +315,7 @@ def makeNumericIndex(k=10, name=None, *, dtype): else: raise NotImplementedError(f"wrong dtype {dtype}") - return Index(values, dtype=dtype, name=name) + return NumericIndex(values, dtype=dtype, name=name) def makeIntIndex(k=10, name=None): diff --git a/pandas/conftest.py b/pandas/conftest.py index 218fae7ecd969..460f34fd02109 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -460,6 +460,16 @@ def _create_mi_with_dt64tz_level(): "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), + "num_int64": tm.makeNumericIndex(100, dtype="int64"), + "num_int32": tm.makeNumericIndex(100, dtype="int32"), + "num_int16": tm.makeNumericIndex(100, dtype="int16"), + "num_int8": tm.makeNumericIndex(100, dtype="int8"), + "num_uint64": tm.makeNumericIndex(100, dtype="uint64"), + "num_uint32": tm.makeNumericIndex(100, dtype="uint32"), + "num_uint16": tm.makeNumericIndex(100, dtype="uint16"), + "num_uint8": tm.makeNumericIndex(100, dtype="uint8"), + "num_float64": tm.makeNumericIndex(100, dtype="float64"), + "num_float32": tm.makeNumericIndex(100, dtype="float32"), "bool": tm.makeBoolIndex(10), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), @@ -511,7 +521,10 @@ def index_flat(request): params=[ key for key in indices_dict - if key not in ["int", "uint", "range", "empty", "repeats"] + if not ( + key in ["int", "uint", "range", "empty", "repeats"] + or key.startswith("num_") + ) and not isinstance(indices_dict[key], MultiIndex) ] ) diff --git a/pandas/core/api.py b/pandas/core/api.py index 2677530455b07..a03293ce13144 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -57,6 +57,7 @@ Int64Index, IntervalIndex, MultiIndex, + NumericIndex, PeriodIndex, RangeIndex, TimedeltaIndex, diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 2de7b262c3533..d6dbc83934db0 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -100,6 +100,7 @@ def _check(cls, inst) -> bool: "rangeindex", "float64index", "uint64index", + "numericindex", "multiindex", "datetimeindex", "timedeltaindex", diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6694d0bcc82e0..87c50e94deb34 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -81,6 +81,7 @@ is_interval_dtype, is_iterator, is_list_like, + is_numeric_dtype, is_object_dtype, is_scalar, is_signed_integer_dtype, @@ -360,6 +361,11 @@ def _outer_indexer( _can_hold_na: bool = True _can_hold_strings: bool = True + # Whether this index is a NumericIndex, but not a Int64Index, Float64Index, + # UInt64Index or RangeIndex. Needed for backwards compat. Remove this attribute and + # associated code in pandas 2.0. + _is_backward_compat_public_numeric_index: bool = False + _engine_type: type[libindex.IndexEngine] = libindex.ObjectEngine # whether we support partial string indexing. Overridden # in DatetimeIndex and PeriodIndex @@ -437,6 +443,12 @@ def __new__( return Index._simple_new(data, name=name) # index-like + elif ( + isinstance(data, Index) + and data._is_backward_compat_public_numeric_index + and dtype is None + ): + return data._constructor(data, name=name, copy=copy) elif isinstance(data, (np.ndarray, Index, ABCSeries)): if isinstance(data, ABCMultiIndex): @@ -5726,6 +5738,11 @@ def map(self, mapper, na_action=None): # empty attributes["dtype"] = self.dtype + if self._is_backward_compat_public_numeric_index and is_numeric_dtype( + new_values.dtype + ): + return self._constructor(new_values, **attributes) + return Index(new_values, **attributes) # TODO: De-duplicate with map, xref GH#32349 diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7339c82cbcc77..2faf2cab75117 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -21,6 +21,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_scalar, + pandas_dtype, ) from pandas.core.dtypes.missing import ( is_valid_na_for_dtype, @@ -280,6 +281,30 @@ def _is_dtype_compat(self, other) -> Categorical: return other + @doc(Index.astype) + def astype(self, dtype: Dtype, copy: bool = True) -> Index: + from pandas.core.api import NumericIndex + + dtype = pandas_dtype(dtype) + + categories = self.categories + # the super method always returns Int64Index, UInt64Index and Float64Index + # but if the categories are a NumericIndex with dtype float32, we want to + # return an index with the same dtype as self.categories. + if categories._is_backward_compat_public_numeric_index: + assert isinstance(categories, NumericIndex) # mypy complaint fix + try: + categories._validate_dtype(dtype) + except ValueError: + pass + else: + new_values = self._data.astype(dtype, copy=copy) + # pass copy=False because any copying has been done in the + # _data.astype call above + return categories._constructor(new_values, name=self.name, copy=False) + + return super().astype(dtype, copy=copy) + def equals(self, other: object) -> bool: """ Determine if two CategoricalIndex objects contain the same elements. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index d31f6d6a252f3..08881f915ef2d 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -97,6 +97,7 @@ class NumericIndex(Index): ) _is_numeric_dtype = True _can_hold_strings = False + _is_backward_compat_public_numeric_index: bool = True @cache_readonly def _can_hold_na(self) -> bool: @@ -165,7 +166,15 @@ def _ensure_array(cls, data, dtype, copy: bool): dtype = cls._ensure_dtype(dtype) if copy or not is_dtype_equal(data.dtype, dtype): - subarr = np.array(data, dtype=dtype, copy=copy) + # TODO: the try/except below is because it's difficult to predict the error + # and/or error message from different combinations of data and dtype. + # Efforts to avoid this try/except welcome. + # See https://github.com/pandas-dev/pandas/pull/41153#discussion_r676206222 + try: + subarr = np.array(data, dtype=dtype, copy=copy) + cls._validate_dtype(subarr.dtype) + except (TypeError, ValueError): + raise ValueError(f"data is not compatible with {cls.__name__}") cls._assert_safe_casting(data, subarr) else: subarr = data @@ -189,12 +198,24 @@ def _validate_dtype(cls, dtype: Dtype | None) -> None: ) @classmethod - def _ensure_dtype( - cls, - dtype: Dtype | None, - ) -> np.dtype | None: - """Ensure int64 dtype for Int64Index, etc. Assumed dtype is validated.""" - return cls._default_dtype + def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: + """ + Ensure int64 dtype for Int64Index etc. but allow int32 etc. for NumericIndex. + + Assumes dtype has already been validated. + """ + if dtype is None: + return cls._default_dtype + + dtype = pandas_dtype(dtype) + assert isinstance(dtype, np.dtype) + + if cls._is_backward_compat_public_numeric_index: + # dtype for NumericIndex + return dtype + else: + # dtype for Int64Index, UInt64Index etc. Needed for backwards compat. + return cls._default_dtype def __contains__(self, key) -> bool: """ @@ -214,8 +235,8 @@ def __contains__(self, key) -> bool: @doc(Index.astype) def astype(self, dtype, copy=True): + dtype = pandas_dtype(dtype) if is_float_dtype(self.dtype): - dtype = pandas_dtype(dtype) if needs_i8_conversion(dtype): raise TypeError( f"Cannot convert Float64Index to dtype {dtype}; integer " @@ -225,7 +246,16 @@ def astype(self, dtype, copy=True): # TODO(jreback); this can change once we have an EA Index type # GH 13149 arr = astype_nansafe(self._values, dtype=dtype) - return Int64Index(arr, name=self.name) + if isinstance(self, Float64Index): + return Int64Index(arr, name=self.name) + else: + return NumericIndex(arr, name=self.name, dtype=dtype) + elif self._is_backward_compat_public_numeric_index: + # this block is needed so e.g. NumericIndex[int8].astype("int32") returns + # NumericIndex[int32] and not Int64Index with dtype int64. + # When Int64Index etc. are removed from the code base, removed this also. + if not is_extension_array_dtype(dtype) and is_numeric_dtype(dtype): + return self._constructor(self, dtype=dtype, copy=copy) return super().astype(dtype, copy=copy) @@ -335,6 +365,8 @@ class IntegerIndex(NumericIndex): This is an abstract class for Int64Index, UInt64Index. """ + _is_backward_compat_public_numeric_index: bool = False + @property def asi8(self) -> np.ndarray: # do not cache or you'll create a memory leak @@ -399,3 +431,4 @@ class Float64Index(NumericIndex): _engine_type = libindex.Float64Engine _default_dtype = np.dtype(np.float64) _dtype_validation_metadata = (is_float_dtype, "float") + _is_backward_compat_public_numeric_index: bool = False diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 0ce99df44a5f9..71bc4af78db6b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -101,6 +101,7 @@ class RangeIndex(NumericIndex): _engine_type = libindex.Int64Engine _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer") _range: range + _is_backward_compat_public_numeric_index: bool = False # -------------------------------------------------------------------- # Constructors diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index cabe766a4e9eb..6ca5f2f76861e 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -10,6 +10,7 @@ import pandas as pd import pandas._testing as tm +from pandas.core.api import NumericIndex from pandas.tests.base.common import allow_na_ops @@ -24,6 +25,9 @@ def test_unique(index_or_series_obj): expected = pd.MultiIndex.from_tuples(unique_values) expected.names = obj.names tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: + expected = NumericIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected, exact=True) elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) if is_datetime64tz_dtype(obj.dtype): @@ -62,7 +66,10 @@ def test_unique_null(null_obj, index_or_series_obj): unique_values_not_null = [val for val in unique_values_raw if not pd.isnull(val)] unique_values = [null_obj] + unique_values_not_null - if isinstance(obj, pd.Index): + if isinstance(obj, pd.Index) and obj._is_backward_compat_public_numeric_index: + expected = NumericIndex(unique_values, dtype=obj.dtype) + tm.assert_index_equal(result, expected, exact=True) + elif isinstance(obj, pd.Index): expected = pd.Index(unique_values, dtype=obj.dtype) if is_datetime64tz_dtype(obj.dtype): result = result.normalize() diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e02b2559bb8ae..2c4067c347a35 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -9,7 +9,12 @@ from pandas._libs import iNaT from pandas._libs.tslibs import Timestamp -from pandas.core.dtypes.common import is_datetime64tz_dtype +from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, + is_float_dtype, + is_integer_dtype, + is_unsigned_integer_dtype, +) from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -25,10 +30,11 @@ RangeIndex, Series, TimedeltaIndex, - UInt64Index, isna, ) +from pandas import UInt64Index # noqa:F401 import pandas._testing as tm +from pandas.core.api import NumericIndex from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -351,12 +357,13 @@ def test_numpy_argsort(self, index): def test_repeat(self, simple_index): rep = 2 idx = simple_index.copy() - expected = Index(idx.values.repeat(rep), name=idx.name) + new_index_cls = Int64Index if isinstance(idx, RangeIndex) else idx._constructor + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) tm.assert_index_equal(idx.repeat(rep), expected) idx = simple_index rep = np.arange(len(idx)) - expected = Index(idx.values.repeat(rep), name=idx.name) + expected = new_index_cls(idx.values.repeat(rep), name=idx.name) tm.assert_index_equal(idx.repeat(rep), expected) def test_numpy_repeat(self, simple_index): @@ -531,10 +538,10 @@ def test_hasnans_isnans(self, index_flat): if len(index) == 0: return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return elif isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)): - return else: values[1] = np.nan @@ -551,7 +558,9 @@ def test_hasnans_isnans(self, index_flat): def test_fillna(self, index): # GH 11343 if len(index) == 0: - pass + return + elif isinstance(index, NumericIndex) and is_integer_dtype(index.dtype): + return elif isinstance(index, MultiIndex): idx = index.copy(deep=True) msg = "isna is not defined for MultiIndex" @@ -572,8 +581,6 @@ def test_fillna(self, index): if isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)): - return else: values[1] = np.nan @@ -622,8 +629,10 @@ def test_map(self, simple_index): idx = simple_index # we don't infer UInt64 - if isinstance(idx, UInt64Index): + if is_integer_dtype(idx.dtype): expected = idx.astype("int64") + elif is_float_dtype(idx.dtype): + expected = idx.astype("float64") else: expected = idx @@ -647,7 +656,7 @@ def test_map_dictlike(self, mapper, simple_index): identity = mapper(idx.values, idx) # we don't infer to UInt64 for a dict - if isinstance(idx, UInt64Index) and isinstance(identity, dict): + if is_unsigned_integer_dtype(idx.dtype) and isinstance(identity, dict): expected = idx.astype("int64") else: expected = idx @@ -657,7 +666,12 @@ def test_map_dictlike(self, mapper, simple_index): tm.assert_index_equal(result, expected) # empty mappable - expected = Index([np.nan] * len(idx)) + if idx._is_backward_compat_public_numeric_index: + new_index_cls = NumericIndex + else: + new_index_cls = Float64Index + + expected = new_index_cls([np.nan] * len(idx)) result = idx.map(mapper(expected, idx)) tm.assert_index_equal(result, expected) @@ -782,9 +796,11 @@ class NumericBase(Base): """ def test_constructor_unwraps_index(self, dtype): + index_cls = self._index_cls + idx = Index([1, 2], dtype=dtype) - result = self._index_cls(idx) - expected = np.array([1, 2], dtype=dtype) + result = index_cls(idx) + expected = np.array([1, 2], dtype=idx.dtype) tm.assert_numpy_array_equal(result._data, expected) def test_where(self): diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 8cbca0ba8eb65..e7dd547b3e73e 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -12,19 +12,18 @@ UInt64Index, ) import pandas._testing as tm +from pandas.core.api import NumericIndex from pandas.tests.indexes.common import NumericBase -class TestFloat64Index(NumericBase): - _index_cls = Float64Index +class TestFloatNumericIndex(NumericBase): + _index_cls = NumericIndex - @pytest.fixture(params=[np.float64]) + @pytest.fixture(params=[np.float64, np.float32]) def dtype(self, request): return request.param - @pytest.fixture( - params=["int64", "uint64", "category", "datetime64", "object"], - ) + @pytest.fixture(params=["category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -54,7 +53,7 @@ def float_index(self, dtype): return self._index_cls([0.0, 2.5, 5.0, 7.5, 10.0], dtype=dtype) def test_repr_roundtrip(self, index): - tm.assert_index_equal(eval(repr(index)), index) + tm.assert_index_equal(eval(repr(index)), index, exact=True) def check_is_index(self, idx): assert isinstance(idx, Index) @@ -92,11 +91,11 @@ def test_constructor(self, dtype): assert isinstance(index, index_cls) assert index.dtype == dtype - index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) + index = index_cls([1.0, 2, 3, 4, 5], dtype=dtype) assert isinstance(index, index_cls) assert index.dtype == dtype - index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=dtype) assert isinstance(index, index_cls) assert index.dtype == dtype @@ -107,11 +106,6 @@ def test_constructor(self, dtype): result = index_cls(np.array([np.nan]), dtype=dtype) assert pd.isna(result.values).all() - result = Index(np.array([np.nan], dtype=dtype)) - assert isinstance(result, index_cls) - assert result.dtype == dtype - assert pd.isna(result.values).all() - def test_constructor_invalid(self): index_cls = self._index_cls cls_name = index_cls.__name__ @@ -134,10 +128,8 @@ def test_constructor_invalid(self): with pytest.raises((TypeError, ValueError), match=msg): index_cls(["a", "b", 0.0]) - msg = ( - r"float\(\) argument must be a string or a( real)? number, not 'Timestamp'" - ) - with pytest.raises(TypeError, match=msg): + msg = f"data is not compatible with {index_cls.__name__}" + with pytest.raises(ValueError, match=msg): index_cls([Timestamp("20130101")]) def test_constructor_coerce(self, mixed_index, float_index): @@ -168,7 +160,7 @@ def test_type_coercion_valid(self, float_dtype): # There is no Float32Index, so we always # generate Float64Index. idx = Index([1, 2, 3.5], dtype=float_dtype) - tm.assert_index_equal(idx, Index([1, 2, 3.5])) + tm.assert_index_equal(idx, Index([1, 2, 3.5]), exact=True) def test_equals_numeric(self): index_cls = self._index_cls @@ -263,19 +255,54 @@ def test_nan_multiple_containment(self): tm.assert_numpy_array_equal(idx.isin([np.nan]), np.array([False, False])) def test_fillna_float64(self): + index_cls = self._index_cls # GH 11343 idx = Index([1.0, np.nan, 3.0], dtype=float, name="x") # can't downcast exp = Index([1.0, 0.1, 3.0], name="x") - tm.assert_index_equal(idx.fillna(0.1), exp) + tm.assert_index_equal(idx.fillna(0.1), exp, exact=True) # downcast - exp = self._index_cls([1.0, 2.0, 3.0], name="x") - tm.assert_index_equal(idx.fillna(2), exp) + exact = True if index_cls is Int64Index else "equiv" + exp = index_cls([1.0, 2.0, 3.0], name="x") + tm.assert_index_equal(idx.fillna(2), exp, exact=exact) # object exp = Index([1.0, "obj", 3.0], name="x") - tm.assert_index_equal(idx.fillna("obj"), exp) + tm.assert_index_equal(idx.fillna("obj"), exp, exact=True) + + +class TestFloat64Index(TestFloatNumericIndex): + _index_cls = Float64Index + + @pytest.fixture + def dtype(self, request): + return np.float64 + + @pytest.fixture( + params=["int64", "uint64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param + + def test_constructor_from_base_index(self, dtype): + index_cls = self._index_cls + + result = Index(np.array([np.nan], dtype=dtype)) + assert isinstance(result, index_cls) + assert result.dtype == dtype + assert pd.isna(result.values).all() + + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.float32) + assert isinstance(index, index_cls) + assert index.dtype == np.float64 class NumericInt(NumericBase): @@ -287,10 +314,10 @@ def test_view(self, dtype): assert idx_view.name == "Foo" idx_view = idx.view(dtype) - tm.assert_index_equal(idx, index_cls(idx_view, name="Foo")) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) idx_view = idx.view(index_cls) - tm.assert_index_equal(idx, index_cls(idx_view, name="Foo")) + tm.assert_index_equal(idx, index_cls(idx_view, name="Foo"), exact=True) def test_is_monotonic(self): index_cls = self._index_cls @@ -380,16 +407,14 @@ def test_prevent_casting(self, simple_index): assert result.dtype == np.object_ -class TestInt64Index(NumericInt): - _index_cls = Int64Index +class TestIntNumericIndex(NumericInt): + _index_cls = NumericIndex - @pytest.fixture(params=[np.int64]) + @pytest.fixture(params=[np.int64, np.int32, np.int16, np.int8]) def dtype(self, request): return request.param - @pytest.fixture( - params=["uint64", "float64", "category", "datetime64", "object"], - ) + @pytest.fixture(params=["category", "datetime64", "object"]) def invalid_dtype(self, request): return request.param @@ -406,15 +431,6 @@ def index(self, request, dtype): def test_constructor(self, dtype): index_cls = self._index_cls - # pass list, coerce fine - index = index_cls([-5, 0, 1, 2], dtype=dtype) - expected = Index([-5, 0, 1, 2], dtype=dtype) - tm.assert_index_equal(index, expected) - - # from iterable - index = index_cls(iter([-5, 0, 1, 2])) - tm.assert_index_equal(index, expected) - # scalar raise Exception msg = ( rf"{index_cls.__name__}\(\.\.\.\) must be called with a collection of some " @@ -424,41 +440,54 @@ def test_constructor(self, dtype): index_cls(5) # copy + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) arr = index.values new_index = index_cls(arr, copy=True) - tm.assert_index_equal(new_index, index) + tm.assert_index_equal(new_index, index, exact=True) val = arr[0] + 3000 # this should not change index arr[0] = val assert new_index[0] != val - # interpret list-like - expected = index_cls([5, 0]) - for cls in [Index, index_cls]: - for idx in [ - cls([5, 0], dtype=dtype), - cls(np.array([5, 0]), dtype=dtype), - cls(Series([5, 0]), dtype=dtype), - ]: - tm.assert_index_equal(idx, expected) + if dtype == np.int64: + exact = "equiv" if index_cls != Int64Index else True + + # pass list, coerce fine + index = index_cls([-5, 0, 1, 2], dtype=dtype) + expected = Index([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # from iterable + index = index_cls(iter([-5, 0, 1, 2]), dtype=dtype) + expected = index_cls([-5, 0, 1, 2], dtype=dtype) + tm.assert_index_equal(index, expected, exact=exact) + + # interpret list-like + expected = index_cls([5, 0], dtype=dtype) + for cls in [Index, index_cls]: + for idx in [ + cls([5, 0], dtype=dtype), + cls(np.array([5, 0]), dtype=dtype), + cls(Series([5, 0]), dtype=dtype), + ]: + tm.assert_index_equal(idx, expected, exact=exact) def test_constructor_corner(self, dtype): index_cls = self._index_cls arr = np.array([1, 2, 3, 4], dtype=object) - index = index_cls(arr) - assert index.values.dtype == dtype - tm.assert_index_equal(index, Index(arr)) + index = index_cls(arr, dtype=dtype) + assert index.values.dtype == index.dtype + if dtype == np.int64: + exact = True if index_cls is Int64Index else "equiv" + tm.assert_index_equal(index, Index(arr), exact=exact) # preventing casting arr = np.array([1, "2", 3, "4"], dtype=object) with pytest.raises(TypeError, match="casting"): - index_cls(arr) - - arr_with_floats = [0, 2, 3, 4, 5, 1.25, 3, -1] - with pytest.raises(TypeError, match="casting"): - index_cls(arr_with_floats) + index_cls(arr, dtype=dtype) def test_constructor_coercion_signed_to_unsigned(self, uint_dtype): @@ -478,20 +507,43 @@ def test_coerce_list(self): assert type(arr) is Index -class TestUInt64Index(NumericInt): - - _index_cls = UInt64Index +class TestInt64Index(TestIntNumericIndex): + _index_cls = Int64Index @pytest.fixture def dtype(self): - return np.uint64 + return np.int64 @pytest.fixture( - params=["int64", "float64", "category", "datetime64", "object"], + params=["float64", "uint64", "object", "category", "datetime64"], ) def invalid_dtype(self, request): return request.param + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) + assert isinstance(index, index_cls) + assert index.dtype == np.int64 + + +class TestUIntNumericIndex(NumericInt): + + _index_cls = NumericIndex + + @pytest.fixture(params=[np.uint64]) + def dtype(self, request): + return request.param + + @pytest.fixture(params=["category", "datetime64", "object"]) + def invalid_dtype(self, request): + return request.param + @pytest.fixture def simple_index(self, dtype): # compat with shared Int64/Float64 tests @@ -505,31 +557,47 @@ def simple_index(self, dtype): ids=["index_inc", "index_dec"], ) def index(self, request): - return self._index_cls(request.param) + return self._index_cls(request.param, dtype=np.uint64) + + +class TestUInt64Index(TestUIntNumericIndex): + + _index_cls = UInt64Index + + @pytest.fixture + def dtype(self): + return np.uint64 + + @pytest.fixture( + params=["int64", "float64", "object", "category", "datetime64"], + ) + def invalid_dtype(self, request): + return request.param def test_constructor(self, dtype): index_cls = self._index_cls + exact = True if index_cls is UInt64Index else "equiv" idx = index_cls([1, 2, 3]) res = Index([1, 2, 3], dtype=dtype) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) idx = index_cls([1, 2 ** 63]) res = Index([1, 2 ** 63], dtype=dtype) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) idx = index_cls([1, 2 ** 63]) res = Index([1, 2 ** 63]) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) idx = Index([-1, 2 ** 63], dtype=object) res = Index(np.array([-1, 2 ** 63], dtype=object)) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) # https://github.com/pandas-dev/pandas/issues/29526 idx = index_cls([1, 2 ** 63 + 1], dtype=dtype) res = Index([1, 2 ** 63 + 1], dtype=dtype) - tm.assert_index_equal(res, idx) + tm.assert_index_equal(res, idx, exact=exact) def test_constructor_does_not_cast_to_float(self): # https://github.com/numpy/numpy/issues/19146 @@ -538,6 +606,17 @@ def test_constructor_does_not_cast_to_float(self): result = UInt64Index(values) assert list(result) == values + def test_constructor_32bit(self, dtype): + index_cls = self._index_cls + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + + index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) + assert isinstance(index, index_cls) + assert index.dtype == np.uint64 + @pytest.mark.parametrize( "box", diff --git a/pandas/tests/indexes/test_any_index.py b/pandas/tests/indexes/test_any_index.py index f7dcaa628228b..510d76ebe4407 100644 --- a/pandas/tests/indexes/test_any_index.py +++ b/pandas/tests/indexes/test_any_index.py @@ -5,8 +5,11 @@ """ import re +import numpy as np import pytest +from pandas.core.dtypes.common import is_float_dtype + import pandas._testing as tm @@ -47,7 +50,17 @@ def test_mutability(index): def test_map_identity_mapping(index): # GH#12766 - tm.assert_index_equal(index, index.map(lambda x: x)) + result = index.map(lambda x: x) + if index._is_backward_compat_public_numeric_index: + if is_float_dtype(index.dtype): + expected = index.astype(np.float64) + elif index.dtype == np.uint64: + expected = index.astype(np.uint64) + else: + expected = index.astype(np.int64) + else: + expected = index + tm.assert_index_equal(result, expected) def test_wrong_number_names(index): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 826649358e663..a84e83e0f54b6 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -35,6 +35,7 @@ period_range, ) import pandas._testing as tm +from pandas.api.types import is_float_dtype from pandas.core.indexes.api import ( Index, MultiIndex, @@ -713,6 +714,12 @@ def test_map_dictlike(self, index, mapper): if index.empty: # to match proper result coercion for uints expected = Index([]) + elif index._is_backward_compat_public_numeric_index: + if is_float_dtype(index.dtype): + exp_dtype = np.float64 + else: + exp_dtype = np.int64 + expected = index._constructor(np.arange(len(index), 0, -1), dtype=exp_dtype) else: expected = Index(np.arange(len(index), 0, -1)) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 882e708a357c8..8facaf279f2cf 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -26,6 +26,7 @@ TimedeltaIndex, ) import pandas._testing as tm +from pandas.core.api import NumericIndex class TestCommon: @@ -261,7 +262,8 @@ def test_drop_duplicates(self, index_flat, keep): # make unique index holder = type(index) unique_values = list(set(index)) - unique_idx = holder(unique_values) + dtype = index.dtype if isinstance(index, NumericIndex) else None + unique_idx = holder(unique_values, dtype=dtype) # make duplicated index n = len(unique_idx) @@ -289,7 +291,8 @@ def test_drop_duplicates_no_duplicates(self, index_flat): else: holder = type(index) unique_values = list(set(index)) - unique_idx = holder(unique_values) + dtype = index.dtype if isinstance(index, NumericIndex) else None + unique_idx = holder(unique_values, dtype=dtype) # check on unique index expected_duplicated = np.array([False] * len(unique_idx), dtype="bool") diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 92adc0570dee1..80ba0c53fb9c4 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -5,13 +5,11 @@ DatetimeIndex, Float64Index, Index, - Int64Index, PeriodIndex, - RangeIndex, TimedeltaIndex, - UInt64Index, ) import pandas._testing as tm +from pandas.core.api import NumericIndex from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -51,7 +49,7 @@ def test_numpy_ufuncs_basic(index, func): with tm.external_error_raised((TypeError, AttributeError)): with np.errstate(all="ignore"): func(index) - elif isinstance(index, (Float64Index, Int64Index, UInt64Index, RangeIndex)): + elif isinstance(index, NumericIndex): # coerces to float (e.g. np.sin) with np.errstate(all="ignore"): result = func(index) @@ -96,7 +94,7 @@ def test_numpy_ufuncs_other(index, func, request): with tm.external_error_raised(TypeError): func(index) - elif isinstance(index, (Float64Index, Int64Index, UInt64Index, RangeIndex)): + elif isinstance(index, NumericIndex): # Results in bool array result = func(index) assert isinstance(result, np.ndarray)