diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 65bfd8289fe3d..0c86d1da55233 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -145,6 +145,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`,:issue:`21311`,:issue:`22315`,:issue:`26974`) +- Deprecated ``astype`` of datetimelike (``timedelta64[ns]``, ``datetime64[ns]``, ``Datetime64TZDtype``, ``PeriodDtype``) to integer dtypes, use ``values.view(...)`` instead (:issue:`38544`) - - diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 79ecf8620c70c..a25bc590f4d83 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -352,6 +352,14 @@ def astype(self, dtype, copy=True): elif is_integer_dtype(dtype): # we deliberately ignore int32 vs. int64 here. # See https://github.com/pandas-dev/pandas/issues/24381 for more. + warnings.warn( + f"casting {self.dtype} values to int64 with .astype(...) is " + "deprecated and will raise in a future version. " + "Use .view(...) instead.", + FutureWarning, + stacklevel=3, + ) + values = self.asi8 if is_unsigned_integer_dtype(dtype): diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d1c16de05ce55..1c6e378d07e20 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1,7 +1,6 @@ """ Routines for casting. """ - from contextlib import suppress from datetime import datetime, timedelta from typing import ( @@ -17,6 +16,7 @@ Type, Union, ) +import warnings import numpy as np @@ -997,6 +997,14 @@ def astype_nansafe( elif is_datetime64_dtype(arr): if dtype == np.int64: + warnings.warn( + f"casting {arr.dtype} values to int64 with .astype(...) " + "is deprecated and will raise in a future version. " + "Use .view(...) 
instead.", + FutureWarning, + # stacklevel chosen to be correct when reached via Series.astype + stacklevel=7, + ) if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) @@ -1009,6 +1017,14 @@ def astype_nansafe( elif is_timedelta64_dtype(arr): if dtype == np.int64: + warnings.warn( + f"casting {arr.dtype} values to int64 with .astype(...) " + "is deprecated and will raise in a future version. " + "Use .view(...) instead.", + FutureWarning, + # stacklevel chosen to be correct when reached via Series.astype + stacklevel=7, + ) if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index c8db0157ba219..52f71f8c8f505 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -184,13 +184,18 @@ def test_astype_copies(self, dtype, other): @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")]) - result = arr.astype(dtype) + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + result = arr.astype(dtype) if np.dtype(dtype).kind == "u": expected_dtype = np.dtype("uint64") else: expected_dtype = np.dtype("int64") - expected = arr.astype(expected_dtype) + + with tm.assert_produces_warning(FutureWarning): + # astype(int..) 
deprecated + expected = arr.astype(expected_dtype) assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/arrays/test_period.py b/pandas/tests/arrays/test_period.py index f96a15d5b2e7c..8fca2a6d83393 100644 --- a/pandas/tests/arrays/test_period.py +++ b/pandas/tests/arrays/test_period.py @@ -123,13 +123,18 @@ def test_astype(dtype): # We choose to ignore the sign and size of integers for # Period/Datetime/Timedelta astype arr = period_array(["2000", "2001", None], freq="D") - result = arr.astype(dtype) + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + result = arr.astype(dtype) if np.dtype(dtype).kind == "u": expected_dtype = np.dtype("uint64") else: expected_dtype = np.dtype("int64") - expected = arr.astype(expected_dtype) + + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + expected = arr.astype(expected_dtype) assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) @@ -137,12 +142,17 @@ def test_astype(dtype): def test_astype_copies(): arr = period_array(["2000", "2001", None], freq="D") - result = arr.astype(np.int64, copy=False) + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + result = arr.astype(np.int64, copy=False) + # Add the `.base`, since we now use `.asi8` which returns a view. # We could maybe override it in PeriodArray to return ._data directly. assert result.base is arr._data - result = arr.astype(np.int64, copy=True) + with tm.assert_produces_warning(FutureWarning): + # astype(int..) 
deprecated + result = arr.astype(np.int64, copy=True) assert result is not arr._data tm.assert_numpy_array_equal(result, arr._data.view("i8")) diff --git a/pandas/tests/arrays/test_timedeltas.py b/pandas/tests/arrays/test_timedeltas.py index c0567209ff91b..9d9ca41779b5a 100644 --- a/pandas/tests/arrays/test_timedeltas.py +++ b/pandas/tests/arrays/test_timedeltas.py @@ -82,13 +82,18 @@ def test_from_sequence_dtype(self): @pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"]) def test_astype_int(self, dtype): arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")]) - result = arr.astype(dtype) + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + result = arr.astype(dtype) if np.dtype(dtype).kind == "u": expected_dtype = np.dtype("uint64") else: expected_dtype = np.dtype("int64") - expected = arr.astype(expected_dtype) + + with tm.assert_produces_warning(FutureWarning): + # astype(int..) deprecated + expected = arr.astype(expected_dtype) assert result.dtype == expected_dtype tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 9e75ba0864e76..54bac7deded6c 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -719,7 +719,9 @@ def test_astype_nansafe(val, typ): msg = "Cannot convert NaT values to integer" with pytest.raises(ValueError, match=msg): - astype_nansafe(arr, dtype=typ) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # datetimelike astype(int64) deprecated + astype_nansafe(arr, dtype=typ) @pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64]) diff --git a/pandas/tests/indexes/datetimes/methods/test_astype.py b/pandas/tests/indexes/datetimes/methods/test_astype.py index 2f22236d55ff3..98d5e074091de 100644 --- a/pandas/tests/indexes/datetimes/methods/test_astype.py +++ b/pandas/tests/indexes/datetimes/methods/test_astype.py @@ 
-29,7 +29,8 @@ def test_astype(self): ) tm.assert_index_equal(result, expected) - result = idx.astype(int) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = idx.astype(int) expected = Int64Index( [1463356800000000000] + [-9223372036854775808] * 3, dtype=np.int64, @@ -38,7 +39,8 @@ def test_astype(self): tm.assert_index_equal(result, expected) rng = date_range("1/1/2000", periods=10, name="idx") - result = rng.astype("i8") + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = rng.astype("i8") tm.assert_index_equal(result, Index(rng.asi8, name="idx")) tm.assert_numpy_array_equal(result.values, rng.asi8) @@ -48,9 +50,9 @@ def test_astype_uint(self): np.array([946684800000000000, 946771200000000000], dtype="uint64"), name="idx", ) - - tm.assert_index_equal(arr.astype("uint64"), expected) - tm.assert_index_equal(arr.astype("uint32"), expected) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) def test_astype_with_tz(self): diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index b4af1cb5859f0..34ce810e32273 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -197,10 +197,13 @@ def index(self, request): @pytest.mark.parametrize("subtype", ["int64", "uint64"]) def test_subtype_integer(self, index, subtype): dtype = IntervalDtype(subtype) - result = index.astype(dtype) - expected = IntervalIndex.from_arrays( - index.left.astype(subtype), index.right.astype(subtype), closed=index.closed - ) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = index.astype(dtype) + expected = IntervalIndex.from_arrays( + index.left.astype(subtype), + index.right.astype(subtype), + closed=index.closed, + ) tm.assert_index_equal(result, expected) def 
test_subtype_float(self, index): diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index 6182df8429e8b..409b9419cc464 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -72,13 +72,21 @@ def test_constructor(self, constructor, breaks, closed, name): ) def test_constructor_dtype(self, constructor, breaks, subtype): # GH 19262: conversion via dtype parameter - expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) + warn = None + if subtype == "int64" and breaks.dtype.kind in ["M", "m"]: + # astype(int64) deprecated + warn = FutureWarning + + with tm.assert_produces_warning(warn, check_stacklevel=False): + expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) expected = constructor(**expected_kwargs) result_kwargs = self.get_kwargs_from_breaks(breaks) iv_dtype = IntervalDtype(subtype) for dtype in (iv_dtype, str(iv_dtype)): - result = constructor(dtype=dtype, **result_kwargs) + with tm.assert_produces_warning(warn, check_stacklevel=False): + + result = constructor(dtype=dtype, **result_kwargs) tm.assert_index_equal(result, expected) @pytest.mark.filterwarnings("ignore:Passing keywords other:FutureWarning") diff --git a/pandas/tests/indexes/period/methods/test_astype.py b/pandas/tests/indexes/period/methods/test_astype.py index 674d09c6a7a8c..943b2605363c7 100644 --- a/pandas/tests/indexes/period/methods/test_astype.py +++ b/pandas/tests/indexes/period/methods/test_astype.py @@ -37,7 +37,8 @@ def test_astype_conversion(self): ) tm.assert_index_equal(result, expected) - result = idx.astype(np.int64) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = idx.astype(np.int64) expected = Int64Index( [16937] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" ) @@ -48,15 +49,17 @@ def test_astype_conversion(self): tm.assert_index_equal(result, expected) idx = 
period_range("1990", "2009", freq="A", name="idx") - result = idx.astype("i8") + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = idx.astype("i8") tm.assert_index_equal(result, Index(idx.asi8, name="idx")) tm.assert_numpy_array_equal(result.values, idx.asi8) def test_astype_uint(self): arr = period_range("2000", periods=2, name="idx") expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx") - tm.assert_index_equal(arr.astype("uint64"), expected) - tm.assert_index_equal(arr.astype("uint32"), expected) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) def test_astype_object(self): idx = PeriodIndex([], freq="M") diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 4bd4c9f4d10fc..dce2e0172556a 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -341,9 +341,14 @@ def test_astype_preserves_name(self, index, dtype): else: index.name = "idx" + warn = None + if dtype in ["int64", "uint64"]: + if needs_i8_conversion(index.dtype): + warn = FutureWarning try: # Some of these conversions cannot succeed so we use a try / except - result = index.astype(dtype) + with tm.assert_produces_warning(warn, check_stacklevel=False): + result = index.astype(dtype) except (ValueError, TypeError, NotImplementedError, SystemError): return diff --git a/pandas/tests/indexes/timedeltas/methods/test_astype.py b/pandas/tests/indexes/timedeltas/methods/test_astype.py index 6f82e77faca7a..a849ffa98324c 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_astype.py +++ b/pandas/tests/indexes/timedeltas/methods/test_astype.py @@ -55,7 +55,8 @@ def test_astype(self): ) tm.assert_index_equal(result, expected) - result = idx.astype(int) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = 
idx.astype(int) expected = Int64Index( [100000000000000] + [-9223372036854775808] * 3, dtype=np.int64, name="idx" ) @@ -66,7 +67,8 @@ def test_astype(self): tm.assert_index_equal(result, expected) rng = timedelta_range("1 days", periods=10) - result = rng.astype("i8") + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = rng.astype("i8") tm.assert_index_equal(result, Index(rng.asi8)) tm.assert_numpy_array_equal(rng.asi8, result.values) @@ -75,9 +77,9 @@ def test_astype_uint(self): expected = pd.UInt64Index( np.array([3600000000000, 90000000000000], dtype="uint64") ) - - tm.assert_index_equal(arr.astype("uint64"), expected) - tm.assert_index_equal(arr.astype("uint32"), expected) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + tm.assert_index_equal(arr.astype("uint64"), expected) + tm.assert_index_equal(arr.astype("uint32"), expected) def test_astype_timedelta64(self): # GH 13149, GH 13209 diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index d7580e9f8610e..9b032da1f20ea 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -454,6 +454,9 @@ def test_astype(self, t): # coerce all mgr = create_mgr("c: f4; d: f2; e: f8") + warn = FutureWarning if t == "int64" else None + # datetimelike.astype(int64) deprecated + t = np.dtype(t) tmgr = mgr.astype(t) assert tmgr.iget(0).dtype.type == t @@ -464,7 +467,8 @@ def test_astype(self, t): mgr = create_mgr("a,b: object; c: bool; d: datetime; e: f4; f: f2; g: f8") t = np.dtype(t) - tmgr = mgr.astype(t, errors="ignore") + with tm.assert_produces_warning(warn): + tmgr = mgr.astype(t, errors="ignore") assert tmgr.iget(2).dtype.type == t assert tmgr.iget(4).dtype.type == t assert tmgr.iget(5).dtype.type == t diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5b13091470b09..eabd6a1eb0743 100644 --- 
a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -785,7 +785,7 @@ def test_constructor_dtype_datetime64(self): dtype="datetime64[ns]", ) - result = Series(Series(dates).astype(np.int64) / 1000000, dtype="M8[ms]") + result = Series(Series(dates).view(np.int64) / 1000000, dtype="M8[ms]") tm.assert_series_equal(result, expected) result = Series(dates, dtype="datetime64[ns]") @@ -800,7 +800,9 @@ def test_constructor_dtype_datetime64(self): dts = Series(dates, dtype="datetime64[ns]") # valid astype - dts.astype("int64") + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # astype(np.int64) deprecated + dts.astype("int64") # invalid casting msg = r"cannot astype a datetimelike from \[datetime64\[ns\]\] to \[int32\]" @@ -810,8 +812,10 @@ def test_constructor_dtype_datetime64(self): # ints are ok # we test with np.int64 to get similar results on # windows / 32-bit platforms - result = Series(dts, dtype=np.int64) - expected = Series(dts.astype(np.int64)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # astype(np.int64) deprecated + result = Series(dts, dtype=np.int64) + expected = Series(dts.astype(np.int64)) tm.assert_series_equal(result, expected) # invalid dates can be help as object @@ -1287,13 +1291,16 @@ def test_constructor_dtype_timedelta64(self): td = Series([np.timedelta64(1, "s")]) assert td.dtype == "timedelta64[ns]" + # FIXME: dont leave commented-out # these are frequency conversion astypes # for t in ['s', 'D', 'us', 'ms']: # with pytest.raises(TypeError): # td.astype('m8[%s]' % t) # valid astype - td.astype("int64") + with tm.assert_produces_warning(FutureWarning): + # astype(int64) deprecated + td.astype("int64") # invalid casting msg = r"cannot astype a timedelta from \[timedelta64\[ns\]\] to \[int32\]" @@ -1410,8 +1417,10 @@ def test_constructor_cant_cast_datetimelike(self, index): # ints are ok # we test with np.int64 to get similar results on # windows / 
32-bit platforms - result = Series(index, dtype=np.int64) - expected = Series(index.astype(np.int64)) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # astype(np.int64) deprecated, use .view(np.int64) instead + result = Series(index, dtype=np.int64) + expected = Series(index.astype(np.int64)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize(