From c31e5410c88444f1789ee6a3c83424f40a0ba11a Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 12 Apr 2017 13:39:10 -0400 Subject: [PATCH 1/2] DEPR: Deprecate generic timestamp dtypes We only use the nanosecond frequency, and numpy doesn't even handle generic timestamp dtypes well. xref gh-15524 (comment). --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/tests/series/test_constructors.py | 24 ++++++++++++++++++ pandas/tests/series/test_dtypes.py | 32 ++++++++++++++++++++++++ pandas/types/cast.py | 25 ++++++++++++++++-- 4 files changed, 80 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index a105a6801fb61..cb3e20e50380b 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1204,6 +1204,7 @@ Deprecations - ``SparseArray.to_dense()`` has deprecated the ``fill`` parameter, as that parameter was not being respected (:issue:`14647`) - ``SparseSeries.to_dense()`` has deprecated the ``sparse_only`` parameter (:issue:`14647`) - ``Series.repeat()`` has deprecated the ``reps`` parameter in favor of ``repeats`` (:issue:`12662`) +- The ``Series`` constructor and ``.astype`` method have deprecated accepting timestamp dtypes without a frequency (e.g. ``np.datetime64``) for the ``dtype`` parameter (:issue:`15524`) - ``Index.repeat()`` and ``MultiIndex.repeat()`` have deprecated the ``n`` parameter in favor of ``repeats`` (:issue:`12662`) - ``Categorical.searchsorted()`` and ``Series.searchsorted()`` have deprecated the ``v`` parameter in favor of ``value`` (:issue:`12662`) - ``TimedeltaIndex.searchsorted()``, ``DatetimeIndex.searchsorted()``, and ``PeriodIndex.searchsorted()`` have deprecated the ``key`` parameter in favor of ``value`` (:issue:`12662`) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index dbe2db67359f3..c429be2680f18 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -839,3 +839,27 @@ def test_constructor_cast_object(self): s = Series(date_range('1/1/2000', periods=10), dtype=object) exp = Series(date_range('1/1/2000', periods=10)) tm.assert_series_equal(s, exp) + + def test_constructor_generic_timestamp(self): + # see gh-15524 + dtype = np.timedelta64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'm8[ns]' + + dtype = np.datetime64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'M8[ns]' + + # These timestamps have the wrong frequencies, + # so an Exception should be raised now. + msg = "cannot convert timedeltalike" + with tm.assertRaisesRegexp(TypeError, msg): + Series([], dtype='m8[ps]') + + msg = "cannot convert datetimelike" + with tm.assertRaisesRegexp(TypeError, msg): + Series([], dtype='M8[ps]') diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index a2aaff25516ae..1127c3381f57d 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -153,6 +153,38 @@ def test_astype_dict(self): self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str}) self.assertRaises(KeyError, s.astype, {0: str}) + def test_astype_generic_timestamp(self): + # see gh-15524 + data = [1] + + s = Series(data) + dtype = np.datetime64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + assert_series_equal(result, expected) + + s = Series(data) + dtype = np.timedelta64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + assert_series_equal(result, expected) + + def test_astype_empty_constructor_equality(self): + # see gh-15524 + + for dtype in np.typecodes['All']: + if dtype not in ('S', 'V'): # poor support (if any) currently + init_empty = Series([], dtype=dtype) + astype_empty = Series([]).astype(dtype) + + try: + assert_series_equal(init_empty, astype_empty) + except AssertionError as e: + name = np.dtype(dtype).name + msg = "{dtype} failed: ".format(dtype=name) + str(e) + + raise AssertionError(msg) + def test_complexx(self): # GH4819 # complex access for ndarray compat diff --git a/pandas/types/cast.py b/pandas/types/cast.py index 85053dba0c18b..bc3cc79604ade 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -1,7 +1,10 @@ """ routings for casting """ from datetime import datetime, timedelta + import numpy as np +import warnings + from pandas._libs import tslib, lib from pandas._libs.tslib import iNaT from pandas.compat import string_types, text_type, PY3 @@ -620,6 +623,14 @@ def astype_nansafe(arr, dtype, copy=True): # work around NumPy brokenness, #1987 return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + if dtype.name in ("datetime64", "timedelta64"): + msg = ("Passing in '{dtype}' dtype with no frequency is " + "deprecated and will raise in a future version. " + "Please pass in '{dtype}[ns]' instead.") + warnings.warn(msg.format(dtype=dtype.name), + FutureWarning, stacklevel=2) + dtype = np.dtype(dtype.name + "[ns]") + if copy: return arr.astype(dtype) return arr.view(dtype) @@ -871,8 +882,15 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): if is_datetime64 or is_datetime64tz or is_timedelta64: # force the dtype if needed + msg = ("Passing in '{dtype}' dtype with no frequency is " + "deprecated and will raise in a future version. " + "Please pass in '{dtype}[ns]' instead.") + if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE): - if dtype.name == 'datetime64[ns]': + if dtype.name in ('datetime64', 'datetime64[ns]'): + if dtype.name == 'datetime64': + warnings.warn(msg.format(dtype=dtype.name), + FutureWarning, stacklevel=2) dtype = _NS_DTYPE else: raise TypeError("cannot convert datetimelike to " @@ -886,7 +904,10 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): - if dtype.name == 'timedelta64[ns]': + if dtype.name in ('timedelta64', 'timedelta64[ns]'): + if dtype.name == 'timedelta64': + warnings.warn(msg.format(dtype=dtype.name), + FutureWarning, stacklevel=2) dtype = _TD_DTYPE else: raise TypeError("cannot convert timedeltalike to " From eb4df87c74e5b7bfa9bc0fdaa9b967ce291ce7a1 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 12 Apr 2017 22:34:02 -0400 Subject: [PATCH 2/2] TST: Use pytest idioms in series/test_dtypes.py --- pandas/tests/series/test_constructors.py | 21 +- pandas/tests/series/test_dtypes.py | 252 ++++++++++++----------- pandas/types/cast.py | 6 +- 3 files changed, 150 insertions(+), 129 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c429be2680f18..8ad07afcacfcc 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -840,19 +840,22 @@ def test_constructor_cast_object(self): exp = Series(date_range('1/1/2000', periods=10)) tm.assert_series_equal(s, exp) - def test_constructor_generic_timestamp(self): + def test_constructor_generic_timestamp_deprecated(self): # see gh-15524 - dtype = np.timedelta64 - s = Series([], dtype=dtype) - assert s.empty - assert s.dtype == 'm8[ns]' + with tm.assert_produces_warning(FutureWarning): + dtype = np.timedelta64 + s = Series([], dtype=dtype) - dtype = np.datetime64 - s = Series([], dtype=dtype) + assert s.empty + assert s.dtype == 'm8[ns]' - assert s.empty - assert s.dtype == 'M8[ns]' + with tm.assert_produces_warning(FutureWarning): + dtype = np.datetime64 + s = Series([], dtype=dtype) + + assert s.empty + assert s.dtype == 'M8[ns]' # These timestamps have the wrong frequencies, # so an Exception should be raised now. diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 1127c3381f57d..6bbf00d6cab22 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -1,9 +1,13 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import sys +import pytest + from datetime import datetime + +import sys import string +import warnings from numpy import nan import numpy as np @@ -12,184 +16,199 @@ from pandas.compat import lrange, range, u from pandas import compat -from pandas.util.testing import assert_series_equal import pandas.util.testing as tm from .common import TestData -class TestSeriesDtypes(TestData, tm.TestCase): +class TestSeriesDtypes(TestData): - def test_astype(self): + @pytest.mark.parametrize("dtype", ["float32", "float64", + "int64", "int32"]) + def test_astype(self, dtype): s = Series(np.random.randn(5), name='foo') + as_typed = s.astype(dtype) - for dtype in ['float32', 'float64', 'int64', 'int32']: - astyped = s.astype(dtype) - self.assertEqual(astyped.dtype, dtype) - self.assertEqual(astyped.name, s.name) + assert as_typed.dtype == dtype + assert as_typed.name == s.name def test_dtype(self): - self.assertEqual(self.ts.dtype, np.dtype('float64')) - self.assertEqual(self.ts.dtypes, np.dtype('float64')) - self.assertEqual(self.ts.ftype, 'float64:dense') - self.assertEqual(self.ts.ftypes, 'float64:dense') - assert_series_equal(self.ts.get_dtype_counts(), Series(1, ['float64'])) - assert_series_equal(self.ts.get_ftype_counts(), Series( - 1, ['float64:dense'])) - - def test_astype_cast_nan_inf_int(self): - # GH14265, check nan and inf raise error when converting to int - types = [np.int32, np.int64] - values = [np.nan, np.inf] + assert self.ts.dtype == np.dtype('float64') + assert self.ts.dtypes == np.dtype('float64') + assert self.ts.ftype == 'float64:dense' + assert self.ts.ftypes == 'float64:dense' + tm.assert_series_equal(self.ts.get_dtype_counts(), + Series(1, ['float64'])) + tm.assert_series_equal(self.ts.get_ftype_counts(), + Series(1, ['float64:dense'])) + + @pytest.mark.parametrize("value", [np.nan, np.inf]) + @pytest.mark.parametrize("dtype", [np.int32, np.int64]) + def test_astype_cast_nan_inf_int(self, dtype, value): + # gh-14265: check NaN and inf raise error when converting to int msg = 'Cannot convert non-finite values \\(NA or inf\\) to integer' + s = Series([value]) - for this_type in types: - for this_val in values: - s = Series([this_val]) - with self.assertRaisesRegexp(ValueError, msg): - s.astype(this_type) + with tm.assertRaisesRegexp(ValueError, msg): + s.astype(dtype) - def test_astype_cast_object_int(self): + @pytest.mark.parametrize("dtype", [int, np.int8, np.int64]) + def test_astype_cast_object_int_fail(self, dtype): arr = Series(["car", "house", "tree", "1"]) + with pytest.raises(ValueError): + arr.astype(dtype) - self.assertRaises(ValueError, arr.astype, int) - self.assertRaises(ValueError, arr.astype, np.int64) - self.assertRaises(ValueError, arr.astype, np.int8) - + def test_astype_cast_object_int(self): arr = Series(['1', '2', '3', '4'], dtype=object) result = arr.astype(int) - self.assert_series_equal(result, Series(np.arange(1, 5))) + + tm.assert_series_equal(result, Series(np.arange(1, 5))) def test_astype_datetimes(self): import pandas._libs.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0)]) + s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == np.object_ s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)]) + s[1] = np.nan - self.assertEqual(s.dtype, 'M8[ns]') - s = s.astype('O') - self.assertEqual(s.dtype, np.object_) + assert s.dtype == 'M8[ns]' - def test_astype_str(self): - # GH4405 - digits = string.digits - s1 = Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]) - s2 = Series([digits * 10, tm.rands(63), tm.rands(64), nan, 1.0]) - types = (compat.text_type, np.str_) - for typ in types: - for s in (s1, s2): - res = s.astype(typ) - expec = s.map(compat.text_type) - assert_series_equal(res, expec) - - # GH9757 - # Test str and unicode on python 2.x and just str on python 3.x - for tt in set([str, compat.text_type]): - ts = Series([Timestamp('2010-01-04 00:00:00')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04')]) - assert_series_equal(s, expected) - - ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) - s = ts.astype(tt) - expected = Series([tt('2010-01-04 00:00:00-05:00')]) - assert_series_equal(s, expected) - - td = Series([Timedelta(1, unit='d')]) - s = td.astype(tt) - expected = Series([tt('1 days 00:00:00.000000000')]) - assert_series_equal(s, expected) + s = s.astype('O') + assert s.dtype == np.object_ + + @pytest.mark.parametrize("dtype", [compat.text_type, np.str_]) + @pytest.mark.parametrize("series", [Series([string.digits * 10, + tm.rands(63), + tm.rands(64), + tm.rands(1000)]), + Series([string.digits * 10, + tm.rands(63), + tm.rands(64), nan, 1.0])]) + def test_astype_str_map(self, dtype, series): + # see gh-4405 + result = series.astype(dtype) + expected = series.map(compat.text_type) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", [str, compat.text_type]) + def test_astype_str_cast(self, dtype): + # see gh-9757: test str and unicode on python 2.x + # and just str on python 3.x + ts = Series([Timestamp('2010-01-04 00:00:00')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04')]) + tm.assert_series_equal(s, expected) + + ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) + s = ts.astype(dtype) + + expected = Series([dtype('2010-01-04 00:00:00-05:00')]) + tm.assert_series_equal(s, expected) + + td = Series([Timedelta(1, unit='d')]) + s = td.astype(dtype) + + expected = Series([dtype('1 days 00:00:00.000000000')]) + tm.assert_series_equal(s, expected) def test_astype_unicode(self): - - # GH7758 - # a bit of magic is required to set default encoding encoding to utf-8 + # see gh-7758: A bit of magic is required to set + # default encoding to utf-8 digits = string.digits test_series = [ Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), Series([u('データーサイエンス、お前はもう死んでいる')]), - ] former_encoding = None + if not compat.PY3: - # in python we can force the default encoding for this test + # In Python, we can force the default encoding for this test former_encoding = sys.getdefaultencoding() reload(sys) # noqa + sys.setdefaultencoding("utf-8") if sys.getdefaultencoding() == "utf-8": test_series.append(Series([u('野菜食べないとやばい') .encode("utf-8")])) + for s in test_series: res = s.astype("unicode") expec = s.map(compat.text_type) - assert_series_equal(res, expec) - # restore the former encoding + tm.assert_series_equal(res, expec) + + # Restore the former encoding if former_encoding is not None and former_encoding != "utf-8": reload(sys) # noqa sys.setdefaultencoding(former_encoding) def test_astype_dict(self): - # GH7271 + # see gh-7271 s = Series(range(0, 10, 2), name='abc') result = s.astype({'abc': str}) expected = Series(['0', '2', '4', '6', '8'], name='abc') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) result = s.astype({'abc': 'float64'}) expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype='float64', name='abc') - assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) + + with pytest.raises(KeyError): + s.astype({'abc': str, 'def': str}) - self.assertRaises(KeyError, s.astype, {'abc': str, 'def': str}) - self.assertRaises(KeyError, s.astype, {0: str}) + with pytest.raises(KeyError): + s.astype({0: str}) - def test_astype_generic_timestamp(self): + def test_astype_generic_timestamp_deprecated(self): # see gh-15524 data = [1] - s = Series(data) - dtype = np.datetime64 - result = s.astype(dtype) - expected = Series(data, dtype=dtype) - assert_series_equal(result, expected) - - s = Series(data) - dtype = np.timedelta64 - result = s.astype(dtype) - expected = Series(data, dtype=dtype) - assert_series_equal(result, expected) - - def test_astype_empty_constructor_equality(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series(data) + dtype = np.datetime64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + tm.assert_series_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + s = Series(data) + dtype = np.timedelta64 + result = s.astype(dtype) + expected = Series(data, dtype=dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("dtype", np.typecodes['All']) + def test_astype_empty_constructor_equality(self, dtype): # see gh-15524 - for dtype in np.typecodes['All']: - if dtype not in ('S', 'V'): # poor support (if any) currently - init_empty = Series([], dtype=dtype) - astype_empty = Series([]).astype(dtype) - - try: - assert_series_equal(init_empty, astype_empty) - except AssertionError as e: - name = np.dtype(dtype).name - msg = "{dtype} failed: ".format(dtype=name) + str(e) + if dtype not in ('S', 'V'): # poor support (if any) currently + with warnings.catch_warnings(record=True): + # Generic timestamp dtypes ('M' and 'm') are deprecated, + # but we test that already in series/test_constructors.py - raise AssertionError(msg) + init_empty = Series([], dtype=dtype) + as_type_empty = Series([]).astype(dtype) + tm.assert_series_equal(init_empty, as_type_empty) - def test_complexx(self): - # GH4819 - # complex access for ndarray compat + def test_complex(self): + # see gh-4819: complex access for ndarray compat a = np.arange(5, dtype=np.float64) b = Series(a + 4j * a) + tm.assert_numpy_array_equal(a, b.real) tm.assert_numpy_array_equal(4 * a, b.imag) @@ -198,23 +217,22 @@ def test_complexx(self): tm.assert_numpy_array_equal(4 * a, b.imag) def test_arg_for_errors_in_astype(self): - # issue #14878 - - sr = Series([1, 2, 3]) + # see gh-14878 + s = Series([1, 2, 3]) - with self.assertRaises(ValueError): - sr.astype(np.float64, errors=False) + with pytest.raises(ValueError): + s.astype(np.float64, errors=False) with tm.assert_produces_warning(FutureWarning): - sr.astype(np.int8, raise_on_error=True) + s.astype(np.int8, raise_on_error=True) - sr.astype(np.int8, errors='raise') + s.astype(np.int8, errors='raise') def test_intercept_astype_object(self): series = Series(date_range('1/1/2000', periods=10)) - # this test no longer makes sense as series is by default already - # M8[ns] + # This test no longer makes sense, as + # Series is by default already M8[ns]. expected = series.astype('object') df = DataFrame({'a': series, @@ -224,9 +242,9 @@ def test_intercept_astype_object(self): tm.assert_series_equal(df.dtypes, exp_dtypes) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() df = DataFrame({'a': series, 'b': ['foo'] * len(series)}) result = df.values.squeeze() - self.assertTrue((result[:, 0] == expected.values).all()) + assert (result[:, 0] == expected.values).all() diff --git a/pandas/types/cast.py b/pandas/types/cast.py index bc3cc79604ade..3954fb5c93da8 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -628,7 +628,7 @@ def astype_nansafe(arr, dtype, copy=True): "deprecated and will raise in a future version. " "Please pass in '{dtype}[ns]' instead.") warnings.warn(msg.format(dtype=dtype.name), - FutureWarning, stacklevel=2) + FutureWarning, stacklevel=5) dtype = np.dtype(dtype.name + "[ns]") if copy: @@ -890,7 +890,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): if dtype.name in ('datetime64', 'datetime64[ns]'): if dtype.name == 'datetime64': warnings.warn(msg.format(dtype=dtype.name), - FutureWarning, stacklevel=2) + FutureWarning, stacklevel=5) dtype = _NS_DTYPE else: raise TypeError("cannot convert datetimelike to " @@ -907,7 +907,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): if dtype.name in ('timedelta64', 'timedelta64[ns]'): if dtype.name == 'timedelta64': warnings.warn(msg.format(dtype=dtype.name), - FutureWarning, stacklevel=2) + FutureWarning, stacklevel=5) dtype = _TD_DTYPE else: raise TypeError("cannot convert timedeltalike to "