diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 7c269e851e77f..c9feda6eeb8cf 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -144,6 +144,9 @@ Deprecations
 
 Removal of prior version deprecations/changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- Removed date parser functions :func:`~pandas.io.date_converters.parse_date_time`,
+  :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields`
+  and :func:`~pandas.io.date_converters.generic_parser` (:issue:`24518`)
 - Remove argument ``squeeze`` from :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`32380`)
 - Removed ``keep_tz`` argument in :meth:`DatetimeIndex.to_series` (:issue:`29731`)
 - Remove arguments ``names`` and ``dtype`` from :meth:`Index.copy` and ``levels`` and ``codes`` from :meth:`MultiIndex.copy` (:issue:`35853`, :issue:`36685`)
diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi
index ce49136e6b379..db1388672b37c 100644
--- a/pandas/_libs/tslibs/parsing.pyi
+++ b/pandas/_libs/tslibs/parsing.pyi
@@ -27,14 +27,6 @@ def try_parse_dates(
     dayfirst: bool = ...,
     default: datetime | None = ...,
 ) -> npt.NDArray[np.object_]: ...
-def try_parse_date_and_time(
-    dates: npt.NDArray[np.object_],  # object[:]
-    times: npt.NDArray[np.object_],  # object[:]
-    date_parser=...,
-    time_parser=...,
-    dayfirst: bool = ...,
-    default: datetime | None = ...,
-) -> npt.NDArray[np.object_]: ...
 def try_parse_year_month_day(
     years: npt.NDArray[np.object_],  # object[:]
     months: npt.NDArray[np.object_],  # object[:]
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
index 5c93edfee79f2..469e0721f1207 100644
--- a/pandas/_libs/tslibs/parsing.pyx
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -741,49 +741,6 @@ def try_parse_dates(
     return result.base  # .base to access underlying ndarray
 
 
-def try_parse_date_and_time(
-    object[:] dates,
-    object[:] times,
-    date_parser=None,
-    time_parser=None,
-    bint dayfirst=False,
-    default=None,
-) -> np.ndarray:
-    cdef:
-        Py_ssize_t i, n
-        object[::1] result
-
-    n = len(dates)
-    # TODO(cython3): Use len instead of `shape[0]`
-    if times.shape[0] != n:
-        raise ValueError('Length of dates and times must be equal')
-    result = np.empty(n, dtype='O')
-
-    if date_parser is None:
-        if default is None:  # GH2618
-            date = datetime.now()
-            default = datetime(date.year, date.month, 1)
-
-        parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default)
-
-    else:
-        parse_date = date_parser
-
-    if time_parser is None:
-        parse_time = lambda x: du_parse(x)
-
-    else:
-        parse_time = time_parser
-
-    for i in range(n):
-        d = parse_date(str(dates[i]))
-        t = parse_time(str(times[i]))
-        result[i] = datetime(d.year, d.month, d.day,
-                             t.hour, t.minute, t.second)
-
-    return result.base  # .base to access underlying ndarray
-
-
 def try_parse_year_month_day(
     object[:] years, object[:] months, object[:] days
 ) -> np.ndarray:
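
Migration note: ``try_parse_date_and_time`` was the Cython backend for the removed
``pandas.io.date_converters.parse_date_time``, which combined separate date and
time columns. As the deprecation message shipped with that helper already
suggested, the replacement is string concatenation fed to ``pd.to_datetime``.
A minimal sketch, reusing the sample values from the deleted
test_date_converters.py:

    import pandas as pd

    date_col = pd.Series(["2007/1/3", "2008/2/4"])
    time_col = pd.Series(["05:07:09", "06:08:00"])

    # Equivalent of the removed parse_date_time(date_col, time_col)
    stamps = pd.to_datetime(date_col + " " + time_col)
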
""" - Parse columns with dates and times into a single datetime column. - - .. deprecated:: 1.2 - """ - warnings.warn( - """ - Use pd.to_datetime(date_col + " " + time_col) instead to get a Pandas Series. - Use pd.to_datetime(date_col + " " + time_col).to_pydatetime() instead to get a Numpy array. -""", # noqa: E501 - FutureWarning, - stacklevel=find_stack_level(), - ) - date_col = _maybe_cast(date_col) - time_col = _maybe_cast(time_col) - return parsing.try_parse_date_and_time(date_col, time_col) - - -def parse_date_fields(year_col, month_col, day_col) -> npt.NDArray[np.object_]: - """ - Parse columns with years, months and days into a single date column. - - .. deprecated:: 1.2 - """ - warnings.warn( - """ - Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) instead to get a Pandas Series. - Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col}) and - np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array. -""", # noqa: E501 - FutureWarning, - stacklevel=find_stack_level(), - ) - - year_col = _maybe_cast(year_col) - month_col = _maybe_cast(month_col) - day_col = _maybe_cast(day_col) - return parsing.try_parse_year_month_day(year_col, month_col, day_col) - - -def parse_all_fields( - year_col, month_col, day_col, hour_col, minute_col, second_col -) -> npt.NDArray[np.object_]: - """ - Parse columns with datetime information into a single datetime column. - - .. deprecated:: 1.2 - """ - - warnings.warn( - """ - Use pd.to_datetime({"year": year_col, "month": month_col, "day": day_col, - "hour": hour_col, "minute": minute_col, second": second_col}) instead to get a Pandas Series. - Use ser = pd.to_datetime({"year": year_col, "month": month_col, "day": day_col, - "hour": hour_col, "minute": minute_col, second": second_col}) and - np.array([s.to_pydatetime() for s in ser]) instead to get a Numpy array. -""", # noqa: E501 - FutureWarning, - stacklevel=find_stack_level(), - ) - - year_col = _maybe_cast(year_col) - month_col = _maybe_cast(month_col) - day_col = _maybe_cast(day_col) - hour_col = _maybe_cast(hour_col) - minute_col = _maybe_cast(minute_col) - second_col = _maybe_cast(second_col) - return parsing.try_parse_datetime_components( - year_col, month_col, day_col, hour_col, minute_col, second_col - ) - - -def generic_parser(parse_func, *cols) -> np.ndarray: - """ - Use dateparser to parse columns with data information into a single datetime column. - - .. 
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 45f6469a31f4f..41016f8f40b9f 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -78,8 +78,6 @@
 from pandas.core.series import Series
 from pandas.core.tools import datetimes as tools
 
-from pandas.io.date_converters import generic_parser
-
 if TYPE_CHECKING:
     from pandas import DataFrame
 
@@ -1135,17 +1133,14 @@ def converter(*date_cols):
                     raise Exception("scalar parser")
                 return result
             except Exception:
-                try:
-                    return tools.to_datetime(
-                        parsing.try_parse_dates(
-                            parsing.concat_date_cols(date_cols),
-                            parser=date_parser,
-                            dayfirst=dayfirst,
-                        ),
-                        errors="ignore",
-                    )
-                except Exception:
-                    return generic_parser(date_parser, *date_cols)
+                return tools.to_datetime(
+                    parsing.try_parse_dates(
+                        parsing.concat_date_cols(date_cols),
+                        parser=date_parser,
+                        dayfirst=dayfirst,
+                    ),
+                    errors="ignore",
+                )
 
     return converter
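
Migration note: with the ``generic_parser`` fallback gone, ``read_csv`` no longer
retries a failing multi-column ``date_parser`` one row at a time, so such a
callable now has to accept one array per source column (the rewritten
``test_generic`` further down makes the same change). A sketch under that
assumption; the helper name ``parse_ym`` is ours:

    from datetime import date
    from io import StringIO

    import pandas as pd

    data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11."

    # The callable receives the year and month columns as arrays, so it must
    # be vectorized rather than written for scalar arguments.
    def parse_ym(years, months):
        return [date(int(y), int(m), 1) for y, m in zip(years, months)]

    df = pd.read_csv(
        StringIO(data),
        header=0,
        parse_dates={"ym": [0, 1]},
        date_parser=parse_ym,
    )
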
""" parser = all_parsers - with tm.assert_produces_warning(warning, check_stacklevel=False): - result = parser.read_csv( - StringIO(data), - header=[0, 1], - parse_dates={"date_time": [0, 1]}, - date_parser=date_parser, - ) + result = parser.read_csv( + StringIO(data), + header=[0, 1], + parse_dates={"date_time": [0, 1]}, + date_parser=pd.to_datetime, + ) expected_data = [ [datetime(2001, 1, 5, 9, 0, 0), 0.0, 10.0], @@ -1332,10 +1322,6 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni @xfail_pyarrow -@pytest.mark.parametrize( - "date_parser, warning", - ([conv.parse_date_time, FutureWarning], [pd.to_datetime, None]), -) @pytest.mark.parametrize( "data,kwargs,expected", [ @@ -1408,10 +1394,9 @@ def test_parse_date_time_multi_level_column_name(all_parsers, date_parser, warni ), ], ) -def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warning): +def test_parse_date_time(all_parsers, data, kwargs, expected): parser = all_parsers - with tm.assert_produces_warning(warning, check_stacklevel=False): - result = parser.read_csv(StringIO(data), date_parser=date_parser, **kwargs) + result = parser.read_csv(StringIO(data), date_parser=pd.to_datetime, **kwargs) # Python can sometimes be flaky about how # the aggregated columns are entered, so @@ -1421,20 +1406,15 @@ def test_parse_date_time(all_parsers, data, kwargs, expected, date_parser, warni @xfail_pyarrow -@pytest.mark.parametrize( - "date_parser, warning", - ([conv.parse_date_fields, FutureWarning], [pd.to_datetime, None]), -) -def test_parse_date_fields(all_parsers, date_parser, warning): +def test_parse_date_fields(all_parsers): parser = all_parsers data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." - with tm.assert_produces_warning(warning, check_stacklevel=False): - result = parser.read_csv( - StringIO(data), - header=0, - parse_dates={"ymd": [0, 1, 2]}, - date_parser=date_parser, - ) + result = parser.read_csv( + StringIO(data), + header=0, + parse_dates={"ymd": [0, 1, 2]}, + date_parser=pd.to_datetime, + ) expected = DataFrame( [[datetime(2001, 1, 10), 10.0], [datetime(2001, 2, 1), 11.0]], @@ -1444,27 +1424,19 @@ def test_parse_date_fields(all_parsers, date_parser, warning): @xfail_pyarrow -@pytest.mark.parametrize( - "date_parser, warning", - ( - [conv.parse_all_fields, FutureWarning], - [lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), None], - ), -) -def test_parse_date_all_fields(all_parsers, date_parser, warning): +def test_parse_date_all_fields(all_parsers): parser = all_parsers data = """\ year,month,day,hour,minute,second,a,b 2001,01,05,10,00,0,0.0,10. 2001,01,5,10,0,00,1.,11. 
""" - with tm.assert_produces_warning(warning, check_stacklevel=False): - result = parser.read_csv( - StringIO(data), - header=0, - date_parser=date_parser, - parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, - ) + result = parser.read_csv( + StringIO(data), + header=0, + date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S"), + parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + ) expected = DataFrame( [ [datetime(2001, 1, 5, 10, 0, 0), 0.0, 10.0], @@ -1476,27 +1448,19 @@ def test_parse_date_all_fields(all_parsers, date_parser, warning): @xfail_pyarrow -@pytest.mark.parametrize( - "date_parser, warning", - ( - [conv.parse_all_fields, FutureWarning], - [lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), None], - ), -) -def test_datetime_fractional_seconds(all_parsers, date_parser, warning): +def test_datetime_fractional_seconds(all_parsers): parser = all_parsers data = """\ year,month,day,hour,minute,second,a,b 2001,01,05,10,00,0.123456,0.0,10. 2001,01,5,10,0,0.500000,1.,11. """ - with tm.assert_produces_warning(warning, check_stacklevel=False): - result = parser.read_csv( - StringIO(data), - header=0, - date_parser=date_parser, - parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, - ) + result = parser.read_csv( + StringIO(data), + header=0, + date_parser=lambda x: pd.to_datetime(x, format="%Y %m %d %H %M %S.%f"), + parse_dates={"ymdHMS": [0, 1, 2, 3, 4, 5]}, + ) expected = DataFrame( [ [datetime(2001, 1, 5, 10, 0, 0, microsecond=123456), 0.0, 10.0], @@ -1512,17 +1476,20 @@ def test_generic(all_parsers): parser = all_parsers data = "year,month,day,a\n2001,01,10,10.\n2001,02,1,11." - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = parser.read_csv( - StringIO(data), - header=0, - parse_dates={"ym": [0, 1]}, - date_parser=lambda y, m: date(year=int(y), month=int(m), day=1), - ) + def parse_function(yy, mm): + return [date(year=int(y), month=int(m), day=1) for y, m in zip(yy, mm)] + + result = parser.read_csv( + StringIO(data), + header=0, + parse_dates={"ym": [0, 1]}, + date_parser=parse_function, + ) expected = DataFrame( [[date(2001, 1, 1), 10, 10.0], [date(2001, 2, 1), 1, 11.0]], columns=["ym", "day", "a"], ) + expected["ym"] = expected["ym"].astype("datetime64[ns]") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py deleted file mode 100644 index a9fa27e091714..0000000000000 --- a/pandas/tests/io/test_date_converters.py +++ /dev/null @@ -1,43 +0,0 @@ -from datetime import datetime - -import numpy as np - -import pandas._testing as tm - -import pandas.io.date_converters as conv - - -def test_parse_date_time(): - - dates = np.array(["2007/1/3", "2008/2/4"], dtype=object) - times = np.array(["05:07:09", "06:08:00"], dtype=object) - expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)]) - with tm.assert_produces_warning(FutureWarning): - result = conv.parse_date_time(dates, times) - tm.assert_numpy_array_equal(result, expected) - - -def test_parse_date_fields(): - days = np.array([3, 4]) - months = np.array([1, 2]) - years = np.array([2007, 2008]) - expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)]) - - with tm.assert_produces_warning(FutureWarning): - result = conv.parse_date_fields(years, months, days) - tm.assert_numpy_array_equal(result, expected) - - -def test_parse_all_fields(): - hours = np.array([5, 6]) - minutes = np.array([7, 8]) - seconds = np.array([9, 0]) - - days = np.array([3, 4]) - years = np.array([2007, 2008]) - 
diff --git a/pandas/tests/io/test_date_converters.py b/pandas/tests/io/test_date_converters.py
deleted file mode 100644
index a9fa27e091714..0000000000000
--- a/pandas/tests/io/test_date_converters.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from datetime import datetime
-
-import numpy as np
-
-import pandas._testing as tm
-
-import pandas.io.date_converters as conv
-
-
-def test_parse_date_time():
-
-    dates = np.array(["2007/1/3", "2008/2/4"], dtype=object)
-    times = np.array(["05:07:09", "06:08:00"], dtype=object)
-    expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])
-    with tm.assert_produces_warning(FutureWarning):
-        result = conv.parse_date_time(dates, times)
-    tm.assert_numpy_array_equal(result, expected)
-
-
-def test_parse_date_fields():
-    days = np.array([3, 4])
-    months = np.array([1, 2])
-    years = np.array([2007, 2008])
-    expected = np.array([datetime(2007, 1, 3), datetime(2008, 2, 4)])
-
-    with tm.assert_produces_warning(FutureWarning):
-        result = conv.parse_date_fields(years, months, days)
-    tm.assert_numpy_array_equal(result, expected)
-
-
-def test_parse_all_fields():
-    hours = np.array([5, 6])
-    minutes = np.array([7, 8])
-    seconds = np.array([9, 0])
-
-    days = np.array([3, 4])
-    years = np.array([2007, 2008])
-    months = np.array([1, 2])
-    expected = np.array([datetime(2007, 1, 3, 5, 7, 9), datetime(2008, 2, 4, 6, 8, 0)])
-
-    with tm.assert_produces_warning(FutureWarning):
-        result = conv.parse_all_fields(years, months, days, hours, minutes, seconds)
-    tm.assert_numpy_array_equal(result, expected)
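
Migration note: the deleted tests checked that the old helpers returned object
arrays of ``datetime.datetime``. The warnings in the removed module pointed at
``to_pydatetime()`` for that; on a Series the accessor spelling is
``Series.dt.to_pydatetime()``. A sketch reproducing the expected values of the
first deleted test:

    import pandas as pd

    dates = pd.Series(["2007/1/3", "2008/2/4"])
    times = pd.Series(["05:07:09", "06:08:00"])

    ser = pd.to_datetime(dates + " " + times)

    # NumPy object array of datetime.datetime, matching what
    # conv.parse_date_time used to return.
    result = ser.dt.to_pydatetime()
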