From 7650f73b04df1e5b6c5947cd647f6095eb190fe6 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Sun, 28 Apr 2024 23:55:22 +0200 Subject: [PATCH 01/15] CLN: enforce the deprecation of exposing blocks in core.internals --- pandas/core/internals/__init__.py | 41 ------------------------------ pandas/tests/internals/test_api.py | 27 -------------------- 2 files changed, 68 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 89c8a4a27ca31..dcccb20d0c9d2 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -18,46 +18,5 @@ def __getattr__(name: str): # GH#55139 - import warnings - - if name == "create_block_manager_from_blocks": - # GH#33892 - warnings.warn( - f"{name} is deprecated and will be removed in a future version. " - "Use public APIs instead.", - DeprecationWarning, - # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758 - # on hard-coding stacklevel - stacklevel=2, - ) - from pandas.core.internals.managers import create_block_manager_from_blocks - - return create_block_manager_from_blocks - - if name in [ - "Block", - "ExtensionBlock", - "DatetimeTZBlock", - ]: - warnings.warn( - f"{name} is deprecated and will be removed in a future version. 
" - "Use public APIs instead.", - DeprecationWarning, - # https://github.com/pandas-dev/pandas/pull/55139#pullrequestreview-1720690758 - # on hard-coding stacklevel - stacklevel=2, - ) - if name == "DatetimeTZBlock": - from pandas.core.internals.blocks import DatetimeTZBlock - - return DatetimeTZBlock - elif name == "ExtensionBlock": - from pandas.core.internals.blocks import ExtensionBlock - - return ExtensionBlock - else: - from pandas.core.internals.blocks import Block - - return Block raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index 7ab8988521fdf..c189d5248b1f3 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -41,21 +41,6 @@ def test_namespace(): assert set(result) == set(expected + modules) -@pytest.mark.parametrize( - "name", - [ - "Block", - "ExtensionBlock", - "DatetimeTZBlock", - ], -) -def test_deprecations(name): - # GH#55139 - msg = f"{name} is deprecated.* Use public APIs instead" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - getattr(internals, name) - - def test_make_block_2d_with_dti(): # GH#41168 dti = pd.date_range("2012", periods=3, tz="UTC") @@ -65,18 +50,6 @@ def test_make_block_2d_with_dti(): assert blk.values.shape == (1, 3) -def test_create_block_manager_from_blocks_deprecated(): - # GH#33892 - # If they must, downstream packages should get this from internals.api, - # not internals. - msg = ( - "create_block_manager_from_blocks is deprecated and will be " - "removed in a future version. 
Use public APIs instead" - ) - with tm.assert_produces_warning(DeprecationWarning, match=msg): - internals.create_block_manager_from_blocks - - def test_create_dataframe_from_blocks(float_frame): block = float_frame._mgr.blocks[0] index = float_frame.index.copy() From 608077b852fc9004f36e5a848e1b9eb984787902 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Mon, 29 Apr 2024 01:28:45 +0200 Subject: [PATCH 02/15] remove the function __getattr__, and entries from __all__ --- pandas/core/internals/__init__.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index dcccb20d0c9d2..45758379e0bd6 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -6,17 +6,8 @@ ) __all__ = [ - "Block", - "DatetimeTZBlock", - "ExtensionBlock", "make_block", "BlockManager", "SingleBlockManager", "concatenate_managers", ] - - -def __getattr__(name: str): - # GH#55139 - - raise AttributeError(f"module 'pandas.core.internals' has no attribute '{name}'") From 99bc1d06ff379f89f414993a794ba0adacc4468a Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 30 Apr 2024 00:57:17 +0200 Subject: [PATCH 03/15] fix mypy error --- pandas/io/pytables.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d585c59dd5581..8a006639ed28e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -124,7 +124,8 @@ npt, ) - from pandas.core.internals import Block + from pandas.core.internals.blocks import Block + # versioning attribute _version = "0.15.2" From 2791b0f24762d02dbfb01923335a1859e0caab8a Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 30 Apr 2024 01:08:19 +0200 Subject: [PATCH 04/15] add a note to v3.0.0 --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index c77348b365370..2c85245c4f4c8 100644 --- 
a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -250,6 +250,7 @@ Removal of prior version deprecations/changes - Enforced deprecation of :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`57820`) - Enforced deprecation of :meth:`offsets.Tick.delta`, use ``pd.Timedelta(obj)`` instead (:issue:`55498`) - Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``, passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) +- Enforced deprecation of ``core.internals`` members ``Block``, ``ExtensionBlock``, and ``DatetimeTZBlock`` (:issue:`58467`) - Enforced deprecation of non-standard (``np.ndarray``, :class:`ExtensionArray`, :class:`Index`, or :class:`Series`) argument to :func:`api.extensions.take` (:issue:`52981`) - Enforced deprecation of parsing system timezone strings to ``tzlocal``, which depended on system timezone, pass the 'tz' keyword instead (:issue:`50791`) - Enforced deprecation of passing a dictionary to :meth:`SeriesGroupBy.agg` (:issue:`52268`) From 31b2719c92d6ad7218c35a59d7f5e9065ea51409 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Tue, 30 Apr 2024 13:50:01 +0200 Subject: [PATCH 05/15] skip parquet tests for minimum Pyarrow version --- pandas/tests/io/test_parquet.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 55be48eb572fd..b8fd6f1c5e9e0 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -649,6 +649,12 @@ def test_dtype_backend(self, engine, request): ], ) def test_read_empty_array(self, pa, dtype): + # GH#58467 + from pandas.compat._optional import VERSIONS + + pa_min_ver = VERSIONS.get("pyarrow") + if Version(pyarrow.__version__) == Version(pa_min_ver): + pytest.skip("pandas.core.internals' 
has no attribute 'DatetimeTZBlock") # GH #41241 df = pd.DataFrame( { @@ -938,6 +944,11 @@ def test_timestamp_nanoseconds(self, pa): check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): + from pandas.compat._optional import VERSIONS + + pa_min_ver = VERSIONS.get("pyarrow") + if Version(pyarrow.__version__) == Version(pa_min_ver): + pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") if timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.applymarker( pytest.mark.xfail( @@ -1107,6 +1118,11 @@ def test_infer_string_large_string_type(self, tmp_path, pa): class TestParquetFastParquet(Base): def test_basic(self, fp, df_full): + from pandas.compat._optional import VERSIONS + + pa_min_ver = VERSIONS.get("pyarrow") + if Version(pyarrow.__version__) == Version(pa_min_ver): + pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") df = df_full dti = pd.date_range("20130101", periods=3, tz="US/Eastern") From cd82e3c6a9f40a46a7d1a538d52fa9bd8cd3658f Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 08:39:44 +0200 Subject: [PATCH 06/15] fixup test_basic --- pandas/tests/io/test_parquet.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b8fd6f1c5e9e0..565b39eb41dcb 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -677,6 +677,12 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): df = df_full + + from pandas.compat._optional import VERSIONS + + pa_min_ver = VERSIONS.get("pyarrow") + if Version(pyarrow.__version__) == Version(pa_min_ver): + pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") # additional supported types for pyarrow dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") @@ -1118,11 +1124,6 @@ 
def test_infer_string_large_string_type(self, tmp_path, pa): class TestParquetFastParquet(Base): def test_basic(self, fp, df_full): - from pandas.compat._optional import VERSIONS - - pa_min_ver = VERSIONS.get("pyarrow") - if Version(pyarrow.__version__) == Version(pa_min_ver): - pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") df = df_full dti = pd.date_range("20130101", periods=3, tz="US/Eastern") From 3efbde9a718ab2063b53aa994f022c80689590b6 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 08:43:54 +0200 Subject: [PATCH 07/15] fix pre-commit error --- pandas/tests/io/test_parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 565b39eb41dcb..625bda32e0bb8 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -676,10 +676,10 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): - df = df_full - from pandas.compat._optional import VERSIONS + df = df_full + pa_min_ver = VERSIONS.get("pyarrow") if Version(pyarrow.__version__) == Version(pa_min_ver): pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") From 756c736121611a2c9b0c4b9090befa0082bdb17b Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 11:42:42 +0200 Subject: [PATCH 08/15] skip pyarrow=10.0.1 in test_parquet.py --- pandas/tests/io/test_parquet.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 625bda32e0bb8..49368cb249699 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -50,6 +50,7 @@ ), ] +pa = pytest.importorskip("pyarrow", minversion="11.0.0") # setup engines & skips @pytest.fixture( @@ -649,12 +650,6 @@ def test_dtype_backend(self, engine, request): ], ) def test_read_empty_array(self, pa, 
dtype): - # GH#58467 - from pandas.compat._optional import VERSIONS - - pa_min_ver = VERSIONS.get("pyarrow") - if Version(pyarrow.__version__) == Version(pa_min_ver): - pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") # GH #41241 df = pd.DataFrame( { @@ -676,14 +671,8 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): - from pandas.compat._optional import VERSIONS - df = df_full - pa_min_ver = VERSIONS.get("pyarrow") - if Version(pyarrow.__version__) == Version(pa_min_ver): - pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") - # additional supported types for pyarrow dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") dti = dti._with_freq(None) # freq doesn't round-trip @@ -950,11 +939,6 @@ def test_timestamp_nanoseconds(self, pa): check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): - from pandas.compat._optional import VERSIONS - - pa_min_ver = VERSIONS.get("pyarrow") - if Version(pyarrow.__version__) == Version(pa_min_ver): - pytest.skip("pandas.core.internals' has no attribute 'DatetimeTZBlock") if timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.applymarker( pytest.mark.xfail( From c776b245b99e1c289ddfa82c0308148d32ad4bae Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 14:23:51 +0200 Subject: [PATCH 09/15] fix mypy error --- pandas/tests/io/test_parquet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 49368cb249699..61b25c774db36 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -50,8 +50,6 @@ ), ] -pa = pytest.importorskip("pyarrow", minversion="11.0.0") - # setup engines & skips @pytest.fixture( params=[ @@ -78,6 +76,9 @@ def engine(request): def pa(): if not _HAVE_PYARROW: pytest.skip("pyarrow 
is not installed") + + if Version(pyarrow.__version__) == "10.0.1": + pytest.skip("skip the minimum '10.0.1' pyarrow version") return "pyarrow" From eb318e7b60f966c0d933a43cf982f0f7afe6fc36 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 14:27:31 +0200 Subject: [PATCH 10/15] fix pre-commit error --- pandas/tests/io/test_parquet.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 61b25c774db36..e021fcbea91ea 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -50,6 +50,7 @@ ), ] + # setup engines & skips @pytest.fixture( params=[ From ec99c1f32dbe6f0b66ab66da3409c49ffeb1cba9 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 16:29:00 +0200 Subject: [PATCH 11/15] fixup test_parquet.py --- pandas/tests/io/test_parquet.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index e021fcbea91ea..5ce498c8fa6cd 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -77,9 +77,6 @@ def engine(request): def pa(): if not _HAVE_PYARROW: pytest.skip("pyarrow is not installed") - - if Version(pyarrow.__version__) == "10.0.1": - pytest.skip("skip the minimum '10.0.1' pyarrow version") return "pyarrow" @@ -658,6 +655,8 @@ def test_read_empty_array(self, pa, dtype): "value": pd.array([], dtype=dtype), } ) + if Version(pyarrow.__version__) == "10.0.1": + pytest.skip("skip the pyarrow version '10.0.1'") # GH 45694 expected = None if dtype == "float": @@ -674,6 +673,9 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): df = df_full + + if Version(pyarrow.__version__) == "10.0.1": + pytest.skip("skip the pyarrow version '10.0.1'") # additional supported types for pyarrow dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") @@ -941,6 +943,8 @@ def 
test_timestamp_nanoseconds(self, pa): check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): + if Version(pyarrow.__version__) == "10.0.1": + pytest.skip("skip the pyarrow version '10.0.1'") if timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.applymarker( pytest.mark.xfail( From 0caa47ae95e05f1c106d97fc56404dde67aed340 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 16:34:12 +0200 Subject: [PATCH 12/15] fix pre-commit error --- pandas/tests/io/test_parquet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 5ce498c8fa6cd..76e3fbecebf9a 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -673,7 +673,7 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): df = df_full - + if Version(pyarrow.__version__) == "10.0.1": pytest.skip("skip the pyarrow version '10.0.1'") From 07b1edfe03eeeaff78f8cfa1a3ead94d0c50b8a3 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 18:25:14 +0200 Subject: [PATCH 13/15] fixup test_parquet.py --- pandas/tests/io/test_parquet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 76e3fbecebf9a..5b5d7a83c7504 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -655,7 +655,7 @@ def test_read_empty_array(self, pa, dtype): "value": pd.array([], dtype=dtype), } ) - if Version(pyarrow.__version__) == "10.0.1": + if pyarrow.__version__ == "10.0.1": pytest.skip("skip the pyarrow version '10.0.1'") # GH 45694 expected = None @@ -674,7 +674,7 @@ class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): df = df_full - if Version(pyarrow.__version__) == "10.0.1": + if pyarrow.__version__ == "10.0.1": 
pytest.skip("skip the pyarrow version '10.0.1'") # additional supported types for pyarrow @@ -943,7 +943,7 @@ def test_timestamp_nanoseconds(self, pa): check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): - if Version(pyarrow.__version__) == "10.0.1": + if pyarrow.__version__ == "10.0.1": pytest.skip("skip the pyarrow version '10.0.1'") if timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.applymarker( From e7dd400a62143c4917a7d1323c1c1de2808cf120 Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 18:32:25 +0200 Subject: [PATCH 14/15] replace pytest.skip with pytest.importorskip --- pandas/tests/io/test_parquet.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 5b5d7a83c7504..c59d1e0b6f963 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -655,8 +655,7 @@ def test_read_empty_array(self, pa, dtype): "value": pd.array([], dtype=dtype), } ) - if pyarrow.__version__ == "10.0.1": - pytest.skip("skip the pyarrow version '10.0.1'") + pytest.importorskip("pyarrow", "10.0.1") # GH 45694 expected = None if dtype == "float": @@ -673,9 +672,7 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): df = df_full - - if pyarrow.__version__ == "10.0.1": - pytest.skip("skip the pyarrow version '10.0.1'") + pytest.importorskip("pyarrow", "10.0.1") # additional supported types for pyarrow dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") @@ -943,8 +940,8 @@ def test_timestamp_nanoseconds(self, pa): check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): - if pyarrow.__version__ == "10.0.1": - pytest.skip("skip the pyarrow version '10.0.1'") + pytest.importorskip("pyarrow", "10.0.1") + if
timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.applymarker( pytest.mark.xfail( From 1ac29562b75b55a6cc036ad032c9791867bea82e Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva Date: Thu, 2 May 2024 19:05:01 +0200 Subject: [PATCH 15/15] skip pyarrow '10.0.1' --- pandas/tests/io/test_parquet.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index c59d1e0b6f963..2860b3a6483af 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -655,7 +655,7 @@ def test_read_empty_array(self, pa, dtype): "value": pd.array([], dtype=dtype), } ) - pytest.importorskip("pyarrow", "10.0.1") + pytest.importorskip("pyarrow", "11.0.0") # GH 45694 expected = None if dtype == "float": @@ -672,7 +672,7 @@ def test_read_empty_array(self, pa, dtype): class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): df = df_full - pytest.importorskip("pyarrow", "10.0.1") + pytest.importorskip("pyarrow", "11.0.0") # additional supported types for pyarrow dti = pd.date_range("20130101", periods=3, tz="Europe/Brussels") @@ -940,7 +940,7 @@ def test_timestamp_nanoseconds(self, pa): check_round_trip(df, pa, write_kwargs={"version": ver}) def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): - pytest.importorskip("pyarrow", "10.0.1") + pytest.importorskip("pyarrow", "11.0.0") if timezone_aware_date_list.tzinfo != datetime.timezone.utc: request.applymarker(