Make the test suite usable when a backend is missing or too old.

Summary of what the hunks below do:
  * Module-level `@pytest.mark.skipif(...)` markers, which carried no
    per-library condition, are replaced by `pytest.skip(...)` calls inside
    the test body that only fire for the library whose version is too old.
  * Top-level `import pandas` / `import polars` statements in test modules
    are moved into library-specific branches, or replaced with
    `pytest.importorskip("polars")`.
  * `tests/utils.py` no longer fails with ModuleNotFoundError when polars
    is not installed.
  * The scale_column and convert_to_std_column tests now take the `library`
    fixture; the `[modin]` variants of the scale_column tests are added to
    `ci_skip_ids` in `tests/conftest.py`.

NOTE(review): line breaks and indentation in this patch were restored from a
whitespace-collapsed copy. Blank context lines were placed so that each hunk
matches the line counts in its `@@` header; confirm against the upstream
commit before applying.

diff --git a/tests/column/schema_test.py b/tests/column/schema_test.py
index 79d2b29..2284659 100644
--- a/tests/column/schema_test.py
+++ b/tests/column/schema_test.py
@@ -8,11 +8,11 @@
 from tests.utils import pandas_version
 
 
-@pytest.mark.skipif(
-    Version("2.0.0") > pandas_version(),
-    reason="no pyarrow support",
-)
 def test_schema(library: BaseHandler) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
+        "2.0.0",
+    ):
+        pytest.skip(reason="no pyarrow support")
     df = mixed_dataframe_1(library)
     namespace = df.__dataframe_namespace__()
     result = df.col("a").dtype
diff --git a/tests/conftest.py b/tests/conftest.py
index f791b57..f55d688 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -63,6 +63,9 @@ def pytest_generate_tests(metafunc: Any) -> None:
 ci_skip_ids = [
     # polars does not allow to create a dataframe with non-unique columns
     "non_unique_column_names_test.py::test_repeated_columns[polars-lazy]",
+    # TODO: enable after modin adds implementation for standard
+    "scale_column_test.py::test_scale_column[modin]",
+    "scale_column_test.py::test_scale_column_polars_from_persisted_df[modin]",
 ]
 
 
diff --git a/tests/dataframe/schema_test.py b/tests/dataframe/schema_test.py
index 8c84b5d..f1980e7 100644
--- a/tests/dataframe/schema_test.py
+++ b/tests/dataframe/schema_test.py
@@ -1,20 +1,18 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 from packaging.version import Version
-from packaging.version import parse
 
 from tests.utils import BaseHandler
 from tests.utils import mixed_dataframe_1
 from tests.utils import pandas_version
 
 
-@pytest.mark.skipif(
-    Version("2.0.0") > pandas_version(),
-    reason="no pyarrow support",
-)
 def test_schema(library: BaseHandler) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
+        "2.0.0",
+    ):
+        pytest.skip(reason="no pyarrow support")
     df = mixed_dataframe_1(library)
     namespace = df.__dataframe_namespace__()
     result = df.schema
@@ -53,7 +51,7 @@ def test_schema(library: BaseHandler) -> None:
     assert isinstance(result["n"], namespace.Datetime)
     if not (
         library.name in ("pandas-numpy", "pandas-nullable")
-        and parse(pd.__version__) < Version("2.0.0")
+        and pandas_version() < Version("2.0.0")
     ):  # pragma: no cover (coverage bug?)
         # pandas non-nanosecond support only came in 2.0
         assert result["n"].time_unit == "ms"
@@ -63,7 +61,7 @@ def test_schema(library: BaseHandler) -> None:
     assert isinstance(result["o"], namespace.Datetime)
     if not (
         library.name in ("pandas-numpy", "pandas-nullable")
-        and parse(pd.__version__) < Version("2.0.0")
+        and pandas_version() < Version("2.0.0")
     ):  # pragma: no cover (coverage bug?)
         # pandas non-nanosecond support only came in 2.0
         assert result["o"].time_unit == "us"
@@ -72,7 +70,7 @@ def test_schema(library: BaseHandler) -> None:
     assert result["o"].time_zone is None
     if not (
         library.name in ("pandas-numpy", "pandas-nullable")
-        and parse(pd.__version__) < Version("2.0.0")
+        and pandas_version() < Version("2.0.0")
     ):
         # pandas non-nanosecond support only came in 2.0 - before that, these would be 'float'
         assert isinstance(result["p"], namespace.Duration)
diff --git a/tests/groupby/groupby_any_all_test.py b/tests/groupby/groupby_any_all_test.py
index 5de8f3f..12600e6 100644
--- a/tests/groupby/groupby_any_all_test.py
+++ b/tests/groupby/groupby_any_all_test.py
@@ -4,7 +4,6 @@
 import pytest
 from packaging.version import Version
 from packaging.version import parse
-from polars.exceptions import SchemaError
 
 from tests.utils import BaseHandler
 from tests.utils import bool_dataframe_2
@@ -42,7 +41,13 @@ def test_groupby_boolean(
 
 def test_group_by_invalid_any_all(library: BaseHandler) -> None:
     df = integer_dataframe_4(library).persist()
-    with pytest.raises((TypeError, SchemaError)):
+
+    exceptions = (TypeError,)
+    if library.name == "polars-lazy":
+        from polars.exceptions import SchemaError
+
+        exceptions = (TypeError, SchemaError)
+    with pytest.raises(exceptions):
         df.group_by("key").any()
-    with pytest.raises((TypeError, SchemaError)):
+    with pytest.raises(exceptions):
         df.group_by("key").all()
diff --git a/tests/integration/scale_column_test.py b/tests/integration/scale_column_test.py
index 4f07c8f..22f72d9 100644
--- a/tests/integration/scale_column_test.py
+++ b/tests/integration/scale_column_test.py
@@ -1,44 +1,51 @@
 from __future__ import annotations
 
-import pandas as pd
-import polars as pl
 import pytest
 from packaging.version import Version
-from packaging.version import parse
-from polars.testing import assert_series_equal
 
+from tests.utils import BaseHandler
+from tests.utils import compare_column_with_reference
+from tests.utils import pandas_version
+from tests.utils import polars_version
 
-@pytest.mark.skipif(
-    parse(pd.__version__) < Version("2.1.0"),
-    reason="pandas doesn't support 3.8",
-)
-def test_scale_column_pandas() -> None:
-    s = pd.Series([1, 2, 3], name="a")
-    ser = s.__column_consortium_standard__()
-    ser = ser - ser.mean()
-    result = ser.column
-    pd.testing.assert_series_equal(result, pd.Series([-1, 0, 1.0], name="a"))
 
+def test_scale_column(library: BaseHandler) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable"):
+        if pandas_version() < Version("2.1.0"):
+            pytest.skip(reason="pandas doesn't support 3.8")
+        import pandas as pd
+
+        s = pd.Series([1, 2, 3], name="a")
+        ser = s.__column_consortium_standard__()
+    elif library.name == "polars-lazy":
+        if polars_version() < Version("0.19.0"):
+            pytest.skip(reason="before consortium standard in polars")
+        import polars as pl
+
+        s = pl.Series("a", [1, 2, 3])
+        ser = s.__column_consortium_standard__()
 
-@pytest.mark.skipif(
-    parse(pl.__version__) < Version("0.19.0"),
-    reason="before consortium standard in polars",
-)
-def test_scale_column_polars() -> None:
-    s = pl.Series("a", [1, 2, 3])
-    ser = s.__column_consortium_standard__()
+    ns = ser.__column_namespace__()
     ser = ser - ser.mean()
-    result = pl.select(ser.column)["a"]
-    assert_series_equal(result, pl.Series("a", [-1, 0, 1.0]))
+    compare_column_with_reference(ser, [-1, 0, 1.0], dtype=ns.Float64)
+
+
+def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable"):
+        if pandas_version() < Version("2.1.0"):
+            pytest.skip(reason="pandas doesn't support 3.8")
+        import pandas as pd
+
+        df = pd.DataFrame({"a": [1, 2, 3]})
+        ser = df.__dataframe_consortium_standard__().col("a")
+    elif library.name == "polars-lazy":
+        if polars_version() < Version("0.19.0"):
+            pytest.skip(reason="before consortium standard in polars")
+        import polars as pl
 
+        df = pl.DataFrame({"a": [1, 2, 3]})
+        ser = df.__dataframe_consortium_standard__().col("a")
 
-@pytest.mark.skipif(
-    parse(pl.__version__) < Version("0.19.0"),
-    reason="before consortium standard in polars",
-)
-def test_scale_column_polars_from_persisted_df() -> None:
-    df = pl.DataFrame({"a": [1, 2, 3]})
-    ser = df.__dataframe_consortium_standard__().col("a")
+    ns = ser.__column_namespace__()
     ser = ser - ser.mean()
-    result = pl.select(ser.persist().column)["a"]
-    assert_series_equal(result, pl.Series("a", [-1, 0, 1.0]))
+    compare_column_with_reference(ser, [-1, 0, 1.0], dtype=ns.Float64)
diff --git a/tests/integration/upstream_test.py b/tests/integration/upstream_test.py
index abbfc98..caad0e9 100644
--- a/tests/integration/upstream_test.py
+++ b/tests/integration/upstream_test.py
@@ -7,7 +7,7 @@
 
 class TestPolars:
     def test_dataframe(self) -> None:
-        import polars as pl
+        pl = pytest.importorskip("polars")
 
         if parse(pl.__version__) < Version("0.19.0"):  # pragma: no cover
             # before consortium standard in polars
@@ -20,7 +20,7 @@ def test_dataframe(self) -> None:
         assert result == expected
 
     def test_lazyframe(self) -> None:
-        import polars as pl
+        pl = pytest.importorskip("polars")
 
         if parse(pl.__version__) < Version("0.19.0"):  # pragma: no cover
             # before consortium standard in polars
diff --git a/tests/namespace/column_from_1d_array_test.py b/tests/namespace/column_from_1d_array_test.py
index 95b3b8c..699df39 100644
--- a/tests/namespace/column_from_1d_array_test.py
+++ b/tests/namespace/column_from_1d_array_test.py
@@ -98,15 +98,14 @@ def test_datetime_from_1d_array(library: BaseHandler) -> None:
     compare_column_with_reference(result.col("result"), expected, dtype=ns.Datetime)
 
 
-@pytest.mark.skipif(
-    Version("0.19.9") > polars_version(),
-    reason="upstream bug",
-)
-@pytest.mark.skipif(
-    Version("2.0.0") > pandas_version(),
-    reason="pandas before non-nano",
-)
 def test_duration_from_1d_array(library: BaseHandler) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
+        "2.0.0",
+    ):
+        pytest.skip(reason="pandas before non-nano")
+    if library.name == "polars-lazy" and polars_version() < Version("0.19.9"):
+        pytest.skip(reason="upstream bug")
+
     ser = integer_dataframe_1(library).persist().col("a")
     ns = ser.__column_namespace__()
     arr = np.array([timedelta(1), timedelta(2)], dtype="timedelta64[ms]")
diff --git a/tests/namespace/concat_test.py b/tests/namespace/concat_test.py
index 2fd8327..ad31739 100644
--- a/tests/namespace/concat_test.py
+++ b/tests/namespace/concat_test.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import polars as pl
 import pytest
 
 from tests.utils import BaseHandler
@@ -23,6 +22,11 @@ def test_concat_mismatch(library: BaseHandler) -> None:
     df1 = integer_dataframe_1(library).persist()
     df2 = integer_dataframe_4(library).persist()
     ns = df1.__dataframe_namespace__()
+    exceptions = (ValueError,)
+    if library.name == "polars-lazy":
+        import polars as pl
+
+        exceptions = (ValueError, pl.exceptions.ShapeError)
     # TODO check the error
-    with pytest.raises((ValueError, pl.exceptions.ShapeError)):
+    with pytest.raises(exceptions):
         _ = ns.concat([df1, df2]).persist()
diff --git a/tests/namespace/convert_to_standard_column_test.py b/tests/namespace/convert_to_standard_column_test.py
index 3fdfcaf..2e9707a 100644
--- a/tests/namespace/convert_to_standard_column_test.py
+++ b/tests/namespace/convert_to_standard_column_test.py
@@ -1,24 +1,29 @@
 from __future__ import annotations
 
-import pandas as pd
-import polars as pl
 import pytest
 from packaging.version import Version
 
+from tests.utils import BaseHandler
 from tests.utils import pandas_version
 from tests.utils import polars_version
 
 
-@pytest.mark.skipif(
-    Version("0.19.0") > polars_version() or Version("2.1.0") > pandas_version(),
-    reason="before consortium standard in polars/pandas",
-)
-def test_convert_to_std_column() -> None:
-    s = pl.Series([1, 2, 3]).__column_consortium_standard__()
-    assert float(s.mean()) == 2
-    s = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
-    assert float(s.mean()) == 2
-    s = pd.Series([1, 2, 3]).__column_consortium_standard__()
-    assert float(s.mean()) == 2
-    s = pd.Series([1, 2, 3], name="alice").__column_consortium_standard__()
-    assert float(s.mean()) == 2
+def test_convert_to_std_column(library: BaseHandler) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable"):
+        if pandas_version() < Version("2.1.0"):
+            pytest.skip(reason="before consortium standard in pandas")
+        import pandas as pd
+
+        s = pd.Series([1, 2, 3]).__column_consortium_standard__()
+        assert float(s.mean()) == 2
+        s = pd.Series([1, 2, 3], name="alice").__column_consortium_standard__()
+        assert float(s.mean()) == 2
+    elif library.name == "polars-lazy":
+        if polars_version() < Version("0.19.0"):
+            pytest.skip(reason="before consortium standard in polars")
+        import polars as pl
+
+        s = pl.Series([1, 2, 3]).__column_consortium_standard__()
+        assert float(s.mean()) == 2
+        s = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
+        assert float(s.mean()) == 2
diff --git a/tests/namespace/is_dtype_test.py b/tests/namespace/is_dtype_test.py
index 4d2ce86..6426742 100644
--- a/tests/namespace/is_dtype_test.py
+++ b/tests/namespace/is_dtype_test.py
@@ -21,11 +21,11 @@
         (("string", "unsigned integer"), ["e", "f", "g", "h", "l"]),
     ],
 )
-@pytest.mark.skipif(
-    Version("2.0.0") > pandas_version(),
-    reason="before pandas got non-nano support",
-)
 def test_is_dtype(library: BaseHandler, dtype: str, expected: list[str]) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
+        "2.0.0",
+    ):
+        pytest.skip(reason="pandas before non-nano")
     df = mixed_dataframe_1(library).persist()
     namespace = df.__dataframe_namespace__()
     result = [i for i in df.column_names if namespace.is_dtype(df.schema[i], dtype)]
diff --git a/tests/utils.py b/tests/utils.py
index 0cf80f6..22fb0bd 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -152,7 +152,12 @@ def convert_to_standard_compliant_dataframe(
 ) -> DataFrame:
     # TODO: type return
     import pandas as pd
-    import polars as pl
+
+    try:
+        polars_installed = True
+        import polars as pl
+    except ModuleNotFoundError:
+        polars_installed = False
 
     if isinstance(df, pd.DataFrame):
         import dataframe_api_compat.pandas_standard
@@ -163,7 +168,7 @@ def convert_to_standard_compliant_dataframe(
                 api_version=api_version,
             )
         )
-    elif isinstance(df, (pl.DataFrame, pl.LazyFrame)):
+    elif polars_installed and isinstance(df, (pl.DataFrame, pl.LazyFrame)):
         import dataframe_api_compat.polars_standard
 
         df_lazy = df.lazy() if isinstance(df, pl.DataFrame) else df