Skip to content

Commit

Permalink
Add the ability to run tests when polars is not installed
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev committed Apr 23, 2024
1 parent af46c22 commit 52336b8
Show file tree
Hide file tree
Showing 11 changed files with 107 additions and 81 deletions.
8 changes: 4 additions & 4 deletions tests/column/schema_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@
from tests.utils import pandas_version


@pytest.mark.skipif(
Version("2.0.0") > pandas_version(),
reason="no pyarrow support",
)
def test_schema(library: BaseHandler) -> None:
if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
"2.0.0",
):
pytest.skip(reason="no pyarrow support")
df = mixed_dataframe_1(library)
namespace = df.__dataframe_namespace__()
result = df.col("a").dtype
Expand Down
3 changes: 3 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ def pytest_generate_tests(metafunc: Any) -> None:
ci_skip_ids = [
# polars does not allow creating a dataframe with non-unique column names
"non_unique_column_names_test.py::test_repeated_columns[polars-lazy]",
# TODO: enable once modin implements the dataframe consortium standard
"scale_column_test.py::test_scale_column[modin]",
"scale_column_test.py::test_scale_column_polars_from_persisted_df[modin]",
]


Expand Down
16 changes: 7 additions & 9 deletions tests/dataframe/schema_test.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
from __future__ import annotations

import pandas as pd
import pytest
from packaging.version import Version
from packaging.version import parse

from tests.utils import BaseHandler
from tests.utils import mixed_dataframe_1
from tests.utils import pandas_version


@pytest.mark.skipif(
Version("2.0.0") > pandas_version(),
reason="no pyarrow support",
)
def test_schema(library: BaseHandler) -> None:
if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
"2.0.0",
):
pytest.skip(reason="no pyarrow support")
df = mixed_dataframe_1(library)
namespace = df.__dataframe_namespace__()
result = df.schema
Expand Down Expand Up @@ -53,7 +51,7 @@ def test_schema(library: BaseHandler) -> None:
assert isinstance(result["n"], namespace.Datetime)
if not (
library.name in ("pandas-numpy", "pandas-nullable")
and parse(pd.__version__) < Version("2.0.0")
and pandas_version() < Version("2.0.0")
): # pragma: no cover (coverage bug?)
# pandas non-nanosecond support only came in 2.0
assert result["n"].time_unit == "ms"
Expand All @@ -63,7 +61,7 @@ def test_schema(library: BaseHandler) -> None:
assert isinstance(result["o"], namespace.Datetime)
if not (
library.name in ("pandas-numpy", "pandas-nullable")
and parse(pd.__version__) < Version("2.0.0")
and pandas_version() < Version("2.0.0")
): # pragma: no cover (coverage bug?)
# pandas non-nanosecond support only came in 2.0
assert result["o"].time_unit == "us"
Expand All @@ -72,7 +70,7 @@ def test_schema(library: BaseHandler) -> None:
assert result["o"].time_zone is None
if not (
library.name in ("pandas-numpy", "pandas-nullable")
and parse(pd.__version__) < Version("2.0.0")
and pandas_version() < Version("2.0.0")
):
# pandas non-nanosecond support only came in 2.0 - before that, these would be 'float'
assert isinstance(result["p"], namespace.Duration)
Expand Down
11 changes: 8 additions & 3 deletions tests/groupby/groupby_any_all_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import pytest
from packaging.version import Version
from packaging.version import parse
from polars.exceptions import SchemaError

from tests.utils import BaseHandler
from tests.utils import bool_dataframe_2
Expand Down Expand Up @@ -42,7 +41,13 @@ def test_groupby_boolean(

def test_group_by_invalid_any_all(library: BaseHandler) -> None:
df = integer_dataframe_4(library).persist()
with pytest.raises((TypeError, SchemaError)):

exceptions = (TypeError,)
if library.name == "polars-lazy":
from polars.exceptions import SchemaError

exceptions = (TypeError, SchemaError)
with pytest.raises(exceptions):
df.group_by("key").any()
with pytest.raises((TypeError, SchemaError)):
with pytest.raises(exceptions):
df.group_by("key").all()
71 changes: 39 additions & 32 deletions tests/integration/scale_column_test.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,51 @@
from __future__ import annotations

import pandas as pd
import polars as pl
import pytest
from packaging.version import Version
from packaging.version import parse
from polars.testing import assert_series_equal

from tests.utils import BaseHandler
from tests.utils import compare_column_with_reference
from tests.utils import pandas_version
from tests.utils import polars_version

@pytest.mark.skipif(
parse(pd.__version__) < Version("2.1.0"),
reason="pandas doesn't support 3.8",
)
def test_scale_column_pandas() -> None:
s = pd.Series([1, 2, 3], name="a")
ser = s.__column_consortium_standard__()
ser = ser - ser.mean()
result = ser.column
pd.testing.assert_series_equal(result, pd.Series([-1, 0, 1.0], name="a"))

def test_scale_column(library: BaseHandler) -> None:
if library.name in ("pandas-numpy", "pandas-nullable"):
if pandas_version() < Version("2.1.0"):
pytest.skip(reason="pandas doesn't support 3.8")
import pandas as pd

s = pd.Series([1, 2, 3], name="a")
ser = s.__column_consortium_standard__()
elif library.name == "polars-lazy":
if polars_version() < Version("0.19.0"):
pytest.skip(reason="before consortium standard in polars")
import polars as pl

s = pl.Series("a", [1, 2, 3])
ser = s.__column_consortium_standard__()

@pytest.mark.skipif(
parse(pl.__version__) < Version("0.19.0"),
reason="before consortium standard in polars",
)
def test_scale_column_polars() -> None:
s = pl.Series("a", [1, 2, 3])
ser = s.__column_consortium_standard__()
ns = ser.__column_namespace__()
ser = ser - ser.mean()
result = pl.select(ser.column)["a"]
assert_series_equal(result, pl.Series("a", [-1, 0, 1.0]))
compare_column_with_reference(ser, [-1, 0, 1.0], dtype=ns.Float64)


def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:
if library.name in ("pandas-numpy", "pandas-nullable"):
if pandas_version() < Version("2.1.0"):
pytest.skip(reason="pandas doesn't support 3.8")
import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
ser = df.__dataframe_consortium_standard__().col("a")
elif library.name == "polars-lazy":
if polars_version() < Version("0.19.0"):
pytest.skip(reason="before consortium standard in polars")
import polars as pl

df = pl.DataFrame({"a": [1, 2, 3]})
ser = df.__dataframe_consortium_standard__().col("a")

@pytest.mark.skipif(
parse(pl.__version__) < Version("0.19.0"),
reason="before consortium standard in polars",
)
def test_scale_column_polars_from_persisted_df() -> None:
df = pl.DataFrame({"a": [1, 2, 3]})
ser = df.__dataframe_consortium_standard__().col("a")
ns = ser.__column_namespace__()
ser = ser - ser.mean()
result = pl.select(ser.persist().column)["a"]
assert_series_equal(result, pl.Series("a", [-1, 0, 1.0]))
compare_column_with_reference(ser, [-1, 0, 1.0], dtype=ns.Float64)
4 changes: 2 additions & 2 deletions tests/integration/upstream_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class TestPolars:
def test_dataframe(self) -> None:
import polars as pl
pl = pytest.importorskip("polars")

if parse(pl.__version__) < Version("0.19.0"): # pragma: no cover
# before consortium standard in polars
Expand All @@ -20,7 +20,7 @@ def test_dataframe(self) -> None:
assert result == expected

def test_lazyframe(self) -> None:
import polars as pl
pl = pytest.importorskip("polars")

if parse(pl.__version__) < Version("0.19.0"): # pragma: no cover
# before consortium standard in polars
Expand Down
15 changes: 7 additions & 8 deletions tests/namespace/column_from_1d_array_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,14 @@ def test_datetime_from_1d_array(library: BaseHandler) -> None:
compare_column_with_reference(result.col("result"), expected, dtype=ns.Datetime)


@pytest.mark.skipif(
Version("0.19.9") > polars_version(),
reason="upstream bug",
)
@pytest.mark.skipif(
Version("2.0.0") > pandas_version(),
reason="pandas before non-nano",
)
def test_duration_from_1d_array(library: BaseHandler) -> None:
if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
"2.0.0",
):
pytest.skip(reason="pandas before non-nano")
if library.name == "polars-lazy" and polars_version() < Version("0.19.9"):
pytest.skip(reason="upstream bug")

ser = integer_dataframe_1(library).persist().col("a")
ns = ser.__column_namespace__()
arr = np.array([timedelta(1), timedelta(2)], dtype="timedelta64[ms]")
Expand Down
8 changes: 6 additions & 2 deletions tests/namespace/concat_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from __future__ import annotations

import polars as pl
import pytest

from tests.utils import BaseHandler
Expand All @@ -23,6 +22,11 @@ def test_concat_mismatch(library: BaseHandler) -> None:
df1 = integer_dataframe_1(library).persist()
df2 = integer_dataframe_4(library).persist()
ns = df1.__dataframe_namespace__()
exceptions = (ValueError,)
if library.name == "polars-lazy":
import polars as pl

exceptions = (ValueError, pl.exceptions.ShapeError)
# TODO check the error
with pytest.raises((ValueError, pl.exceptions.ShapeError)):
with pytest.raises(exceptions):
_ = ns.concat([df1, df2]).persist()
35 changes: 20 additions & 15 deletions tests/namespace/convert_to_standard_column_test.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
from __future__ import annotations

import pandas as pd
import polars as pl
import pytest
from packaging.version import Version

from tests.utils import BaseHandler
from tests.utils import pandas_version
from tests.utils import polars_version


@pytest.mark.skipif(
Version("0.19.0") > polars_version() or Version("2.1.0") > pandas_version(),
reason="before consortium standard in polars/pandas",
)
def test_convert_to_std_column() -> None:
s = pl.Series([1, 2, 3]).__column_consortium_standard__()
assert float(s.mean()) == 2
s = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
assert float(s.mean()) == 2
s = pd.Series([1, 2, 3]).__column_consortium_standard__()
assert float(s.mean()) == 2
s = pd.Series([1, 2, 3], name="alice").__column_consortium_standard__()
assert float(s.mean()) == 2
def test_convert_to_std_column(library: BaseHandler) -> None:
    """Check that native Series objects convert to standard-compliant columns.

    For the pandas and polars handlers, an unnamed and a named Series are each
    converted with ``__column_consortium_standard__`` and the mean of the
    resulting column is checked. The native library is imported inside its own
    branch, so this module does not need both libraries to be importable.

    The test is skipped when the installed pandas/polars is older than the
    first release that ships the consortium-standard entry point. Any other
    library name falls through without making an assertion.
    """
    backend = library.name

    if backend == "polars-lazy":
        if polars_version() < Version("0.19.0"):
            pytest.skip(reason="before consortium standard in polars")
        import polars as pl

        unnamed = pl.Series([1, 2, 3]).__column_consortium_standard__()
        assert float(unnamed.mean()) == 2
        named = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
        assert float(named.mean()) == 2
    elif backend in ("pandas-numpy", "pandas-nullable"):
        if pandas_version() < Version("2.1.0"):
            pytest.skip(reason="before consortium standard in pandas")
        import pandas as pd

        unnamed = pd.Series([1, 2, 3]).__column_consortium_standard__()
        assert float(unnamed.mean()) == 2
        named = pd.Series([1, 2, 3], name="alice").__column_consortium_standard__()
        assert float(named.mean()) == 2
8 changes: 4 additions & 4 deletions tests/namespace/is_dtype_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
(("string", "unsigned integer"), ["e", "f", "g", "h", "l"]),
],
)
@pytest.mark.skipif(
Version("2.0.0") > pandas_version(),
reason="before pandas got non-nano support",
)
def test_is_dtype(library: BaseHandler, dtype: str, expected: list[str]) -> None:
if library.name in ("pandas-numpy", "pandas-nullable") and pandas_version() < Version(
"2.0.0",
):
pytest.skip(reason="pandas before non-nano")
df = mixed_dataframe_1(library).persist()
namespace = df.__dataframe_namespace__()
result = [i for i in df.column_names if namespace.is_dtype(df.schema[i], dtype)]
Expand Down
9 changes: 7 additions & 2 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ def convert_to_standard_compliant_dataframe(
) -> DataFrame:
# TODO: type return
import pandas as pd
import polars as pl

try:
polars_installed = True
import polars as pl
except ModuleNotFoundError:
polars_installed = False

if isinstance(df, pd.DataFrame):
import dataframe_api_compat.pandas_standard
Expand All @@ -163,7 +168,7 @@ def convert_to_standard_compliant_dataframe(
api_version=api_version,
)
)
elif isinstance(df, (pl.DataFrame, pl.LazyFrame)):
elif polars_installed and isinstance(df, (pl.DataFrame, pl.LazyFrame)):
import dataframe_api_compat.polars_standard

df_lazy = df.lazy() if isinstance(df, pl.DataFrame) else df
Expand Down

0 comments on commit 52336b8

Please sign in to comment.