Skip to content

Commit

Permalink
Add standard entrypoints tests for Modin (#76)
Browse files Browse the repository at this point in the history
Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
  • Loading branch information
anmyachev authored Apr 25, 2024
1 parent 4296995 commit 3dc9b67
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 34 deletions.
3 changes: 1 addition & 2 deletions dataframe_api_compat/modin_standard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,7 @@ def map_standard_dtype_to_pandas_dtype(dtype: DType) -> Any:
def convert_to_standard_compliant_column(
ser: pd.Series[Any],
api_version: str | None = None,
) -> Column: # pragma: no cover
# TODO: remove pragma after modin implements `__column_consortium_standard__`
) -> Column:
if ser.name is not None and not isinstance(ser.name, str):
msg = f"Expected column with string name, got: {ser.name}"
raise ValueError(msg)
Expand Down
3 changes: 1 addition & 2 deletions dataframe_api_compat/modin_standard/dataframe_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,7 @@ def rename(self, mapping: Mapping[str, str]) -> DataFrame:
self.dataframe.rename(columns=mapping),
)

def get_column_names(self) -> list[str]: # pragma: no cover
# TODO: add a test after modin implements `__dataframe_consortium_standard__`
def get_column_names(self) -> list[str]:
# DO NOT REMOVE
# This one is used in upstream tests - even if deprecated,
# just leave it in for backwards compatibility
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev-modin.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
covdefaults
modin[ray]
modin[ray]@git+https://github.com/modin-project/modin@main
pre-commit
pytest
pytest-cov
4 changes: 2 additions & 2 deletions tests/column/comparisons_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def test_column_comparisons(
other = df.col("b")
result = df.assign(getattr(ser, comparison)(other).rename("result"))
expected_ns_dtype = getattr(ns, expected_dtype)
if comparison == "__pow__" and library.name in ("polars", "polars-lazy"):
if comparison == "__pow__" and library.name == "polars-lazy":
# TODO
result = result.cast({"result": ns.Int64()})
expected_ns_dtype = ns.Int64
Expand Down Expand Up @@ -79,7 +79,7 @@ def test_column_comparisons_scalar(
other = 3
result = df.assign(getattr(ser, comparison)(other).rename("result"))
expected_ns_dtype = getattr(ns, expected_dtype)
if comparison == "__pow__" and library.name in ("polars", "polars-lazy"):
if comparison == "__pow__" and library.name == "polars-lazy":
result = result.cast({"result": ns.Int64()})
expected_ns_dtype = ns.Int64
compare_column_with_reference(result.col("result"), expected_data, expected_ns_dtype)
Expand Down
28 changes: 19 additions & 9 deletions tests/column/name_test.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
from __future__ import annotations

import pandas as pd
import pytest
from packaging.version import Version
from packaging.version import parse

from tests.utils import BaseHandler
from tests.utils import convert_to_standard_compliant_dataframe
from tests.utils import integer_dataframe_1
from tests.utils import pandas_version


def test_name(library: BaseHandler) -> None:
Expand All @@ -17,15 +16,26 @@ def test_name(library: BaseHandler) -> None:


def test_pandas_name_if_0_named_column() -> None:
    """Check that an integer column label ``0`` is preserved as-is.

    The label must be reported unchanged both by ``column_names`` and by
    the ``name`` of each column yielded from ``iter_columns``.
    """
    import pandas as pd

    raw_frame = pd.DataFrame({0: [1, 2, 3]})
    std_frame = convert_to_standard_compliant_dataframe(raw_frame)

    assert std_frame.column_names == [0]  # type: ignore[comparison-overlap]

    per_column_names = [column.name for column in std_frame.iter_columns()]
    assert per_column_names == [0]  # type: ignore[comparison-overlap]


@pytest.mark.skipif(
parse(pd.__version__) < Version("2.1.0"),
reason="before consoritum standard",
)
def test_invalid_name_pandas() -> None:
with pytest.raises(ValueError):
pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
def test_invalid_column_name(library: BaseHandler) -> None:
    """Check that a series with a non-string name is rejected.

    Converting a series named ``0`` through ``__column_consortium_standard__``
    must raise ``ValueError`` for both the pandas and the modin backends.

    The backend-specific branches only select which ``pd`` module to use
    (and skip on pandas versions older than 2.1.0); the assertion itself is
    shared so that every backend is held to exactly the same check.
    """
    if library.name in ("pandas-numpy", "pandas-nullable"):
        import pandas as pd

        if pandas_version() < Version("2.1.0"):  # pragma: no cover
            pytest.skip(reason="before consortium standard")
    elif library.name == "modin":
        import modin.pandas as pd
    else:  # pragma: no cover
        msg = f"Not supported library: {library}"
        raise AssertionError(msg)

    with pytest.raises(ValueError):
        pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
4 changes: 2 additions & 2 deletions tests/column/pow_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_int_powers_column(library: BaseHandler) -> None:
ser = df.col("a")
other = df.col("b") * 1
result = df.assign(ser.__pow__(other).rename("result"))
if library.name in ("polars", "polars-lazy"):
if library.name == "polars-lazy":
result = result.cast({name: ns.Int64() for name in ("a", "b", "result")})
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]}
expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")}
Expand All @@ -46,7 +46,7 @@ def test_int_powers_scalar_column(library: BaseHandler) -> None:
ser = df.col("a")
other = 1
result = df.assign(ser.__pow__(other).rename("result"))
if library.name in ("polars", "polars-lazy"):
if library.name == "polars-lazy":
result = result.cast({name: ns.Int64() for name in ("a", "b", "result")})
expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]}
expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")}
Expand Down
6 changes: 2 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,8 @@ def pytest_generate_tests(metafunc: Any) -> None:
ci_skip_ids = [
# polars does not allow to create a dataframe with non-unique columns
"non_unique_column_names_test.py::test_repeated_columns[polars-lazy]",
# TODO: enable after modin adds implementation for standard
"scale_column_test.py::test_scale_column[modin]",
"scale_column_test.py::test_scale_column_polars_from_persisted_df[modin]",
"convert_to_standard_column_test.py::test_convert_to_std_column[modin]",
# it is impossible to create a series whose name is not a string
"name_test.py::test_invalid_column_name[polars-lazy]",
]


Expand Down
12 changes: 11 additions & 1 deletion tests/integration/scale_column_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@ def test_scale_column(library: BaseHandler) -> None:

s = pl.Series("a", [1, 2, 3])
ser = s.__column_consortium_standard__()
elif library.name == "modin":
import modin.pandas as pd

s = pd.Series([1, 2, 3], name="a")
ser = s.__column_consortium_standard__()
else: # pragma: no cover
msg = f"Not supported library: {library}"
raise AssertionError(msg)
Expand All @@ -33,7 +38,7 @@ def test_scale_column(library: BaseHandler) -> None:
compare_column_with_reference(ser, [-1, 0, 1.0], dtype=ns.Float64)


def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:
def test_scale_column_from_persisted_df(library: BaseHandler) -> None:
if library.name in ("pandas-numpy", "pandas-nullable"):
if pandas_version() < Version("2.1.0"): # pragma: no cover
pytest.skip(reason="pandas doesn't support 3.8")
Expand All @@ -48,6 +53,11 @@ def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:

df = pl.DataFrame({"a": [1, 2, 3]})
ser = df.__dataframe_consortium_standard__().col("a")
elif library.name == "modin":
import modin.pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})
ser = df.__dataframe_consortium_standard__().col("a")
else: # pragma: no cover
msg = f"Not supported library: {library}"
raise AssertionError(msg)
Expand Down
20 changes: 20 additions & 0 deletions tests/integration/upstream_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,23 @@ def test_pandas(self) -> None:

ser = pd.Series([1, 2, 3], name="a")
assert ser.name == "a"


class TestModin:
    def test_pandas(self) -> None:
        """
        Smoke-test the dataframe consortium standard entry point for Modin.

        Full testing is done at https://github.com/data-apis/dataframe-api-compat,
        this only verifies that the entry point can be reached and used.
        """
        modin_pd = pytest.importorskip("modin.pandas")

        native_frame = modin_pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        std_frame = native_frame.__dataframe_consortium_standard__()
        column_names = std_frame.get_column_names()
        assert column_names == ["a", "b"]

        series = modin_pd.Series([1, 2, 3], name="a")
        assert series.name == "a"
26 changes: 18 additions & 8 deletions tests/namespace/convert_to_standard_column_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,29 @@ def test_convert_to_std_column(library: BaseHandler) -> None:
pytest.skip(reason="before consortium standard in pandas")
import pandas as pd

s = pd.Series([1, 2, 3]).__column_consortium_standard__()
assert float(s.mean()) == 2
s = pd.Series([1, 2, 3], name="alice").__column_consortium_standard__()
assert float(s.mean()) == 2
ser = pd.Series([1, 2, 3]).__column_consortium_standard__()
ser_with_name = pd.Series(
[1, 2, 3],
name="alice",
).__column_consortium_standard__()
elif library.name == "polars-lazy":
if polars_version() < Version("0.19.0"): # pragma: no cover
pytest.skip(reason="before consortium standard in polars")
import polars as pl

s = pl.Series([1, 2, 3]).__column_consortium_standard__()
assert float(s.mean()) == 2
s = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
assert float(s.mean()) == 2
ser = pl.Series([1, 2, 3]).__column_consortium_standard__()
ser_with_name = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
elif library.name == "modin":
import modin.pandas as pd

ser = pd.Series([1, 2, 3]).__column_consortium_standard__()
ser_with_name = pd.Series(
[1, 2, 3],
name="alice",
).__column_consortium_standard__()
else: # pragma: no cover
msg = f"Not supported library: {library}"
raise AssertionError(msg)

assert float(ser.mean()) == 2
assert float(ser_with_name.mean()) == 2
4 changes: 2 additions & 2 deletions tests/namespace/sorted_indices_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_column_sorted_indices_ascending(library: BaseHandler) -> None:
"b": [4, 4, 3, 1, 2],
"result": [3, 4, 2, 1, 0],
}
if library.name in ("polars", "polars-lazy"):
if library.name == "polars-lazy":
result = result.cast({"result": ns.Int64()})
try:
compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)
Expand All @@ -44,7 +44,7 @@ def test_column_sorted_indices_descending(library: BaseHandler) -> None:
"b": [4, 4, 3, 1, 2],
"result": [0, 1, 2, 4, 3],
}
if library.name in ("polars", "polars-lazy"):
if library.name == "polars-lazy":
result = result.cast({"result": ns.Int64()})
try:
compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)
Expand Down
1 change: 0 additions & 1 deletion tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ def convert_to_standard_compliant_dataframe(
df: pd.DataFrame | pl.DataFrame,
api_version: str | None = None,
) -> DataFrame:
# TODO: type return
import pandas as pd

try:
Expand Down

0 comments on commit 3dc9b67

Please sign in to comment.