Add standard entrypoints tests for Modin (#76)

Signed-off-by: Anatoly Myachev <anatoly.myachev@intel.com>
data-apis · Apr 25, 2024 · 3dc9b67 · 3dc9b67
1 parent 4296995
commit 3dc9b67
Show file tree

Hide file tree

Showing 12 changed files with 79 additions and 34 deletions.
diff --git a/dataframe_api_compat/modin_standard/__init__.py b/dataframe_api_compat/modin_standard/__init__.py
@@ -168,8 +168,7 @@ def map_standard_dtype_to_pandas_dtype(dtype: DType) -> Any:
 def convert_to_standard_compliant_column(
     ser: pd.Series[Any],
     api_version: str | None = None,
-) -> Column:  # pragma: no cover
-    # TODO: remove pragma after after modin implements `__column_consortium_standard__`
+) -> Column:
     if ser.name is not None and not isinstance(ser.name, str):
         msg = f"Expected column with string name, got: {ser.name}"
         raise ValueError(msg)

diff --git a/dataframe_api_compat/modin_standard/dataframe_object.py b/dataframe_api_compat/modin_standard/dataframe_object.py
@@ -211,8 +211,7 @@ def rename(self, mapping: Mapping[str, str]) -> DataFrame:
             self.dataframe.rename(columns=mapping),
         )
 
-    def get_column_names(self) -> list[str]:  # pragma: no cover
-        # TODO: add a test after modin implements `__dataframe_consortium_standard__`
+    def get_column_names(self) -> list[str]:
         # DO NOT REMOVE
         # This one is used in upstream tests - even if deprecated,
         # just leave it in for backwards compatibility

diff --git a/requirements-dev-modin.txt b/requirements-dev-modin.txt
@@ -1,5 +1,5 @@
 covdefaults
-modin[ray]
+modin[ray]@git+https://github.com/modin-project/modin@main
 pre-commit
 pytest
 pytest-cov
diff --git a/tests/column/comparisons_test.py b/tests/column/comparisons_test.py
@@ -41,7 +41,7 @@ def test_column_comparisons(
     other = df.col("b")
     result = df.assign(getattr(ser, comparison)(other).rename("result"))
     expected_ns_dtype = getattr(ns, expected_dtype)
-    if comparison == "__pow__" and library.name in ("polars", "polars-lazy"):
+    if comparison == "__pow__" and library.name == "polars-lazy":
         # TODO
         result = result.cast({"result": ns.Int64()})
         expected_ns_dtype = ns.Int64
@@ -79,7 +79,7 @@ def test_column_comparisons_scalar(
     other = 3
     result = df.assign(getattr(ser, comparison)(other).rename("result"))
     expected_ns_dtype = getattr(ns, expected_dtype)
-    if comparison == "__pow__" and library.name in ("polars", "polars-lazy"):
+    if comparison == "__pow__" and library.name == "polars-lazy":
         result = result.cast({"result": ns.Int64()})
         expected_ns_dtype = ns.Int64
     compare_column_with_reference(result.col("result"), expected_data, expected_ns_dtype)

diff --git a/tests/column/name_test.py b/tests/column/name_test.py
@@ -1,13 +1,12 @@
 from __future__ import annotations
 
-import pandas as pd
 import pytest
 from packaging.version import Version
-from packaging.version import parse
 
 from tests.utils import BaseHandler
 from tests.utils import convert_to_standard_compliant_dataframe
 from tests.utils import integer_dataframe_1
+from tests.utils import pandas_version
 
 
 def test_name(library: BaseHandler) -> None:
@@ -17,15 +16,26 @@ def test_name(library: BaseHandler) -> None:
 
 
 def test_pandas_name_if_0_named_column() -> None:
+    import pandas as pd
+
     df = convert_to_standard_compliant_dataframe(pd.DataFrame({0: [1, 2, 3]}))
     assert df.column_names == [0]  # type: ignore[comparison-overlap]
     assert [col.name for col in df.iter_columns()] == [0]  # type: ignore[comparison-overlap]
 
 
-@pytest.mark.skipif(
-    parse(pd.__version__) < Version("2.1.0"),
-    reason="before consoritum standard",
-)
-def test_invalid_name_pandas() -> None:
-    with pytest.raises(ValueError):
-        pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
+def test_invalid_column_name(library: BaseHandler) -> None:
+    if library.name in ("pandas-numpy", "pandas-nullable"):
+        import pandas as pd
+
+        if pandas_version() < Version("2.1.0"):  # pragma: no cover
+            pytest.skip(reason="before consortium standard")
+        with pytest.raises(ValueError):
+            pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
+    elif library.name == "modin":
+        import modin.pandas as pd
+
+        with pytest.raises(ValueError):
+            pd.Series([1, 2, 3], name=0).__column_consortium_standard__()
+    else:  # pragma: no cover
+        msg = f"Not supported library: {library}"
+        raise AssertionError(msg)
diff --git a/tests/column/pow_test.py b/tests/column/pow_test.py
@@ -33,7 +33,7 @@ def test_int_powers_column(library: BaseHandler) -> None:
     ser = df.col("a")
     other = df.col("b") * 1
     result = df.assign(ser.__pow__(other).rename("result"))
-    if library.name in ("polars", "polars-lazy"):
+    if library.name == "polars-lazy":
         result = result.cast({name: ns.Int64() for name in ("a", "b", "result")})
     expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 32, 729]}
     expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")}
@@ -46,7 +46,7 @@ def test_int_powers_scalar_column(library: BaseHandler) -> None:
     ser = df.col("a")
     other = 1
     result = df.assign(ser.__pow__(other).rename("result"))
-    if library.name in ("polars", "polars-lazy"):
+    if library.name == "polars-lazy":
         result = result.cast({name: ns.Int64() for name in ("a", "b", "result")})
     expected = {"a": [1, 2, 3], "b": [4, 5, 6], "result": [1, 2, 3]}
     expected_dtype = {name: ns.Int64 for name in ("a", "b", "result")}

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -63,10 +63,8 @@ def pytest_generate_tests(metafunc: Any) -> None:
 ci_skip_ids = [
     # polars does not allow to create a dataframe with non-unique columns
     "non_unique_column_names_test.py::test_repeated_columns[polars-lazy]",
-    # TODO: enable after modin adds implementation for standard
-    "scale_column_test.py::test_scale_column[modin]",
-    "scale_column_test.py::test_scale_column_polars_from_persisted_df[modin]",
-    "convert_to_standard_column_test.py::test_convert_to_std_column[modin]",
+    # polars cannot create a Series whose name is not a string
+    "name_test.py::test_invalid_column_name[polars-lazy]",
 ]
 
 

diff --git a/tests/integration/scale_column_test.py b/tests/integration/scale_column_test.py
@@ -24,6 +24,11 @@ def test_scale_column(library: BaseHandler) -> None:
 
         s = pl.Series("a", [1, 2, 3])
         ser = s.__column_consortium_standard__()
+    elif library.name == "modin":
+        import modin.pandas as pd
+
+        s = pd.Series([1, 2, 3], name="a")
+        ser = s.__column_consortium_standard__()
     else:  # pragma: no cover
         msg = f"Not supported library: {library}"
         raise AssertionError(msg)
@@ -33,7 +38,7 @@ def test_scale_column(library: BaseHandler) -> None:
     compare_column_with_reference(ser, [-1, 0, 1.0], dtype=ns.Float64)
 
 
-def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:
+def test_scale_column_from_persisted_df(library: BaseHandler) -> None:
     if library.name in ("pandas-numpy", "pandas-nullable"):
         if pandas_version() < Version("2.1.0"):  # pragma: no cover
             pytest.skip(reason="pandas doesn't support 3.8")
@@ -48,6 +53,11 @@ def test_scale_column_polars_from_persisted_df(library: BaseHandler) -> None:
 
         df = pl.DataFrame({"a": [1, 2, 3]})
         ser = df.__dataframe_consortium_standard__().col("a")
+    elif library.name == "modin":
+        import modin.pandas as pd
+
+        df = pd.DataFrame({"a": [1, 2, 3]})
+        ser = df.__dataframe_consortium_standard__().col("a")
     else:  # pragma: no cover
         msg = f"Not supported library: {library}"
         raise AssertionError(msg)

diff --git a/tests/integration/upstream_test.py b/tests/integration/upstream_test.py
@@ -63,3 +63,23 @@ def test_pandas(self) -> None:
 
         ser = pd.Series([1, 2, 3], name="a")
         assert ser.name == "a"
+
+
+class TestModin:
+    def test_pandas(self) -> None:
+        """
+        Test some basic methods of the dataframe consortium standard.
+
+        Full testing is done at https://github.com/data-apis/dataframe-api-compat,
+        this is just to check that the entry point works as expected.
+        """
+        pd = pytest.importorskip("modin.pandas")
+
+        df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        df = df_pd.__dataframe_consortium_standard__()
+        result_1 = df.get_column_names()
+        expected_1 = ["a", "b"]
+        assert result_1 == expected_1
+
+        ser = pd.Series([1, 2, 3], name="a")
+        assert ser.name == "a"
diff --git a/tests/namespace/convert_to_standard_column_test.py b/tests/namespace/convert_to_standard_column_test.py
@@ -14,19 +14,29 @@ def test_convert_to_std_column(library: BaseHandler) -> None:
             pytest.skip(reason="before consortium standard in pandas")
         import pandas as pd
 
-        s = pd.Series([1, 2, 3]).__column_consortium_standard__()
-        assert float(s.mean()) == 2
-        s = pd.Series([1, 2, 3], name="alice").__column_consortium_standard__()
-        assert float(s.mean()) == 2
+        ser = pd.Series([1, 2, 3]).__column_consortium_standard__()
+        ser_with_name = pd.Series(
+            [1, 2, 3],
+            name="alice",
+        ).__column_consortium_standard__()
     elif library.name == "polars-lazy":
         if polars_version() < Version("0.19.0"):  # pragma: no cover
             pytest.skip(reason="before consortium standard in polars")
         import polars as pl
 
-        s = pl.Series([1, 2, 3]).__column_consortium_standard__()
-        assert float(s.mean()) == 2
-        s = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
-        assert float(s.mean()) == 2
+        ser = pl.Series([1, 2, 3]).__column_consortium_standard__()
+        ser_with_name = pl.Series("bob", [1, 2, 3]).__column_consortium_standard__()
+    elif library.name == "modin":
+        import modin.pandas as pd
+
+        ser = pd.Series([1, 2, 3]).__column_consortium_standard__()
+        ser_with_name = pd.Series(
+            [1, 2, 3],
+            name="alice",
+        ).__column_consortium_standard__()
     else:  # pragma: no cover
         msg = f"Not supported library: {library}"
         raise AssertionError(msg)
+
+    assert float(ser.mean()) == 2
+    assert float(ser_with_name.mean()) == 2
diff --git a/tests/namespace/sorted_indices_test.py b/tests/namespace/sorted_indices_test.py
@@ -20,7 +20,7 @@ def test_column_sorted_indices_ascending(library: BaseHandler) -> None:
         "b": [4, 4, 3, 1, 2],
         "result": [3, 4, 2, 1, 0],
     }
-    if library.name in ("polars", "polars-lazy"):
+    if library.name == "polars-lazy":
         result = result.cast({"result": ns.Int64()})
     try:
         compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)
@@ -44,7 +44,7 @@ def test_column_sorted_indices_descending(library: BaseHandler) -> None:
         "b": [4, 4, 3, 1, 2],
         "result": [0, 1, 2, 4, 3],
     }
-    if library.name in ("polars", "polars-lazy"):
+    if library.name == "polars-lazy":
         result = result.cast({"result": ns.Int64()})
     try:
         compare_dataframe_with_reference(result, expected_1, dtype=ns.Int64)

diff --git a/tests/utils.py b/tests/utils.py
@@ -150,7 +150,6 @@ def convert_to_standard_compliant_dataframe(
     df: pd.DataFrame | pl.DataFrame,
     api_version: str | None = None,
 ) -> DataFrame:
-    # TODO: type return
     import pandas as pd
 
     try: