Use Narwhals to support more DataFrame types

Remove support for Python 3.7
mwouts · Dec 15, 2024 · 227e094 · 227e094
1 parent a3dfd99
commit 227e094
Show file tree

Hide file tree

Showing 15 changed files with 399 additions and 144 deletions.
diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml
@@ -40,18 +40,20 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12", "3.13"]
+        python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13"]
         pandas-version: [latest]
         numpy-version: [latest]
         include:
-          - python-version: 3.7
-            pandas-version: '<1.0'
           - python-version: 3.9
             pandas-version: '<2.0'
             numpy-version: '<2.0'
           - python-version: "3.13"
             pandas-version: pre
             polars: true
+          - python-version: "3.13"
+            modin: true
+          - python-version: "3.13"
+            uninstall_narwhals: true
           - python-version: "3.13"
             uninstall_jinja2: true
     runs-on: ubuntu-20.04
@@ -85,10 +87,17 @@ jobs:
 
       - name: Install polars
         if: matrix.polars
-        run: pip install -e .[polars]
+        run: pip install polars
+
+      - name: Install modin
+        if: matrix.modin
+        run: pip install modin[all]
+
+      - name: Uninstall narwhals
+        if: matrix.uninstall_narwhals
+        run: pip uninstall narwhals -y
 
       - name: Install shiny
-        if: matrix.python-version != '3.7'
         run: pip install "shiny>=1.0"
 
       - name: Uninstall jinja2

diff --git a/docs/_toc.yml b/docs/_toc.yml
@@ -32,3 +32,4 @@ parts:
     chapters:
     - file: sample_dataframes
     - file: polars_dataframes
+    - file: modin_dataframes
diff --git a/docs/changelog.md b/docs/changelog.md
@@ -1,6 +1,13 @@
 ITables ChangeLog
 =================
 
+2.3.0-dev
+---------
+
+**Added**
+- In addition to Pandas and Polars, ITables now supports Modin DataFrames ([#325](https://github.com/mwouts/itables/issues/325)). Under the hood we use [Narwhals](https://github.com/narwhals-dev/narwhals) to handle the different types of DataFrames. Thanks to [Dea María Léon](https://github.com/DeaMariaLeon) and to [Marco Gorelli](https://github.com/MarcoGorelli) for making this work, and for developing Narwhals too!
+
+
 2.2.4 (2024-12-07)
 ------------------
 

diff --git a/docs/modin_dataframes.md b/docs/modin_dataframes.md
@@ -0,0 +1,163 @@
+---
+jupytext:
+  formats: md:myst
+  notebook_metadata_filter: -jupytext.text_representation.jupytext_version
+  text_representation:
+    extension: .md
+    format_name: myst
+    format_version: 0.13
+kernelspec:
+  display_name: itables
+  language: python
+  name: itables
+---
+
+# Modin DataFrames
+
+In this notebook we make sure that our test dataframes are displayed nicely with the default `itables` settings.
+
+```{code-cell}
+from itables import init_notebook_mode, show
+from itables.sample_dfs import get_dict_of_test_modin_dfs
+
+dict_of_test_dfs = get_dict_of_test_modin_dfs()
+init_notebook_mode(all_interactive=True)
+```
+
+## empty
+
+```{code-cell}
+show(dict_of_test_dfs["empty"])
+```
+
+## No rows
+
+```{code-cell}
+show(dict_of_test_dfs["no_rows"])
+```
+
+## No rows one column
+
+```{code-cell}
+show(dict_of_test_dfs["no_rows_one_column"])
+```
+
+## No columns
+
+```{code-cell}
+show(dict_of_test_dfs["no_columns"])
+```
+
+## No columns one row
+
+```{code-cell}
+show(dict_of_test_dfs["no_columns_one_row"])
+```
+
+## bool
+
+```{code-cell}
+show(dict_of_test_dfs["bool"])
+```
+
+## Nullable boolean
+
+```{code-cell}
+show(dict_of_test_dfs["nullable_boolean"])
+```
+
+## int
+
+```{code-cell}
+show(dict_of_test_dfs["int"])
+```
+
+## Nullable integer
+
+```{code-cell}
+show(dict_of_test_dfs["nullable_int"])
+```
+
+## float
+
+```{code-cell}
+show(dict_of_test_dfs["float"])
+```
+
+## str
+
+```{code-cell}
+show(dict_of_test_dfs["str"])
+```
+
+## time
+
+```{code-cell}
+show(dict_of_test_dfs["time"])
+```
+
+## object
+
+```{code-cell}
+show(dict_of_test_dfs["object"])
+```
+
+## ordered_categories
+
+```{code-cell}
+show(dict_of_test_dfs["ordered_categories"])
+```
+
+## ordered_categories_in_multiindex
+
+```{code-cell}
+show(dict_of_test_dfs["ordered_categories_in_multiindex"])
+```
+
+## countries
+
+```{code-cell}
+:tags: [full-width]
+
+show(dict_of_test_dfs["countries"])
+```
+
+## capital
+
+```{code-cell}
+show(dict_of_test_dfs["capital"])
+```
+
+## int_float_str
+
+```{code-cell}
+show(dict_of_test_dfs["int_float_str"])
+```
+
+## wide
+
+```{code-cell}
+:tags: [full-width]
+
+show(dict_of_test_dfs["wide"], maxBytes=100000, maxColumns=100, scrollX=True)
+```
+
+## long_column_names
+
+```{code-cell}
+:tags: [full-width]
+
+show(dict_of_test_dfs["long_column_names"], scrollX=True)
+```
+
+## named_column_index
+
+```{code-cell}
+show(dict_of_test_dfs["named_column_index"])
+```
+
+## big_integers
+
+```{code-cell}
+show(dict_of_test_dfs["big_integers"])
+```
diff --git a/docs/polars_dataframes.md b/docs/polars_dataframes.md
@@ -19,9 +19,9 @@ dataframes are displayed nicely with the default `itables` settings.
 
 ```{code-cell}
 from itables import init_notebook_mode, show
-from itables.sample_dfs import get_dict_of_test_dfs
+from itables.sample_dfs import get_dict_of_test_polars_dfs
 
-dict_of_test_dfs = get_dict_of_test_dfs(polars=True)
+dict_of_test_dfs = get_dict_of_test_polars_dfs()
 init_notebook_mode(all_interactive=True)
 ```
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -18,16 +18,15 @@ classifiers = [
   "Intended Audience :: Science/Research",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.7",
   "Programming Language :: Python :: 3.8",
   "Programming Language :: Python :: 3.9",
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
   "Programming Language :: Python :: 3.13",
 ]
-requires-python = ">= 3.7"
-dependencies = ["IPython", "pandas", "numpy"]
+requires-python = ">= 3.8"
+dependencies = ["IPython", "pandas", "numpy", "narwhals>=1.18.3"]
 dynamic = ["version"]
 
 [project.optional-dependencies]

diff --git a/src/itables/datatables_format.py b/src/itables/datatables_format.py
@@ -6,12 +6,6 @@
 import pandas as pd
 import pandas.io.formats.format as fmt
 
-try:
-    import polars as pl
-except ImportError:
-    pl = None
-
-
 JS_MAX_SAFE_INTEGER = 2**53 - 1
 JS_MIN_SAFE_INTEGER = -(2**53 - 1)
 
@@ -91,8 +85,7 @@ def datatables_rows(df, count=None, warn_on_unexpected_types=False, pure_json=Fa
         assert missing_columns > 0
         empty_columns = [[None] * len(df)] * missing_columns
 
-    try:
-        # Pandas DataFrame
+    if isinstance(df, pd.DataFrame):
         data = list(
             zip(
                 *(empty_columns + [_format_column(x, pure_json) for _, x in df.items()])
@@ -108,17 +101,19 @@ def datatables_rows(df, count=None, warn_on_unexpected_types=False, pure_json=Fa
             cls=generate_encoder(warn_on_unexpected_types),
             allow_nan=not pure_json,
         )
-    except AttributeError:
-        # Polars DataFrame
+    else:
+        # Polars, Modin, or other
+        import narwhals as nw
+
+        df = nw.from_native(df)
         data = df.rows()
-        import polars as pl
 
         has_bigints = any(
             (
-                x.dtype == pl.Int64
+                x.dtype == nw.Int64
                 and ((x > JS_MAX_SAFE_INTEGER).any() or (x < JS_MIN_SAFE_INTEGER).any())
             )
-            or (x.dtype == pl.UInt64 and (x > JS_MAX_SAFE_INTEGER).any())
+            or (x.dtype == nw.UInt64 and (x > JS_MAX_SAFE_INTEGER).any())
             for x in (df[col] for col in df.columns)
         )
         js = json.dumps(data, cls=generate_encoder(False), allow_nan=not pure_json)

diff --git a/src/itables/downsample.py b/src/itables/downsample.py
@@ -2,15 +2,13 @@
 
 import pandas as pd
 
-from .datatables_format import _isetitem
-
 
 def nbytes(df):
-    try:
+    if isinstance(df, pd.DataFrame):
         return sum(x.values.nbytes for _, x in df.items())
-    except AttributeError:
-        # Polars DataFrame
-        return df.estimated_size()
+
+    # Narwhals
+    return df.estimated_size()
 
 
 def as_nbytes(mem):
@@ -97,31 +95,36 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=
         second_half = max_rows // 2
         first_half = max_rows - second_half
         if second_half:
-            try:
+            if isinstance(df, pd.DataFrame):
                 df = pd.concat((df.iloc[:first_half], df.iloc[-second_half:]))
-            except AttributeError:
-                df = df.head(first_half).vstack(df.tail(second_half))
+            else:
+                from narwhals import concat
+
+                df = concat([df.head(first_half), df.tail(second_half)], how="vertical")
         else:
-            try:
+            if isinstance(df, pd.DataFrame):
                 df = df.iloc[:first_half]
-            except AttributeError:
+            else:
                 df = df.head(first_half)
 
     if len(df.columns) > max_columns > 0:
         second_half = max_columns // 2
         first_half = max_columns - second_half
         if second_half:
-            try:
+            if isinstance(df, pd.DataFrame):
                 df = pd.concat(
                     (df.iloc[:, :first_half], df.iloc[:, -second_half:]), axis=1
                 )
-            except AttributeError:
-                df = df[df.columns[:first_half]].hstack(df[df.columns[-second_half:]])
+            else:
+                first_and_last_columns = (
+                    df.columns[:first_half] + df.columns[-second_half:]
+                )
+                df = df.select(first_and_last_columns)
         else:
-            try:
+            if isinstance(df, pd.DataFrame):
                 df = df.iloc[:, :first_half]
-            except AttributeError:
-                df = df[df.columns[:first_half]]
+            else:
+                df = df.select(df.columns[:first_half])
 
     df_nbytes = nbytes(df)
     if df_nbytes > max_bytes > 0:
@@ -144,13 +147,6 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio=
             )
 
         # max_bytes is smaller than the average size of one cell
-        try:
-            df = df.iloc[:1, :1]
-            _isetitem(df, 0, ["..."])
-        except AttributeError:
-            import polars as pl  # noqa
-
-            df = pl.DataFrame({df.columns[0]: ["..."]})
-        return df
+        return pd.DataFrame({df.columns[0]: ["..."]})
 
     return df