From 227e094ee672d5b40e0e7f8b44cf1c20edc5467d Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sun, 15 Dec 2024 19:18:42 +0000 Subject: [PATCH 1/9] Use Narwhals to support more DataFrame types Remove support for Python 3.7 --- .github/workflows/continuous-integration.yml | 19 +- docs/_toc.yml | 1 + docs/changelog.md | 7 + docs/modin_dataframes.md | 163 ++++++++++++++++++ docs/polars_dataframes.md | 4 +- pyproject.toml | 5 +- src/itables/datatables_format.py | 21 +-- src/itables/downsample.py | 46 +++-- src/itables/javascript.py | 98 +++++++---- src/itables/sample_dfs.py | 103 +++++++---- src/itables/version.py | 2 +- ...y => test_documentation_notebooks_pass.py} | 11 +- tests/test_downsample.py | 3 +- tests/test_modin.py | 26 +++ tests/test_polars.py | 34 ++-- 15 files changed, 399 insertions(+), 144 deletions(-) create mode 100644 docs/modin_dataframes.md rename tests/{test_documentation_notebooks_run.py => test_documentation_notebooks_pass.py} (88%) create mode 100644 tests/test_modin.py diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index c5b8628e..e3de5960 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -40,18 +40,20 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.7, 3.8, 3.9, "3.10", "3.11", "3.12", "3.13"] + python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13"] pandas-version: [latest] numpy-version: [latest] include: - - python-version: 3.7 - pandas-version: '<1.0' - python-version: 3.9 pandas-version: '<2.0' numpy-version: '<2.0' - python-version: "3.13" pandas-version: pre polars: true + - python-version: "3.13" + modin: true + - python-version: "3.13" + uninstall_narwhals: true - python-version: "3.13" uninstall_jinja2: true runs-on: ubuntu-20.04 @@ -85,10 +87,17 @@ jobs: - name: Install polars if: matrix.polars - run: pip install -e .[polars] + run: pip install polars + + - name: Install modin + if: matrix.modin + 
run: pip install modin[all] + + - name: Uninstall narwhals + if: matrix.uninstall_narwhals + run: pip uninstall narwhals -y - name: Install shiny - if: matrix.python-version != '3.7' run: pip install "shiny>=1.0" - name: Uninstall jinja2 diff --git a/docs/_toc.yml b/docs/_toc.yml index 391b7095..51504d97 100644 --- a/docs/_toc.yml +++ b/docs/_toc.yml @@ -32,3 +32,4 @@ parts: chapters: - file: sample_dataframes - file: polars_dataframes + - file: modin_dataframes diff --git a/docs/changelog.md b/docs/changelog.md index 2d038283..689fb511 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -1,6 +1,13 @@ ITables ChangeLog ================= +2.3.0-dev +--------- + +**Added** +- In addition to Pandas and Polars, ITables now supports Modin DataFrames ([#325](https://github.com/mwouts/itables/issues/325)). Under the hood we use [Narwhals](https://github.com/narwhals-dev/narwhals) to handle the different types of DataFrames. Thanks to [Dea María Léon](https://github.com/DeaMariaLeon) and to [Marco Gorelli](https://github.com/MarcoGorelli) for making this work, and for developing Narwhals too! + + 2.2.4 (2024-12-07) ------------------ diff --git a/docs/modin_dataframes.md b/docs/modin_dataframes.md new file mode 100644 index 00000000..5c06eff1 --- /dev/null +++ b/docs/modin_dataframes.md @@ -0,0 +1,163 @@ +--- +jupytext: + formats: md:myst + notebook_metadata_filter: -jupytext.text_representation.jupytext_version + text_representation: + extension: .md + format_name: myst + format_version: 0.13 +kernelspec: + display_name: itables + language: python + name: itables +--- + +# Modin DataFrames + +In this notebook we make sure that our test dataframes are displayed nicely with the default `itables` settings. 
+ +```{code-cell} +from itables import init_notebook_mode, show +from itables.sample_dfs import get_dict_of_test_modin_dfs + +dict_of_test_dfs = get_dict_of_test_modin_dfs() +init_notebook_mode(all_interactive=True) +``` + +## empty + +```{code-cell} +show(dict_of_test_dfs["empty"]) +``` + +## No rows + +```{code-cell} +show(dict_of_test_dfs["no_rows"]) +``` + +## No rows one column + +```{code-cell} +show(dict_of_test_dfs["no_rows_one_column"]) +``` + +## No columns + +```{code-cell} +show(dict_of_test_dfs["no_columns"]) +``` + +## No columns one row + +```{code-cell} +show(dict_of_test_dfs["no_columns_one_row"]) +``` + +## bool + +```{code-cell} +show(dict_of_test_dfs["bool"]) +``` + +## Nullable boolean + +```{code-cell} +show(dict_of_test_dfs["nullable_boolean"]) +``` + +## int + +```{code-cell} +show(dict_of_test_dfs["int"]) +``` + +## Nullable integer + +```{code-cell} +show(dict_of_test_dfs["nullable_int"]) +``` + +## float + +```{code-cell} +show(dict_of_test_dfs["float"]) +``` + +## str + +```{code-cell} +show(dict_of_test_dfs["str"]) +``` + +## time + +```{code-cell} +show(dict_of_test_dfs["time"]) +``` + +## object + +```{code-cell} +show(dict_of_test_dfs["object"]) +``` + +## ordered_categories + +```{code-cell} +show(dict_of_test_dfs["ordered_categories"]) +``` + +## ordered_categories_in_multiindex + +```{code-cell} +show(dict_of_test_dfs["ordered_categories_in_multiindex"]) +``` + +## countries + +```{code-cell} +:tags: [full-width] + +show(dict_of_test_dfs["countries"]) +``` + +## capital + +```{code-cell} +show(dict_of_test_dfs["capital"]) +``` + +## int_float_str + +```{code-cell} +show(dict_of_test_dfs["int_float_str"]) +``` + +## wide + +```{code-cell} +:tags: [full-width] + +show(dict_of_test_dfs["wide"], maxBytes=100000, maxColumns=100, scrollX=True) +``` + +## long_column_names + +```{code-cell} +:tags: [full-width] + +show(dict_of_test_dfs["long_column_names"], scrollX=True) +``` + +## named_column_index + +```{code-cell} 
+show(dict_of_test_dfs["named_column_index"]) +``` + +## big_integers + +```{code-cell} +show(dict_of_test_dfs["big_integers"]) +``` diff --git a/docs/polars_dataframes.md b/docs/polars_dataframes.md index 3f952e55..8ec62370 100644 --- a/docs/polars_dataframes.md +++ b/docs/polars_dataframes.md @@ -19,9 +19,9 @@ dataframes are displayed nicely with the default `itables` settings. ```{code-cell} from itables import init_notebook_mode, show -from itables.sample_dfs import get_dict_of_test_dfs +from itables.sample_dfs import get_dict_of_test_polars_dfs -dict_of_test_dfs = get_dict_of_test_dfs(polars=True) +dict_of_test_dfs = get_dict_of_test_polars_dfs() init_notebook_mode(all_interactive=True) ``` diff --git a/pyproject.toml b/pyproject.toml index fa2d7323..b0f83515 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,6 @@ classifiers = [ "Intended Audience :: Science/Research", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -26,8 +25,8 @@ classifiers = [ "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] -requires-python = ">= 3.7" -dependencies = ["IPython", "pandas", "numpy"] +requires-python = ">= 3.8" +dependencies = ["IPython", "pandas", "numpy", "narwhals>=1.18.3"] dynamic = ["version"] [project.optional-dependencies] diff --git a/src/itables/datatables_format.py b/src/itables/datatables_format.py index ee0a7f26..4d51a083 100644 --- a/src/itables/datatables_format.py +++ b/src/itables/datatables_format.py @@ -6,12 +6,6 @@ import pandas as pd import pandas.io.formats.format as fmt -try: - import polars as pl -except ImportError: - pl = None - - JS_MAX_SAFE_INTEGER = 2**53 - 1 JS_MIN_SAFE_INTEGER = -(2**53 - 1) @@ -91,8 +85,7 @@ def datatables_rows(df, count=None, warn_on_unexpected_types=False, pure_json=Fa assert 
missing_columns > 0 empty_columns = [[None] * len(df)] * missing_columns - try: - # Pandas DataFrame + if isinstance(df, pd.DataFrame): data = list( zip( *(empty_columns + [_format_column(x, pure_json) for _, x in df.items()]) @@ -108,17 +101,19 @@ def datatables_rows(df, count=None, warn_on_unexpected_types=False, pure_json=Fa cls=generate_encoder(warn_on_unexpected_types), allow_nan=not pure_json, ) - except AttributeError: - # Polars DataFrame + else: + # Polars, Modin, or other + import narwhals as nw + + df = nw.from_native(df) data = df.rows() - import polars as pl has_bigints = any( ( - x.dtype == pl.Int64 + x.dtype == nw.Int64 and ((x > JS_MAX_SAFE_INTEGER).any() or (x < JS_MIN_SAFE_INTEGER).any()) ) - or (x.dtype == pl.UInt64 and (x > JS_MAX_SAFE_INTEGER).any()) + or (x.dtype == nw.UInt64 and (x > JS_MAX_SAFE_INTEGER).any()) for x in (df[col] for col in df.columns) ) js = json.dumps(data, cls=generate_encoder(False), allow_nan=not pure_json) diff --git a/src/itables/downsample.py b/src/itables/downsample.py index 2fdec645..d26cc64a 100644 --- a/src/itables/downsample.py +++ b/src/itables/downsample.py @@ -2,15 +2,13 @@ import pandas as pd -from .datatables_format import _isetitem - def nbytes(df): - try: + if isinstance(df, pd.DataFrame): return sum(x.values.nbytes for _, x in df.items()) - except AttributeError: - # Polars DataFrame - return df.estimated_size() + + # Narwhals + return df.estimated_size() def as_nbytes(mem): @@ -97,31 +95,36 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio= second_half = max_rows // 2 first_half = max_rows - second_half if second_half: - try: + if isinstance(df, pd.DataFrame): df = pd.concat((df.iloc[:first_half], df.iloc[-second_half:])) - except AttributeError: - df = df.head(first_half).vstack(df.tail(second_half)) + else: + from narwhals import concat + + df = concat([df.head(first_half), df.tail(second_half)], how="vertical") else: - try: + if isinstance(df, pd.DataFrame): df = 
df.iloc[:first_half] - except AttributeError: + else: df = df.head(first_half) if len(df.columns) > max_columns > 0: second_half = max_columns // 2 first_half = max_columns - second_half if second_half: - try: + if isinstance(df, pd.DataFrame): df = pd.concat( (df.iloc[:, :first_half], df.iloc[:, -second_half:]), axis=1 ) - except AttributeError: - df = df[df.columns[:first_half]].hstack(df[df.columns[-second_half:]]) + else: + first_and_last_columns = ( + df.columns[:first_half] + df.columns[-second_half:] + ) + df = df.select(first_and_last_columns) else: - try: + if isinstance(df, pd.DataFrame): df = df.iloc[:, :first_half] - except AttributeError: - df = df[df.columns[:first_half]] + else: + df = df.select(df.columns[:first_half]) df_nbytes = nbytes(df) if df_nbytes > max_bytes > 0: @@ -144,13 +147,6 @@ def _downsample(df, max_rows=0, max_columns=0, max_bytes=0, target_aspect_ratio= ) # max_bytes is smaller than the average size of one cell - try: - df = df.iloc[:1, :1] - _isetitem(df, 0, ["..."]) - except AttributeError: - import polars as pl # noqa - - df = pl.DataFrame({df.columns[0]: ["..."]}) - return df + return pd.DataFrame({df.columns[0]: ["..."]}) return df diff --git a/src/itables/javascript.py b/src/itables/javascript.py index cf69b551..ec590622 100644 --- a/src/itables/javascript.py +++ b/src/itables/javascript.py @@ -18,12 +18,6 @@ except ImportError: pd_style = None -try: - import polars as pl -except ImportError: - # Define pl.Series as pd.Series - import pandas as pl - from IPython.display import HTML, display import itables.options as opt @@ -48,7 +42,7 @@ _ORIGINAL_DATAFRAME_STYLE_REPR_HTML = ( None if pd_style is None else pd_style.Styler._repr_html_ ) -_ORIGINAL_POLARS_DATAFRAME_REPR_HTML = pl.DataFrame._repr_html_ +_ORIGINAL_DATAFRAME_REPR_HTML_OTHER_LIBS = {} _CONNECTED = True DEFAULT_LAYOUT = { "topStart": "pageLength", @@ -94,17 +88,48 @@ def init_notebook_mode( pd.Series._repr_html_ = _datatables_repr_ if pd_style is not None: 
pd_style.Styler._repr_html_ = _datatables_repr_ - pl.DataFrame._repr_html_ = _datatables_repr_ - pl.Series._repr_html_ = _datatables_repr_ else: pd.DataFrame._repr_html_ = _ORIGINAL_DATAFRAME_REPR_HTML if pd_style is not None: pd_style.Styler._repr_html_ = _ORIGINAL_DATAFRAME_STYLE_REPR_HTML - pl.DataFrame._repr_html_ = _ORIGINAL_POLARS_DATAFRAME_REPR_HTML + if hasattr(pd.Series, "_repr_html_"): del pd.Series._repr_html_ - if hasattr(pl.Series, "_repr_html_"): - del pl.Series._repr_html_ + + try: + import narwhals as nw + except ImportError: + pass + else: + for name, lib in [ + ("polars", nw.dependencies.get_polars()), + ("modin", nw.dependencies.get_modin()), + ]: + if lib is None: + continue + + if all_interactive: + if name not in _ORIGINAL_DATAFRAME_REPR_HTML_OTHER_LIBS: + try: + _ORIGINAL_DATAFRAME_REPR_HTML_OTHER_LIBS[name] = ( + lib.DataFrame._repr_html_ + ) + except AttributeError: + pass + + lib.DataFrame._repr_html_ = _datatables_repr_ + lib.Series._repr_html_ = _datatables_repr_ + + else: + if name in _ORIGINAL_DATAFRAME_REPR_HTML_OTHER_LIBS: + lib.DataFrame._repr_html_ = ( + _ORIGINAL_DATAFRAME_REPR_HTML_OTHER_LIBS[name] + ) + elif hasattr(lib.DataFrame, "_repr_html_"): + del lib.DataFrame._repr_html_ + + if hasattr(lib.Series, "_repr_html_"): + del lib.Series._repr_html_ display(HTML(read_package_file("html/init_datatables.html"))) @@ -153,10 +178,9 @@ def _table_header( """This function returns the HTML table header. 
Rows are not included.""" # Generate table head using pandas.to_html(), see issue 63 pattern = re.compile(r".*(.*)", flags=re.MULTILINE | re.DOTALL) - try: + if isinstance(df, pd.DataFrame): html_header = df.head(0).to_html(escape=False) - except AttributeError: - # Polars DataFrames + else: html_header = pd.DataFrame(data=[], columns=df.columns, dtype=float).to_html() match = pattern.match(html_header) thead = match.groups()[0] @@ -340,16 +364,24 @@ def to_html_datatable( if isinstance(df, (np.ndarray, np.generic)): df = pd.DataFrame(df) - if isinstance(df, (pd.Series, pl.Series)): + # Convert Series (Pandas, Polars, Modin, etc...) to DataFrames + try: df = df.to_frame() + except AttributeError: + pass + + if not isinstance(df, pd.DataFrame): + # We use narwhals to get the data and downsample if necessary + import narwhals as nw + + df = nw.from_native(df) if showIndex == "auto": - try: + if isinstance(df, pd.DataFrame): showIndex = df.index.name is not None or not isinstance( df.index, pd.RangeIndex ) - except AttributeError: - # Polars DataFrame + else: showIndex = False maxBytes = kwargs.pop("maxBytes", 0) @@ -403,11 +435,8 @@ def to_html_datatable( classes = " ".join(classes) if not showIndex: - try: + if isinstance(df, pd.DataFrame): df = df.set_index(pd.RangeIndex(len(df.index))) - except AttributeError: - # Polars DataFrames - pass table_header = _table_header( df, @@ -504,16 +533,16 @@ def get_itables_extension_arguments(df, caption=None, selected_rows=None, **kwar if isinstance(df, (np.ndarray, np.generic)): df = pd.DataFrame(df) - if isinstance(df, (pd.Series, pl.Series)): + # Convert series to a (single column) dataframe + if hasattr(df, "to_frame"): df = df.to_frame() if showIndex == "auto": - try: + if isinstance(df, pd.DataFrame): showIndex = df.index.name is not None or not isinstance( df.index, pd.RangeIndex ) - except AttributeError: - # Polars DataFrame + else: showIndex = False maxBytes = kwargs.pop("maxBytes", 0) @@ -536,12 +565,8 @@ def 
get_itables_extension_arguments(df, caption=None, selected_rows=None, **kwar if isinstance(classes, list): classes = " ".join(classes) - if not showIndex: - try: - df = df.set_index(pd.RangeIndex(len(df.index))) - except AttributeError: - # Polars DataFrames - pass + if not showIndex and isinstance(df, pd.DataFrame): + df = df.set_index(pd.RangeIndex(len(df.index))) if showIndex: df = safe_reset_index(df) @@ -714,16 +739,15 @@ def to_html_datatable_using_to_html( if isinstance(df, (np.ndarray, np.generic)): df = pd.DataFrame(df) - if isinstance(df, (pd.Series, pl.Series)): + if hasattr(df, "to_frame"): df = df.to_frame() if showIndex == "auto": - try: + if isinstance(df, pd.DataFrame): showIndex = df.index.name is not None or not isinstance( df.index, pd.RangeIndex ) - except AttributeError: - # Polars DataFrame + else: showIndex = False _adjust_layout( diff --git a/src/itables/sample_dfs.py b/src/itables/sample_dfs.py index 3b814d07..84a5ee87 100644 --- a/src/itables/sample_dfs.py +++ b/src/itables/sample_dfs.py @@ -100,7 +100,7 @@ def get_df_complex_index(): return df -def get_dict_of_test_dfs(N=100, M=100, polars=False): +def get_dict_of_test_dfs(N=100, M=100): NM_values = np.reshape(np.linspace(start=0.0, stop=1.0, num=N * M), (N, M)) test_dfs = { @@ -261,29 +261,43 @@ def get_dict_of_test_dfs(N=100, M=100, polars=False): } ), } + return test_dfs - if polars: - import polars as pl - import pyarrow as pa - - polars_dfs = {} - for key, df in test_dfs.items(): - if key == "multiindex": - # Since Polars 1.2, pl.from_pandas fails with this error: - # ValueError: Pandas dataframe contains non-unique indices and/or column names. - # Polars dataframes require unique string names for columns. 
- # See https://github.com/pola-rs/polars/issues/18130 - df.index = df.index.tolist() - try: - polars_dfs[key] = pl.from_pandas(df) - except (pa.ArrowInvalid, ValueError): - pass - return polars_dfs - return test_dfs +def get_dict_of_test_polars_dfs(**kwargs): + + import polars as pl + import pyarrow as pa + + polars_dfs = {} + for key, df in get_dict_of_test_dfs(**kwargs).items(): + if key == "multiindex": + # Since Polars 1.2, pl.from_pandas fails with this error: + # ValueError: Pandas dataframe contains non-unique indices and/or column names. + # Polars dataframes require unique string names for columns. + # See https://github.com/pola-rs/polars/issues/18130 + df.index = df.index.tolist() + try: + polars_dfs[key] = pl.from_pandas(df) + except (pa.ArrowInvalid, ValueError): + pass + return polars_dfs + + +def get_dict_of_test_modin_dfs(**kwargs): + + import modin.pandas as mpd + + modin_dfs = {} + for key, df in get_dict_of_test_dfs(**kwargs).items(): + # Modin does not implement MultiIndex as of Dec 2024 + if key in {"multiindex", "duplicated_columns", "complex_index"}: + continue + modin_dfs[key] = mpd.DataFrame(df) + return modin_dfs -def get_dict_of_test_series(polars=False): +def get_dict_of_test_series(): series = {} for df_name, df in get_dict_of_test_dfs().items(): if len(df.columns) > 6: @@ -292,28 +306,43 @@ def get_dict_of_test_series(polars=False): # Case of duplicate columns if not isinstance(df[col], pd.Series): continue - series["{}.{}".format(df_name, col)] = df[col] + if isinstance(col, tuple): + col_name = "/".join(str(x) for x in col) + else: + col_name = str(col) + series[f"{df_name}.{col_name}"] = df[col] + + return series - if polars: - import polars as pl - import pyarrow as pa - polars_series = {} - for key in series: - try: - polars_series[key] = pl.from_pandas(series[key]) - except (pa.ArrowInvalid, ValueError): - pass +def get_dict_of_test_polars_series(): + import polars as pl + import pyarrow as pa - # Add a Polar table with unsigned 
integers - # https://github.com/mwouts/itables/issues/192 - # https://github.com/mwouts/itables/issues/299 - polars_series["u32"] = pl.Series([1, 2, 5]).cast(pl.UInt32) - polars_series["u64"] = pl.Series([1, 2, 2**40]).cast(pl.UInt64) + series = get_dict_of_test_series() + polars_series = {} + for key in series: + try: + polars_series[key] = pl.from_pandas(series[key]) + except (pa.ArrowInvalid, ValueError): + pass - return polars_series + # Add a Polar table with unsigned integers + # https://github.com/mwouts/itables/issues/192 + # https://github.com/mwouts/itables/issues/299 + polars_series["u32"] = pl.Series([1, 2, 5]).cast(pl.UInt32) + polars_series["u64"] = pl.Series([1, 2, 2**40]).cast(pl.UInt64) - return series + return polars_series + + +def get_dict_of_test_modin_series(): + import modin.pandas as mpd + + return { + name: mpd.Series(value.rename(name)) + for name, value in get_dict_of_test_series().items() + } @lru_cache() diff --git a/src/itables/version.py b/src/itables/version.py index 3468a8f4..947b8ea5 100644 --- a/src/itables/version.py +++ b/src/itables/version.py @@ -1,3 +1,3 @@ """ITables' version number""" -__version__ = "2.2.4" +__version__ = "2.3.0-dev" diff --git a/tests/test_documentation_notebooks_run.py b/tests/test_documentation_notebooks_pass.py similarity index 88% rename from tests/test_documentation_notebooks_run.py rename to tests/test_documentation_notebooks_pass.py index 3905b65c..f00e6db4 100644 --- a/tests/test_documentation_notebooks_run.py +++ b/tests/test_documentation_notebooks_pass.py @@ -8,11 +8,6 @@ from itables import init_notebook_mode from itables.javascript import pd_style -try: - import polars as pl -except ImportError: - pl = None - pytestmark = pytest.mark.skipif(sys.version_info < (3, 8), reason="Require Python>=3.8") @@ -27,8 +22,10 @@ def list_doc_notebooks(): "notebook", list_doc_notebooks(), ids=lambda notebook: notebook.stem ) def test_run_documentation_notebooks(notebook): - if "polars" in notebook.stem and 
pl is None: - pytest.skip("Polars is not available") + if "polars" in notebook.stem: + pytest.importorskip("polars") + if "modin" in notebook.stem: + pytest.importorskip("modin") if "pandas_style" in notebook.stem and pd_style is None: pytest.skip("Pandas Style is not available") if "shiny" in notebook.stem: diff --git a/tests/test_downsample.py b/tests/test_downsample.py index 5fa2ad8f..b5d5db54 100644 --- a/tests/test_downsample.py +++ b/tests/test_downsample.py @@ -11,6 +11,7 @@ ) try: + import narwhals as nw import polars as pl except ImportError: pl = None @@ -31,7 +32,7 @@ def large_tables(N=1000, M=1000): pd.DataFrame("abcdefg", columns=range(M), index=range(N)), ] if pl is not None: - dfs.extend([pl.from_pandas(df) for df in dfs]) + dfs.extend([nw.from_native(pl.from_pandas(df)) for df in dfs]) return dfs diff --git a/tests/test_modin.py b/tests/test_modin.py new file mode 100644 index 00000000..faac93f8 --- /dev/null +++ b/tests/test_modin.py @@ -0,0 +1,26 @@ +import pytest + +from itables import to_html_datatable +from itables.sample_dfs import get_dict_of_test_modin_dfs, get_dict_of_test_modin_series + +pytest.importorskip("modin") + + +@pytest.fixture(params=get_dict_of_test_modin_dfs().items(), ids=lambda param: param[0]) +def df(request): + return request.param[1] + + +@pytest.fixture( + params=get_dict_of_test_modin_series().items(), ids=lambda param: param[0] +) +def x(request): + return request.param[1] + + +def test_show_modin_series(x, use_to_html): + to_html_datatable(x, use_to_html) + + +def test_show_modin_df(df, use_to_html): + to_html_datatable(df, use_to_html) diff --git a/tests/test_polars.py b/tests/test_polars.py index 23523943..05b9931a 100644 --- a/tests/test_polars.py +++ b/tests/test_polars.py @@ -2,31 +2,39 @@ from itables import to_html_datatable from itables.javascript import datatables_rows -from itables.sample_dfs import get_dict_of_test_dfs, get_dict_of_test_series +from itables.sample_dfs import ( + 
get_dict_of_test_polars_dfs, + get_dict_of_test_polars_series, +) -try: - import polars # noqa -except ImportError as e: - pytest.skip(str(e), allow_module_level=True) +pl = pytest.importorskip("polars") -@pytest.mark.parametrize( - "name,x", [(name, x) for name, x in get_dict_of_test_series(polars=True).items()] +@pytest.fixture( + params=get_dict_of_test_polars_dfs().items(), ids=lambda param: param[0] ) -def test_show_polars_series(name, x, use_to_html): - to_html_datatable(x, use_to_html) +def df(request): + return request.param[1] -@pytest.mark.parametrize( - "name,df", [(name, df) for name, df in get_dict_of_test_dfs(polars=True).items()] +@pytest.fixture( + params=get_dict_of_test_polars_series().items(), ids=lambda param: param[0] ) -def test_show_polars_df(name, df, use_to_html): +def x(request): + return request.param[1] + + +def test_show_polars_series(x, use_to_html): + to_html_datatable(x, use_to_html) + + +def test_show_polars_df(df, use_to_html): to_html_datatable(df, use_to_html) def test_encode_mixed_contents(): # Make sure that the bigint escape works for mixed content # 291 - df = polars.DataFrame( + df = pl.DataFrame( { "bigint": [1666767918216000000], "int": [1699300000000], From 3a8c97ca08641aa1640adab106ebe747ea383459 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sun, 15 Dec 2024 21:18:45 +0000 Subject: [PATCH 2/9] Name the various test configs --- .github/workflows/continuous-integration.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index e3de5960..02727719 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -37,6 +37,14 @@ jobs: uses: github/codeql-action/analyze@v3 pytest: + name: > + Python ${{ matrix.python-version }} + ${{ format('(pandas {0})', matrix.pandas-version) }} + ${{ format('(numpy {0})', matrix.numpy-version) }} + ${{ matrix.polars && '(polars)' }} + ${{ 
matrix.modin && '(modin)' }} + ${{ matrix.uninstall_narwhals && '(Uninstall narwhals)' }} + ${{ matrix.uninstall_jinja2 && '(Uninstall jinja2)' }} strategy: fail-fast: false matrix: From 29c57000e50876ec1aba13f896874b6b03ca15ca Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sun, 15 Dec 2024 21:20:40 +0000 Subject: [PATCH 3/9] modin fails to install on Python 3.13 --- .github/workflows/continuous-integration.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 02727719..a1120374 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -58,7 +58,9 @@ jobs: - python-version: "3.13" pandas-version: pre polars: true - - python-version: "3.13" + - python-version: "3.12" + modin: false + - python-version: "3.12" modin: true - python-version: "3.13" uninstall_narwhals: true From 8767ac3d80f8241e1320e96b3b45ea078d9f655e Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sun, 15 Dec 2024 21:24:57 +0000 Subject: [PATCH 4/9] Remove the optional dependency on polars as we use narwhals instead --- pyproject.toml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b0f83515..78b0f9ed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,13 +30,15 @@ dependencies = ["IPython", "pandas", "numpy", "narwhals>=1.18.3"] dynamic = ["version"] [project.optional-dependencies] -polars = ["polars", "pyarrow"] style = ["matplotlib"] samples = ["pytz", "world_bank_data"] widget = ["anywidget", "traitlets"] -all = ["itables[polars,style,samples,widget]"] +all = ["itables[style,samples,widget]"] test = [ "itables[all]", + # Polars + "polars", + "pyarrow", # Pytest "pytest", "pytest-cov", From 5961993541a231ee298e9fd24d6c47bce6228c61 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sun, 15 Dec 2024 21:26:40 +0000 Subject: [PATCH 5/9] Shorter job names (latest is the 
default) --- .github/workflows/continuous-integration.yml | 24 +++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index a1120374..a9282290 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -38,25 +38,27 @@ jobs: pytest: name: > - Python ${{ matrix.python-version }} - ${{ format('(pandas {0})', matrix.pandas-version) }} - ${{ format('(numpy {0})', matrix.numpy-version) }} + ${{ matrix.python-version }} + ${{ matrix.pandas-version && format('(pandas {0})', matrix.pandas-version)}} + ${{ matrix.numpy-version && format('(numpy {0})', matrix.numpy-version)}} ${{ matrix.polars && '(polars)' }} ${{ matrix.modin && '(modin)' }} - ${{ matrix.uninstall_narwhals && '(Uninstall narwhals)' }} - ${{ matrix.uninstall_jinja2 && '(Uninstall jinja2)' }} + ${{ matrix.uninstall_narwhals && '(without narwhals)' }} + ${{ matrix.uninstall_jinja2 && '(without jinja2)' }} strategy: fail-fast: false matrix: python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13"] - pandas-version: [latest] - numpy-version: [latest] + pandas-version: [''] + numpy-version: [''] include: + - python-version: 3.9 - python-version: 3.9 pandas-version: '<2.0' numpy-version: '<2.0' - python-version: "3.13" pandas-version: pre + - python-version: "3.13" polars: true - python-version: "3.12" modin: false @@ -82,17 +84,17 @@ jobs: - name: Install a development version of 'itables' run: pip install -e .[test] - - name: Install pandas latest - if: matrix.pandas-version == 'latest' + - name: Install latest pandas + if: ${{!matrix.pandas-version}} run: pip install pandas - name: Install pandas pre-release if: matrix.pandas-version == 'pre' run: pip install pandas --pre - name: Install pandas ${{ matrix.pandas-version }} - if: matrix.pandas-version != 'pre' && matrix.pandas-version != 'latest' + if: matrix.pandas-version && 
matrix.pandas-version != 'pre' run: pip install 'pandas${{ matrix.pandas-version }}' - name: Install numpy ${{ matrix.numpy-version }} - if: matrix.numpy-version != 'latest' + if: matrix.numpy-version run: pip install 'numpy${{ matrix.numpy-version }}' - name: Install polars From 6250fffcf8038b893e5fa441bc20c0fd3f58451c Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sun, 15 Dec 2024 22:03:21 +0000 Subject: [PATCH 6/9] Header should be h1 --- docs/select.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/select.md b/docs/select.md index d3f1e87e..750d0cdc 100644 --- a/docs/select.md +++ b/docs/select.md @@ -12,7 +12,7 @@ kernelspec: name: itables --- -## Row selection +# Row selection The [select](https://datatables.net/extensions/select) extension let you select rows (or cells). When you do so, only the selected rows are exported From 056aa66b762bff72254ca754f469233127d9d443 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Sun, 15 Dec 2024 22:12:38 +0000 Subject: [PATCH 7/9] Keep narwhals when testing polars! 
--- .github/workflows/continuous-integration.yml | 32 +++++++++----------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index a9282290..99f7630b 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -41,16 +41,14 @@ jobs: ${{ matrix.python-version }} ${{ matrix.pandas-version && format('(pandas {0})', matrix.pandas-version)}} ${{ matrix.numpy-version && format('(numpy {0})', matrix.numpy-version)}} - ${{ matrix.polars && '(polars)' }} - ${{ matrix.modin && '(modin)' }} - ${{ matrix.uninstall_narwhals && '(without narwhals)' }} - ${{ matrix.uninstall_jinja2 && '(without jinja2)' }} + ${{ matrix.modifiers}} strategy: fail-fast: false matrix: python-version: [3.8, 3.9, "3.10", "3.11", "3.12", "3.13"] pandas-version: [''] numpy-version: [''] + modifiers: [''] include: - python-version: 3.9 - python-version: 3.9 @@ -58,16 +56,14 @@ jobs: numpy-version: '<2.0' - python-version: "3.13" pandas-version: pre - - python-version: "3.13" - polars: true - - python-version: "3.12" - modin: false - python-version: "3.12" - modin: true + modifiers: "modin" + - python-version: "3.13" + modifiers: "polars" - python-version: "3.13" - uninstall_narwhals: true + modifiers: "uninstall_narwhals" - python-version: "3.13" - uninstall_jinja2: true + modifiers: "uninstall_jinja2" runs-on: ubuntu-20.04 steps: - name: Checkout @@ -98,24 +94,24 @@ jobs: run: pip install 'numpy${{ matrix.numpy-version }}' - name: Install polars - if: matrix.polars + if: matrix.modifiers == 'polars' run: pip install polars - name: Install modin - if: matrix.modin + if: matrix.modifiers == 'modin' run: pip install modin[all] - name: Uninstall narwhals - if: matrix.uninstall_narwhals + if: matrix.modifiers == 'uninstall_narwhals' run: pip uninstall narwhals -y - - name: Install shiny - run: pip install "shiny>=1.0" - - name: Uninstall jinja2 - if: 
matrix.uninstall_jinja2 + if: matrix.modifiers == 'uninstall_jinja2' run: pip uninstall jinja2 -y + - name: Install shiny + run: pip install "shiny>=1.0" + - name: Install a Jupyter Kernel run: python -m ipykernel install --name itables --user From 0e1f29391e5f574ac6a00106e92a3777e23eb0e4 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 26 Dec 2024 16:24:19 +0100 Subject: [PATCH 8/9] Update environment.yml --- environment.yml | 52 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 34 insertions(+), 18 deletions(-) diff --git a/environment.yml b/environment.yml index b747e3e4..99d03aa5 100644 --- a/environment.yml +++ b/environment.yml @@ -2,29 +2,45 @@ name: itables channels: - conda-forge dependencies: - - nodejs - - python - - jupyter - - jupyterlab>=4 - - jupyter-book + # Jupyter + - jupyterlab + - jupyterlab_execute_time + - jupyter-resource-usage + + # Documentation and tests + - ipykernel - jupytext + - jupyter-book - nbconvert - - ipykernel + + # Packages + - nodejs + + # Pre-commit hooks + - pre-commit + + # Tests + - pytest + - pytest-xdist + - pytest-cov + + # Pandas and Pandas Style - pandas - matplotlib + + # Polars - polars - pyarrow - - pytest - - pytest-xdist - - pytest-cov - - pre-commit - - pip - - setuptools - - twine - - ghp-import + + # Modin + - modin-ray + + # Widget + - anywidget + + # Streamlit + - streamlit + + # Shiny - shiny - shinywidgets - - streamlit - - anywidget - - pip: - - world_bank_data From f3d81a2e04906d654ce3a6407d2725e552906d26 Mon Sep 17 00:00:00 2001 From: Marc Wouts Date: Thu, 26 Dec 2024 17:06:47 +0100 Subject: [PATCH 9/9] Test Ibis dataframes --- .github/workflows/continuous-integration.yml | 10 +- docs/ibis_dataframes.md | 183 +++++++++++++++++++ environment.yml | 3 + src/itables/javascript.py | 1 + src/itables/sample_dfs.py | 24 +++ tests/test_documentation_notebooks_pass.py | 2 + tests/test_ibis.py | 43 +++++ 7 files changed, 264 insertions(+), 2 deletions(-) create mode 100644 
docs/ibis_dataframes.md create mode 100644 tests/test_ibis.py diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index 99f7630b..4634a5ab 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -56,10 +56,12 @@ jobs: numpy-version: '<2.0' - python-version: "3.13" pandas-version: pre - - python-version: "3.12" - modifiers: "modin" - python-version: "3.13" modifiers: "polars" + - python-version: "3.13" + modifiers: "ibis" + - python-version: "3.12" + modifiers: "modin" - python-version: "3.13" modifiers: "uninstall_narwhals" - python-version: "3.13" @@ -97,6 +99,10 @@ jobs: if: matrix.modifiers == 'polars' run: pip install polars + - name: Install ibis + if: matrix.modifiers == 'ibis' + run: pip install 'ibis-framework[duckdb]' + - name: Install modin if: matrix.modifiers == 'modin' run: pip install modin[all] diff --git a/docs/ibis_dataframes.md b/docs/ibis_dataframes.md new file mode 100644 index 00000000..72bd6ecd --- /dev/null +++ b/docs/ibis_dataframes.md @@ -0,0 +1,183 @@ +--- +jupytext: + formats: md:myst + notebook_metadata_filter: -jupytext.text_representation.jupytext_version + text_representation: + extension: .md + format_name: myst + format_version: 0.13 +kernelspec: + display_name: itables + language: python + name: itables +--- + +# Sample dataframes + +In this notebook we make sure that our test [Ibis](https://ibis-project.org/) dataframes are displayed nicely with the default `itables` settings. 
+ +```{code-cell} +from itables import init_notebook_mode, show +from itables.sample_dfs import get_dict_of_test_ibis_dfs + +dict_of_test_dfs = get_dict_of_test_ibis_dfs() +init_notebook_mode(all_interactive=True) +``` + +## empty + +```{code-cell} +show(dict_of_test_dfs["empty"]) +``` + +## No rows + +```{code-cell} +show(dict_of_test_dfs["no_rows"]) +``` + +## No rows one column + +```{code-cell} +show(dict_of_test_dfs["no_rows_one_column"]) +``` + +## No columns + +```{code-cell} +show(dict_of_test_dfs["no_columns"]) +``` + +## No columns one row + +```{code-cell} +show(dict_of_test_dfs["no_columns_one_row"]) +``` + +## bool + +```{code-cell} +show(dict_of_test_dfs["bool"]) +``` + +## Nullable boolean + +```{code-cell} +show(dict_of_test_dfs["nullable_boolean"]) +``` + +## int + +```{code-cell} +show(dict_of_test_dfs["int"]) +``` + +## Nullable integer + +```{code-cell} +show(dict_of_test_dfs["nullable_int"]) +``` + +## float + +```{code-cell} +show(dict_of_test_dfs["float"]) +``` + +## str + +```{code-cell} +show(dict_of_test_dfs["str"]) +``` + +## time + +```{code-cell} +show(dict_of_test_dfs["time"]) +``` + +## object + +```{code-cell} +show(dict_of_test_dfs["object"]) +``` + +## ordered_categories + +```{code-cell} +show(dict_of_test_dfs["ordered_categories"]) +``` + +## ordered_categories_in_multiindex + +```{code-cell} +show(dict_of_test_dfs["ordered_categories_in_multiindex"]) +``` + +## multiindex + +```{code-cell} +show(dict_of_test_dfs["multiindex"]) +``` + +## countries + +```{code-cell} +:tags: [full-width] + +show(dict_of_test_dfs["countries"]) +``` + +## capital + +```{code-cell} +show(dict_of_test_dfs["capital"]) +``` + +## complex_index + +```{code-cell} +:tags: [full-width] + +show(dict_of_test_dfs["complex_index"]) +``` + +## int_float_str + +```{code-cell} +show(dict_of_test_dfs["int_float_str"]) +``` + +## wide + +```{code-cell} +:tags: [full-width] + +show(dict_of_test_dfs["wide"], maxBytes=100000, maxColumns=100, scrollX=True) +``` + +## 
long_column_names + +```{code-cell} +:tags: [full-width] + +show(dict_of_test_dfs["long_column_names"], scrollX=True) +``` + +## duplicated_columns + +```{code-cell} +show(dict_of_test_dfs["duplicated_columns"]) +``` + +## named_column_index + +```{code-cell} +show(dict_of_test_dfs["named_column_index"]) +``` + +## big_integers + +```{code-cell} +show(dict_of_test_dfs["big_integers"]) +``` diff --git a/environment.yml b/environment.yml index 99d03aa5..0230c183 100644 --- a/environment.yml +++ b/environment.yml @@ -32,6 +32,9 @@ dependencies: - polars - pyarrow + # Ibis + - ibis-duckdb + # Modin - modin-ray diff --git a/src/itables/javascript.py b/src/itables/javascript.py index ec590622..d7b31d1f 100644 --- a/src/itables/javascript.py +++ b/src/itables/javascript.py @@ -104,6 +104,7 @@ def init_notebook_mode( for name, lib in [ ("polars", nw.dependencies.get_polars()), ("modin", nw.dependencies.get_modin()), + ("ibis", nw.dependencies.get_ibis()), ]: if lib is None: continue diff --git a/src/itables/sample_dfs.py b/src/itables/sample_dfs.py index 84a5ee87..b53e8e75 100644 --- a/src/itables/sample_dfs.py +++ b/src/itables/sample_dfs.py @@ -284,6 +284,26 @@ def get_dict_of_test_polars_dfs(**kwargs): return polars_dfs +def get_dict_of_test_ibis_dfs(**kwargs): + + import ibis + + ibis_dfs = {} + + t = ibis.table(dict(one="string", two="float", three="int32"), name="my_data") + ibis_dfs["table"] = t + + ibis_dfs["table_select"] = t.select("two", "one") + + for key, df in get_dict_of_test_dfs(**kwargs).items(): + try: + ibis_dfs[key] = ibis.memtable(df) + except (TypeError, ibis.common.exceptions.IbisInputError): + pass + + return ibis_dfs + + def get_dict_of_test_modin_dfs(**kwargs): import modin.pandas as mpd @@ -336,6 +356,10 @@ def get_dict_of_test_polars_series(): return polars_series +def get_dict_of_test_ibis_series(): + return {name: value for name, value in get_dict_of_test_ibis_dfs().items()} + + def get_dict_of_test_modin_series(): import modin.pandas as mpd 
diff --git a/tests/test_documentation_notebooks_pass.py b/tests/test_documentation_notebooks_pass.py index f00e6db4..130ff8b3 100644 --- a/tests/test_documentation_notebooks_pass.py +++ b/tests/test_documentation_notebooks_pass.py @@ -24,6 +24,8 @@ def list_doc_notebooks(): def test_run_documentation_notebooks(notebook): if "polars" in notebook.stem: pytest.importorskip("polars") + if "ibis" in notebook.stem: + pytest.importorskip("ibis") if "modin" in notebook.stem: pytest.importorskip("modin") if "pandas_style" in notebook.stem and pd_style is None: diff --git a/tests/test_ibis.py b/tests/test_ibis.py new file mode 100644 index 00000000..23298be9 --- /dev/null +++ b/tests/test_ibis.py @@ -0,0 +1,43 @@ +import pytest + +from itables import to_html_datatable +from itables.javascript import datatables_rows +from itables.sample_dfs import get_dict_of_test_ibis_dfs, get_dict_of_test_ibis_series + +ibis = pytest.importorskip("ibis") + + +@pytest.fixture(params=get_dict_of_test_ibis_dfs().items(), ids=lambda param: param[0]) +def df(request): + return request.param[1] + + +@pytest.fixture( + params=get_dict_of_test_ibis_series().items(), ids=lambda param: param[0] +) +def x(request): + return request.param[1] + + +def test_show_ibis_series(x, use_to_html): + to_html_datatable(x, use_to_html) + + +def test_show_ibis_df(df, use_to_html): + to_html_datatable(df, use_to_html) + + +def test_encode_mixed_contents(): + # Make sure that the bigint escape works for mixed content # 291 + df = ibis.DataFrame( + { + "bigint": [1666767918216000000], + "int": [1699300000000], + "float": [0.9510565400123596], + "neg": [-0.30901700258255005], + } + ) + assert ( + datatables_rows(df) + == '[[BigInt("1666767918216000000"), 1699300000000, 0.9510565400123596, -0.30901700258255005]]' + )