Skip to content

Commit

Permalink
feat(python): add style namespace (which defers to Great Tables) (#16809
Browse files Browse the repository at this point in the history
)

Co-authored-by: Marco Edward Gorelli <marcogorelli@protonmail.com>
  • Loading branch information
machow and MarcoGorelli authored Jun 13, 2024
1 parent e29d28e commit 8965a68
Show file tree
Hide file tree
Showing 11 changed files with 295 additions and 1 deletion.
151 changes: 151 additions & 0 deletions docs/src/python/user-guide/misc/styling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# Shared setup for the styling guide. This section is both displayed on the docs
# page and executed by the docs build; the `df` it creates is reused by every
# later snippet in this file.
# --8<-- [start:dataframe]
import polars as pl
import polars.selectors as cs

path = "docs/data/iris.csv"

df = (
    pl.scan_csv(path)
    .group_by("species")
    .agg(cs.starts_with("petal").mean().round(3))
    .collect()
)
print(df)
# --8<-- [end:dataframe]

# Each example below comes in two sections: `<name>` is the code displayed on the
# docs page, and `<name>-out` is the variant the docs build executes, which prints
# the table's raw HTML. Keep both halves of a pair in sync when editing.
# --8<-- [start:structure-header]
df.style.tab_header(title="Iris Data", subtitle="Mean measurement values per species")
# --8<-- [end:structure-header]

# --8<-- [start:structure-header-out]
print(
    df.style.tab_header(
        title="Iris Data", subtitle="Mean measurement values per species"
    ).as_raw_html()
)
# --8<-- [end:structure-header-out]


# --8<-- [start:structure-stub]
df.style.tab_stub(rowname_col="species")
# --8<-- [end:structure-stub]

# --8<-- [start:structure-stub-out]
print(df.style.tab_stub(rowname_col="species").as_raw_html())
# --8<-- [end:structure-stub-out]

# --8<-- [start:structure-spanner]
(
    df.style.tab_spanner("Petal", cs.starts_with("petal")).cols_label(
        petal_length="Length", petal_width="Width"
    )
)
# --8<-- [end:structure-spanner]

# --8<-- [start:structure-spanner-out]
print(
    df.style.tab_spanner("Petal", cs.starts_with("petal"))
    .cols_label(petal_length="Length", petal_width="Width")
    .as_raw_html()
)
# --8<-- [end:structure-spanner-out]

# --8<-- [start:format-number]
df.style.fmt_number("petal_width", decimals=1)
# --8<-- [end:format-number]


# --8<-- [start:format-number-out]
print(df.style.fmt_number("petal_width", decimals=1).as_raw_html())
# --8<-- [end:format-number-out]


# Styling examples. As elsewhere in this file, `<name>` is the displayed code and
# `<name>-out` is the executed variant that prints the table's raw HTML. The
# `great_tables` import is repeated in every section so that each snippet is
# complete when read on its own.
# --8<-- [start:style-simple]
from great_tables import loc, style

df.style.tab_style(
    style.fill("yellow"),
    loc.body(
        rows=pl.col("petal_length") == pl.col("petal_length").max(),
    ),
)
# --8<-- [end:style-simple]

# --8<-- [start:style-simple-out]
from great_tables import loc, style

print(
    df.style.tab_style(
        style.fill("yellow"),
        loc.body(
            rows=pl.col("petal_length") == pl.col("petal_length").max(),
        ),
    ).as_raw_html()
)
# --8<-- [end:style-simple-out]


# --8<-- [start:style-bold-column]
from great_tables import loc, style

df.style.tab_style(
    style.text(weight="bold"),
    loc.body(columns="species"),
)
# --8<-- [end:style-bold-column]

# --8<-- [start:style-bold-column-out]
from great_tables import loc, style

print(
    df.style.tab_style(
        style.text(weight="bold"),
        loc.body(columns="species"),
    ).as_raw_html()
)
# --8<-- [end:style-bold-column-out]

# Displayed version of the full example. It must apply exactly the same chain of
# calls as the executed `full-example-out` section (minus `print`/`as_raw_html`),
# otherwise the code shown on the docs page does not match the rendered table.
# --8<-- [start:full-example]
from great_tables import loc, style

(
    df.style.tab_header(
        title="Iris Data", subtitle="Mean measurement values per species"
    )
    .tab_stub(rowname_col="species")
    .cols_label(petal_length="Length", petal_width="Width")
    .tab_spanner("Petal", cs.starts_with("petal"))
    .fmt_number("petal_width", decimals=2)
    .tab_style(
        style.fill("yellow"),
        loc.body(
            rows=pl.col("petal_length") == pl.col("petal_length").max(),
        ),
    )
    .tab_style(
        style.text(weight="bold"),
        loc.body(columns="species"),
    )
)
# --8<-- [end:full-example]

# Executed version of the full example: the docs build runs this section and the
# printed raw HTML becomes the rendered table. It chains header, stub, column
# labels, spanner, number formatting, the yellow max-row fill and the bold
# species style into a single table.
# --8<-- [start:full-example-out]
from great_tables import loc, style

print(
    df.style.tab_header(
        title="Iris Data", subtitle="Mean measurement values per species"
    )
    .tab_stub(rowname_col="species")
    .cols_label(petal_length="Length", petal_width="Width")
    .tab_spanner("Petal", cs.starts_with("petal"))
    .fmt_number("petal_width", decimals=2)
    .tab_style(
        style.fill("yellow"),
        loc.body(
            rows=pl.col("petal_length") == pl.col("petal_length").max(),
        ),
    )
    .tab_style(
        style.text(weight="bold"),
        loc.body(columns="species"),
    )
    .as_raw_html()
)
# --8<-- [end:full-example-out]
1 change: 1 addition & 0 deletions docs/user-guide/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ pip install 'polars[numpy,fsspec]'
| xlsx2csv | Support for reading from Excel files |
| deltalake | Support for reading from Delta Lake Tables |
| plot | Support for plotting Dataframes |
| style | Support for styling Dataframes |
| timezone | Timezone support, only needed if 1. you are on Python < 3.9 and/or 2. you are on Windows, otherwise no dependencies will be installed |

### Rust
Expand Down
65 changes: 65 additions & 0 deletions docs/user-guide/misc/styling.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Styling

Data in a Polars `DataFrame` can be styled for presentation using the `DataFrame.style` property. This returns a `GT` object from [Great Tables](https://posit-dev.github.io/great-tables/articles/intro.html), which enables structuring, formatting, and styling for table display.

{{code_block('user-guide/misc/styling','dataframe',[])}}

```python exec="on" result="text" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:dataframe"
```

## Structure: add header title

{{code_block('user-guide/misc/styling','structure-header',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:structure-header-out"
```

## Structure: add row stub

{{code_block('user-guide/misc/styling','structure-stub',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:structure-stub-out"
```

## Structure: add column spanner

{{code_block('user-guide/misc/styling','structure-spanner',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:structure-spanner-out"
```

## Format: limit decimal places

{{code_block('user-guide/misc/styling','format-number',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:format-number-out"
```

## Style: highlight max row

{{code_block('user-guide/misc/styling','style-simple',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:style-simple-out"
```

## Style: bold species column

{{code_block('user-guide/misc/styling','style-bold-column',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:style-bold-column-out"
```

## Full example

{{code_block('user-guide/misc/styling','full-example',[])}}

```python exec="on" session="user-guide/misc/styling"
--8<-- "python/user-guide/misc/styling.py:full-example-out"
```
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ nav:
- Misc:
- user-guide/misc/multiprocessing.md
- user-guide/misc/visualization.md
- user-guide/misc/styling.md
- user-guide/misc/comparison.md

- API reference: api/index.md
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ This page gives an overview of all public DataFrame methods.
modify_select
miscellaneous
plot
style

.. currentmodule:: polars

Expand Down
7 changes: 7 additions & 0 deletions py-polars/docs/source/reference/dataframe/style.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
=====
Style
=====

.. currentmodule:: polars

.. autoproperty:: DataFrame.style
59 changes: 59 additions & 0 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,14 @@
UInt64,
)
from polars.dependencies import (
_GREAT_TABLES_AVAILABLE,
_HVPLOT_AVAILABLE,
_PANDAS_AVAILABLE,
_PYARROW_AVAILABLE,
_check_for_numpy,
_check_for_pandas,
_check_for_pyarrow,
great_tables,
hvplot,
import_optional,
)
Expand Down Expand Up @@ -113,6 +115,7 @@
import jax
import numpy.typing as npt
import torch
from great_tables import GT
from hvplot.plotting.core import hvPlotTabularPolars
from xlsxwriter import Workbook

Expand Down Expand Up @@ -608,6 +611,62 @@ def plot(self) -> hvPlotTabularPolars:
hvplot.post_patch()
return hvplot.plotting.core.hvPlotTabularPolars(self)

@property
@unstable()
def style(self) -> GT:
    """
    Create a Great Table for styling.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Polars does not implement styling logic itself, but instead defers to
    the Great Tables package. Please see the `Great Tables reference <https://posit-dev.github.io/great-tables/reference/>`_
    for more information and documentation.

    Returns
    -------
    GT
        A Great Tables ``GT`` object constructed from this DataFrame.

    Raises
    ------
    ModuleNotFoundError
        If the optional ``great_tables`` dependency is not available.

    Examples
    --------
    Import some styling helpers, and create example data:

    >>> import polars.selectors as cs
    >>> from great_tables import loc, style
    >>> df = pl.DataFrame(
    ...     {
    ...         "site_id": [0, 1, 2],
    ...         "measure_a": [5, 4, 6],
    ...         "measure_b": [7, 3, 3],
    ...     }
    ... )

    Emphasize the site_id as row names:

    >>> df.style.tab_stub(rowname_col="site_id")  # doctest: +SKIP

    Fill the background for the highest measure_a value row:

    >>> df.style.tab_style(
    ...     style.fill("yellow"),
    ...     loc.body(rows=pl.col("measure_a") == pl.col("measure_a").max()),
    ... )  # doctest: +SKIP

    Put a spanner (high-level label) over measure columns:

    >>> df.style.tab_spanner(
    ...     "Measures", cs.starts_with("measure")
    ... )  # doctest: +SKIP

    Format measure_b values to two decimal places:

    >>> df.style.fmt_number("measure_b", decimals=2)  # doctest: +SKIP
    """
    # great_tables is an optional dependency; fail with a clear message when the
    # availability flag imported from polars.dependencies is false.
    if not _GREAT_TABLES_AVAILABLE:
        msg = "great_tables is required for `.style`"
        raise ModuleNotFoundError(msg)

    return great_tables.GT(self)

@property
def shape(self) -> tuple[int, int]:
"""
Expand Down
4 changes: 4 additions & 0 deletions py-polars/polars/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
_DELTALAKE_AVAILABLE = True
_FSSPEC_AVAILABLE = True
_GEVENT_AVAILABLE = True
_GREAT_TABLES_AVAILABLE = True
_HVPLOT_AVAILABLE = True
_HYPOTHESIS_AVAILABLE = True
_NUMPY_AVAILABLE = True
Expand Down Expand Up @@ -152,6 +153,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]:
import deltalake
import fsspec
import gevent
import great_tables
import hvplot
import hypothesis
import numpy
Expand All @@ -175,6 +177,7 @@ def _lazy_import(module_name: str) -> tuple[ModuleType, bool]:
# heavy/optional third party libs
deltalake, _DELTALAKE_AVAILABLE = _lazy_import("deltalake")
fsspec, _FSSPEC_AVAILABLE = _lazy_import("fsspec")
great_tables, _GREAT_TABLES_AVAILABLE = _lazy_import("great_tables")
hvplot, _HVPLOT_AVAILABLE = _lazy_import("hvplot")
hypothesis, _HYPOTHESIS_AVAILABLE = _lazy_import("hypothesis")
numpy, _NUMPY_AVAILABLE = _lazy_import("numpy")
Expand Down Expand Up @@ -301,6 +304,7 @@ def import_optional(
"deltalake",
"fsspec",
"gevent",
"great_tables",
"hvplot",
"numpy",
"pandas",
Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/meta/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def _get_dependency_info() -> dict[str, str]:
"fastexcel",
"fsspec",
"gevent",
"great_tables",
"hvplot",
"matplotlib",
"nest_asyncio",
Expand Down
4 changes: 3 additions & 1 deletion py-polars/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,12 @@ plot = ["hvplot >= 0.9.1"]
pyarrow = ["pyarrow >= 7.0.0"]
pydantic = ["pydantic"]
sqlalchemy = ["sqlalchemy", "pandas"]
style = ["great-tables >= 0.8.0"]
timezone = ["backports.zoneinfo; python_version < '3.9'", "tzdata; platform_system == 'Windows'"]
xlsx2csv = ["xlsx2csv >= 0.8.0"]
xlsxwriter = ["xlsxwriter"]
all = [
"polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]",
"polars[adbc,async,cloudpickle,connectorx,deltalake,fastexcel,fsspec,gevent,style,numpy,pandas,plot,pyarrow,pydantic,iceberg,sqlalchemy,timezone,xlsx2csv,xlsxwriter]",
]

[tool.maturin]
Expand Down Expand Up @@ -89,6 +90,7 @@ module = [
"deltalake.*",
"fsspec.*",
"gevent",
"great_tables",
"hvplot.*",
"jax.*",
"kuzu",
Expand Down
Loading

0 comments on commit 8965a68

Please sign in to comment.