diff --git a/MANIFEST.in b/MANIFEST.in index a006660e5fb..4d5c34f622c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -6,3 +6,4 @@ prune doc/generated global-exclude .DS_Store include versioneer.py include xarray/_version.py +recursive-include xarray/static * diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9d3e64badb8..12bed8f332e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -36,6 +36,12 @@ New Features ``pip install git+https://github.com/andrewgsavage/pint.git@refs/pull/6/head)``. Even with it, interaction with non-numpy array libraries, e.g. dask or sparse, is broken. +- Added new :py:meth:`Dataset._repr_html_` and :py:meth:`DataArray._repr_html_` to improve + representation of objects in jupyter. By default this feature is turned off + for now. Enable it with :py:meth:`xarray.set_options(display_style="html")`. + (:pull:`3425`) by `Benoit Bovy `_ and + `Julia Signell `_. + Bug fixes ~~~~~~~~~ - Fix regression introduced in v0.14.0 that would cause a crash if dask is installed diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 08d4f54764f..cba0c74aa3a --- a/setup.py +++ b/setup.py @@ -104,5 +104,7 @@ tests_require=TESTS_REQUIRE, url=URL, packages=find_packages(), - package_data={"xarray": ["py.typed", "tests/data/*"]}, + package_data={ + "xarray": ["py.typed", "tests/data/*", "static/css/*", "static/html/*"] + }, ) diff --git a/xarray/core/common.py b/xarray/core/common.py index 45d860a1797..1a8cf34ed39 100644 --- a/xarray/core/common.py +++ b/xarray/core/common.py @@ -1,5 +1,6 @@ import warnings from contextlib import suppress +from html import escape from textwrap import dedent from typing import ( Any, @@ -18,10 +19,10 @@ import numpy as np import pandas as pd -from . import dtypes, duck_array_ops, formatting, ops +from . 
import dtypes, duck_array_ops, formatting, formatting_html, ops from .arithmetic import SupportsArithmetic from .npcompat import DTypeLike -from .options import _get_keep_attrs +from .options import OPTIONS, _get_keep_attrs from .pycompat import dask_array_type from .rolling_exp import RollingExp from .utils import Frozen, ReprObject, either_dict_or_kwargs @@ -134,6 +135,11 @@ def __array__(self: Any, dtype: DTypeLike = None) -> np.ndarray: def __repr__(self) -> str: return formatting.array_repr(self) + def _repr_html_(self): + if OPTIONS["display_style"] == "text": + return f"
{escape(repr(self))}
" + return formatting_html.array_repr(self) + def _iter(self: Any) -> Iterator[Any]: for n in range(len(self)): yield self[n] diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 12d5cbdc9f3..eba580f84bd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3,6 +3,7 @@ import sys import warnings from collections import defaultdict +from html import escape from numbers import Number from pathlib import Path from typing import ( @@ -39,6 +40,7 @@ dtypes, duck_array_ops, formatting, + formatting_html, groupby, ops, resample, @@ -1619,6 +1621,11 @@ def to_zarr( def __repr__(self) -> str: return formatting.dataset_repr(self) + def _repr_html_(self): + if OPTIONS["display_style"] == "text": + return f"
{escape(repr(self))}
" + return formatting_html.dataset_repr(self) + def info(self, buf=None) -> None: """ Concise summary of a Dataset variables and attributes. diff --git a/xarray/core/formatting_html.py b/xarray/core/formatting_html.py new file mode 100644 index 00000000000..b03ecc12962 --- /dev/null +++ b/xarray/core/formatting_html.py @@ -0,0 +1,274 @@ +import uuid +import pkg_resources +from collections import OrderedDict +from functools import partial +from html import escape + +from .formatting import inline_variable_array_repr, short_data_repr + + +CSS_FILE_PATH = "/".join(("static", "css", "style.css")) +CSS_STYLE = pkg_resources.resource_string("xarray", CSS_FILE_PATH).decode("utf8") + + +ICONS_SVG_PATH = "/".join(("static", "html", "icons-svg-inline.html")) +ICONS_SVG = pkg_resources.resource_string("xarray", ICONS_SVG_PATH).decode("utf8") + + +def short_data_repr_html(array): + """Format "data" for DataArray and Variable.""" + internal_data = getattr(array, "variable", array)._data + if hasattr(internal_data, "_repr_html_"): + return internal_data._repr_html_() + return escape(short_data_repr(array)) + + +def format_dims(dims, coord_names): + if not dims: + return "" + + dim_css_map = { + k: " class='xr-has-index'" if k in coord_names else "" for k, v in dims.items() + } + + dims_li = "".join( + f"
  • " f"{escape(dim)}: {size}
  • " + for dim, size in dims.items() + ) + + return f"" + + +def summarize_attrs(attrs): + attrs_dl = "".join( + f"
    {escape(k)} :
    " f"
    {escape(str(v))}
    " + for k, v in attrs.items() + ) + + return f"
    {attrs_dl}
    " + + +def _icon(icon_name): + # icon_name should be defined in xarray/static/html/icons-svg-inline.html + return ( + "" + "" + "" + "".format(icon_name) + ) + + +def _summarize_coord_multiindex(name, coord): + preview = f"({', '.join(escape(l) for l in coord.level_names)})" + return summarize_variable( + name, coord, is_index=True, dtype="MultiIndex", preview=preview + ) + + +def summarize_coord(name, var): + is_index = name in var.dims + if is_index: + coord = var.variable.to_index_variable() + if coord.level_names is not None: + coords = {} + coords[name] = _summarize_coord_multiindex(name, coord) + for lname in coord.level_names: + var = coord.get_level_variable(lname) + coords[lname] = summarize_variable(lname, var) + return coords + + return {name: summarize_variable(name, var, is_index)} + + +def summarize_coords(variables): + coords = {} + for k, v in variables.items(): + coords.update(**summarize_coord(k, v)) + + vars_li = "".join(f"
  • {v}
  • " for v in coords.values()) + + return f"" + + +def summarize_variable(name, var, is_index=False, dtype=None, preview=None): + variable = var.variable if hasattr(var, "variable") else var + + cssclass_idx = " class='xr-has-index'" if is_index else "" + dims_str = f"({', '.join(escape(dim) for dim in var.dims)})" + name = escape(name) + dtype = dtype or var.dtype + + # "unique" ids required to expand/collapse subsections + attrs_id = "attrs-" + str(uuid.uuid4()) + data_id = "data-" + str(uuid.uuid4()) + disabled = "" if len(var.attrs) else "disabled" + + preview = preview or escape(inline_variable_array_repr(variable, 35)) + attrs_ul = summarize_attrs(var.attrs) + data_repr = short_data_repr_html(variable) + + attrs_icon = _icon("icon-file-text2") + data_icon = _icon("icon-database") + + return ( + f"
    {name}
    " + f"
    {dims_str}
    " + f"
    {dtype}
    " + f"
    {preview}
    " + f"" + f"" + f"" + f"" + f"
    {attrs_ul}
    " + f"
    {data_repr}
    " + ) + + +def summarize_vars(variables): + vars_li = "".join( + f"
  • {summarize_variable(k, v)}
  • " + for k, v in variables.items() + ) + + return f"" + + +def collapsible_section( + name, inline_details="", details="", n_items=None, enabled=True, collapsed=False +): + # "unique" id to expand/collapse the section + data_id = "section-" + str(uuid.uuid4()) + + has_items = n_items is not None and n_items + n_items_span = "" if n_items is None else f" ({n_items})" + enabled = "" if enabled and has_items else "disabled" + collapsed = "" if collapsed or not has_items else "checked" + tip = " title='Expand/collapse section'" if enabled else "" + + return ( + f"" + f"" + f"
    {inline_details}
    " + f"
    {details}
    " + ) + + +def _mapping_section(mapping, name, details_func, max_items_collapse, enabled=True): + n_items = len(mapping) + collapsed = n_items >= max_items_collapse + + return collapsible_section( + name, + details=details_func(mapping), + n_items=n_items, + enabled=enabled, + collapsed=collapsed, + ) + + +def dim_section(obj): + dim_list = format_dims(obj.dims, list(obj.coords)) + + return collapsible_section( + "Dimensions", inline_details=dim_list, enabled=False, collapsed=True + ) + + +def array_section(obj): + # "unique" id to expand/collapse the section + data_id = "section-" + str(uuid.uuid4()) + collapsed = "" + preview = escape(inline_variable_array_repr(obj.variable, max_width=70)) + data_repr = short_data_repr_html(obj) + data_icon = _icon("icon-database") + + return ( + "
    " + f"" + f"" + f"
    {preview}
    " + f"
    {data_repr}
    " + "
    " + ) + + +coord_section = partial( + _mapping_section, + name="Coordinates", + details_func=summarize_coords, + max_items_collapse=25, +) + + +datavar_section = partial( + _mapping_section, + name="Data variables", + details_func=summarize_vars, + max_items_collapse=15, +) + + +attr_section = partial( + _mapping_section, + name="Attributes", + details_func=summarize_attrs, + max_items_collapse=10, +) + + +def _obj_repr(header_components, sections): + header = f"
    {''.join(h for h in header_components)}
    " + sections = "".join(f"
  • {s}
  • " for s in sections) + + return ( + "
    " + f"{ICONS_SVG}" + "
    " + f"{header}" + f"
      {sections}
    " + "
    " + "
    " + ) + + +def array_repr(arr): + dims = OrderedDict((k, v) for k, v in zip(arr.dims, arr.shape)) + + obj_type = "xarray.{}".format(type(arr).__name__) + arr_name = "'{}'".format(arr.name) if getattr(arr, "name", None) else "" + coord_names = list(arr.coords) if hasattr(arr, "coords") else [] + + header_components = [ + "
    {}
    ".format(obj_type), + "
    {}
    ".format(arr_name), + format_dims(dims, coord_names), + ] + + sections = [array_section(arr)] + + if hasattr(arr, "coords"): + sections.append(coord_section(arr.coords)) + + sections.append(attr_section(arr.attrs)) + + return _obj_repr(header_components, sections) + + +def dataset_repr(ds): + obj_type = "xarray.{}".format(type(ds).__name__) + + header_components = [f"
    {escape(obj_type)}
    "] + + sections = [ + dim_section(ds), + coord_section(ds.coords), + datavar_section(ds.data_vars), + attr_section(ds.attrs), + ] + + return _obj_repr(header_components, sections) diff --git a/xarray/core/options.py b/xarray/core/options.py index 2f464a33fb1..72f9ad8e1fa 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -8,6 +8,7 @@ CMAP_SEQUENTIAL = "cmap_sequential" CMAP_DIVERGENT = "cmap_divergent" KEEP_ATTRS = "keep_attrs" +DISPLAY_STYLE = "display_style" OPTIONS = { @@ -19,9 +20,11 @@ CMAP_SEQUENTIAL: "viridis", CMAP_DIVERGENT: "RdBu_r", KEEP_ATTRS: "default", + DISPLAY_STYLE: "text", } _JOIN_OPTIONS = frozenset(["inner", "outer", "left", "right", "exact"]) +_DISPLAY_OPTIONS = frozenset(["text", "html"]) def _positive_integer(value): @@ -35,6 +38,7 @@ def _positive_integer(value): FILE_CACHE_MAXSIZE: _positive_integer, WARN_FOR_UNCLOSED_FILES: lambda value: isinstance(value, bool), KEEP_ATTRS: lambda choice: choice in [True, False, "default"], + DISPLAY_STYLE: _DISPLAY_OPTIONS.__contains__, } @@ -98,6 +102,9 @@ class set_options: attrs, ``False`` to always discard them, or ``'default'`` to use original logic that attrs should only be kept in unambiguous circumstances. Default: ``'default'``. + - ``display_style``: display style to use in jupyter for xarray objects. + Default: ``'text'``. Other options are ``'html'``. + You can use ``set_options`` either as a context manager: diff --git a/xarray/static/css/style.css b/xarray/static/css/style.css new file mode 100644 index 00000000000..536b8ab6103 --- /dev/null +++ b/xarray/static/css/style.css @@ -0,0 +1,310 @@ +/* CSS stylesheet for displaying xarray objects in jupyterlab. 
+ * + */ + +.xr-wrap { + min-width: 300px; + max-width: 700px; +} + +.xr-header { + padding-top: 6px; + padding-bottom: 6px; + margin-bottom: 4px; + border-bottom: solid 1px #ddd; +} + +.xr-header > div, +.xr-header > ul { + display: inline; + margin-top: 0; + margin-bottom: 0; +} + +.xr-obj-type, +.xr-array-name { + margin-left: 2px; + margin-right: 10px; +} + +.xr-obj-type { + color: #555; +} + +.xr-array-name { + color: #000; +} + +.xr-sections { + padding-left: 0 !important; + display: grid; + grid-template-columns: 150px auto auto 1fr 20px 20px; +} + +.xr-section-item { + display: contents; +} + +.xr-section-item input { + display: none; +} + +.xr-section-item input + label { + color: #ccc; +} + +.xr-section-item input:enabled + label { + cursor: pointer; + color: #555; +} + +.xr-section-item input:enabled + label:hover { + color: #000; +} + +.xr-section-summary { + grid-column: 1; + color: #555; + font-weight: 500; +} + +.xr-section-summary > span { + display: inline-block; + padding-left: 0.5em; +} + +.xr-section-summary-in:disabled + label { + color: #555; +} + +.xr-section-summary-in + label:before { + display: inline-block; + content: '►'; + font-size: 11px; + width: 15px; + text-align: center; +} + +.xr-section-summary-in:disabled + label:before { + color: #ccc; +} + +.xr-section-summary-in:checked + label:before { + content: '▼'; +} + +.xr-section-summary-in:checked + label > span { + display: none; +} + +.xr-section-summary, +.xr-section-inline-details { + padding-top: 4px; + padding-bottom: 4px; +} + +.xr-section-inline-details { + grid-column: 2 / -1; +} + +.xr-section-details { + display: none; + grid-column: 1 / -1; + margin-bottom: 5px; +} + +.xr-section-summary-in:checked ~ .xr-section-details { + display: contents; +} + +.xr-array-wrap { + grid-column: 1 / -1; + display: grid; + grid-template-columns: 20px auto; +} + +.xr-array-wrap > label { + grid-column: 1; + vertical-align: top; +} + +.xr-preview { + color: #888; +} + +.xr-array-preview, 
+.xr-array-data { + padding: 0 5px !important; + grid-column: 2; +} + +.xr-array-data, +.xr-array-in:checked ~ .xr-array-preview { + display: none; +} + +.xr-array-in:checked ~ .xr-array-data, +.xr-array-preview { + display: inline-block; +} + +.xr-dim-list { + display: inline-block !important; + list-style: none; + padding: 0 !important; + margin: 0; +} + +.xr-dim-list li { + display: inline-block; + padding: 0; + margin: 0; +} + +.xr-dim-list:before { + content: '('; +} + +.xr-dim-list:after { + content: ')'; +} + +.xr-dim-list li:not(:last-child):after { + content: ','; + padding-right: 5px; +} + +.xr-has-index { + font-weight: bold; +} + +.xr-var-list, +.xr-var-item { + display: contents; +} + +.xr-var-item > div, +.xr-var-item label, +.xr-var-item > .xr-var-name span { + background-color: #fcfcfc; + margin-bottom: 0; +} + +.xr-var-item > .xr-var-name:hover span { + padding-right: 5px; +} + +.xr-var-list > li:nth-child(odd) > div, +.xr-var-list > li:nth-child(odd) > label, +.xr-var-list > li:nth-child(odd) > .xr-var-name span { + background-color: #efefef; +} + +.xr-var-name { + grid-column: 1; +} + +.xr-var-dims { + grid-column: 2; +} + +.xr-var-dtype { + grid-column: 3; + text-align: right; + color: #555; +} + +.xr-var-preview { + grid-column: 4; +} + +.xr-var-name, +.xr-var-dims, +.xr-var-dtype, +.xr-preview, +.xr-attrs dt { + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; + padding-right: 10px; +} + +.xr-var-name:hover, +.xr-var-dims:hover, +.xr-var-dtype:hover, +.xr-attrs dt:hover { + overflow: visible; + width: auto; + z-index: 1; +} + +.xr-var-attrs, +.xr-var-data { + display: none; + background-color: #fff !important; + padding-bottom: 5px !important; +} + +.xr-var-attrs-in:checked ~ .xr-var-attrs, +.xr-var-data-in:checked ~ .xr-var-data { + display: block; +} + +.xr-var-data > table { + float: right; +} + +.xr-var-name span, +.xr-var-data, +.xr-attrs { + padding-left: 25px !important; +} + +.xr-attrs, +.xr-var-attrs, +.xr-var-data 
{ + grid-column: 1 / -1; +} + +dl.xr-attrs { + padding: 0; + margin: 0; + display: grid; + grid-template-columns: 125px auto; +} + +.xr-attrs dt, dd { + padding: 0; + margin: 0; + float: left; + padding-right: 10px; + width: auto; +} + +.xr-attrs dt { + font-weight: normal; + grid-column: 1; +} + +.xr-attrs dt:hover span { + display: inline-block; + background: #fff; + padding-right: 10px; +} + +.xr-attrs dd { + grid-column: 2; + white-space: pre-wrap; + word-break: break-all; +} + +.xr-icon-database, +.xr-icon-file-text2 { + display: inline-block; + vertical-align: middle; + width: 1em; + height: 1.5em !important; + stroke-width: 0; + stroke: currentColor; + fill: currentColor; +} diff --git a/xarray/static/html/icons-svg-inline.html b/xarray/static/html/icons-svg-inline.html new file mode 100644 index 00000000000..c44f89c4304 --- /dev/null +++ b/xarray/static/html/icons-svg-inline.html @@ -0,0 +1,17 @@ + + + +Show/Hide data repr + + + + + +Show/Hide attributes + + + + + + + diff --git a/xarray/tests/test_formatting_html.py b/xarray/tests/test_formatting_html.py new file mode 100644 index 00000000000..e7f54b22d06 --- /dev/null +++ b/xarray/tests/test_formatting_html.py @@ -0,0 +1,132 @@ +from distutils.version import LooseVersion + +import numpy as np +import pandas as pd +import pytest + +import xarray as xr +from xarray.core import formatting_html as fh + + +@pytest.fixture +def dataarray(): + return xr.DataArray(np.random.RandomState(0).randn(4, 6)) + + +@pytest.fixture +def dask_dataarray(dataarray): + pytest.importorskip("dask") + return dataarray.chunk() + + +@pytest.fixture +def multiindex(): + mindex = pd.MultiIndex.from_product( + [["a", "b"], [1, 2]], names=("level_1", "level_2") + ) + return xr.Dataset({}, {"x": mindex}) + + +@pytest.fixture +def dataset(): + times = pd.date_range("2000-01-01", "2001-12-31", name="time") + annual_cycle = np.sin(2 * np.pi * (times.dayofyear.values / 365.25 - 0.28)) + + base = 10 + 15 * annual_cycle.reshape(-1, 1) + 
tmin_values = base + 3 * np.random.randn(annual_cycle.size, 3) + tmax_values = base + 10 + 3 * np.random.randn(annual_cycle.size, 3) + + return xr.Dataset( + { + "tmin": (("time", "location"), tmin_values), + "tmax": (("time", "location"), tmax_values), + }, + {"time": times, "location": ["", "IN", "IL"]}, + attrs={"description": "Test data."}, + ) + + +def test_short_data_repr_html(dataarray): + data_repr = fh.short_data_repr_html(dataarray) + assert data_repr.startswith("array") + + +def test_short_data_repr_html_dask(dask_dataarray): + import dask + + if LooseVersion(dask.__version__) < "2.0.0": + assert not hasattr(dask_dataarray.data, "_repr_html_") + data_repr = fh.short_data_repr_html(dask_dataarray) + assert ( + data_repr + == "dask.array<xarray-<this-array>, shape=(4, 6), dtype=float64, chunksize=(4, 6)>" + ) + else: + assert hasattr(dask_dataarray.data, "_repr_html_") + data_repr = fh.short_data_repr_html(dask_dataarray) + assert data_repr == dask_dataarray.data._repr_html_() + + +def test_format_dims_no_dims(): + dims, coord_names = {}, [] + formatted = fh.format_dims(dims, coord_names) + assert formatted == "" + + +def test_format_dims_unsafe_dim_name(): + dims, coord_names = {"": 3, "y": 2}, [] + formatted = fh.format_dims(dims, coord_names) + assert "<x>" in formatted + + +def test_format_dims_non_index(): + dims, coord_names = {"x": 3, "y": 2}, ["time"] + formatted = fh.format_dims(dims, coord_names) + assert "class='xr-has-index'" not in formatted + + +def test_format_dims_index(): + dims, coord_names = {"x": 3, "y": 2}, ["x"] + formatted = fh.format_dims(dims, coord_names) + assert "class='xr-has-index'" in formatted + + +def test_summarize_attrs_with_unsafe_attr_name_and_value(): + attrs = {"": 3, "y": ""} + formatted = fh.summarize_attrs(attrs) + assert "
    <x> :
    " in formatted + assert "
    y :
    " in formatted + assert "
    3
    " in formatted + assert "
    <pd.DataFrame>
    " in formatted + + +def test_repr_of_dataarray(dataarray): + formatted = fh.array_repr(dataarray) + assert "dim_0" in formatted + # has an expandable data section + assert formatted.count("class='xr-array-in' type='checkbox' >") == 1 + # coords and attrs don't have any items so they'll be disabled and collapsed + assert ( + formatted.count("class='xr-section-summary-in' type='checkbox' disabled >") == 2 + ) + + +def test_summary_of_multiindex_coord(multiindex): + idx = multiindex.x.variable.to_index_variable() + formatted = fh._summarize_coord_multiindex("foo", idx) + assert "(level_1, level_2)" in formatted + assert "MultiIndex" in formatted + assert "foo" in formatted + + +def test_repr_of_multiindex(multiindex): + formatted = fh.dataset_repr(multiindex) + assert "(x)" in formatted + + +def test_repr_of_dataset(dataset): + formatted = fh.dataset_repr(dataset) + # coords, attrs, and data_vars are expanded + assert ( + formatted.count("class='xr-section-summary-in' type='checkbox' checked>") == 3 + ) diff --git a/xarray/tests/test_options.py b/xarray/tests/test_options.py index 2aa77ecd6b3..f155acbf494 100644 --- a/xarray/tests/test_options.py +++ b/xarray/tests/test_options.py @@ -67,6 +67,16 @@ def test_nested_options(): assert OPTIONS["display_width"] == original +def test_display_style(): + original = "text" + assert OPTIONS["display_style"] == original + with pytest.raises(ValueError): + xarray.set_options(display_style="invalid_str") + with xarray.set_options(display_style="html"): + assert OPTIONS["display_style"] == "html" + assert OPTIONS["display_style"] == original + + def create_test_dataset_attrs(seed=0): ds = create_test_data(seed) ds.attrs = {"attr1": 5, "attr2": "history", "attr3": {"nested": "more_info"}} @@ -164,3 +174,30 @@ def test_merge_attr_retention(self): # option doesn't affect this result = merge([da1, da2]) assert result.attrs == original_attrs + + def test_display_style_text(self): + ds = create_test_dataset_attrs() + text = 
ds._repr_html_() + assert text.startswith("
    ")
    +        assert "'nested'" in text
    +        assert "<xarray.Dataset>" in text
    +
    +    def test_display_style_html(self):
    +        ds = create_test_dataset_attrs()
    +        with xarray.set_options(display_style="html"):
    +            html = ds._repr_html_()
    +            assert html.startswith("
    ") + assert "'nested'" in html + + def test_display_dataarray_style_text(self): + da = create_test_dataarray_attrs() + text = da._repr_html_() + assert text.startswith("
    ")
    +        assert "<xarray.DataArray 'var1'" in text
    +
    +    def test_display_dataarray_style_html(self):
    +        da = create_test_dataarray_attrs()
    +        with xarray.set_options(display_style="html"):
    +            html = da._repr_html_()
    +            assert html.startswith("
    ") + assert "#x27;nested'" in html