Skip to content

Commit

Permalink
BUG: respect data transformer for chart.serve() and chart.save()
Browse files Browse the repository at this point in the history
  • Loading branch information
jakevdp committed May 29, 2019
1 parent 35fad6b commit fc96dc6
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 19 deletions.
13 changes: 12 additions & 1 deletion altair/vegalite/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,26 @@
from ..utils.core import sanitize_dataframe
from ..utils.data import (
MaxRowsError, limit_rows, sample, to_csv, to_json, to_values,
check_data_type, DataTransformerRegistry
check_data_type
)
from ..utils.data import DataTransformerRegistry as _DataTransformerRegistry


@curry
def default_data_transformer(data, max_rows=5000):
    """Default transformer: cap the row count at *max_rows*, then
    convert the data to a list-of-values representation.

    Raises MaxRowsError (via limit_rows) when *data* exceeds the cap.
    """
    limited = limit_rows(max_rows=max_rows)(data)
    return to_values(limited)


class DataTransformerRegistry(_DataTransformerRegistry):
    """Data-transformer registry with a convenience switch for the
    row-count safeguard."""

    def disable_max_rows(self):
        """Disable the MaxRowsError.

        Re-enables the currently active transformer; for the 'default'
        transformer the max_rows option is cleared, for any other
        transformer the existing options are passed through unchanged.
        """
        if self.active == 'default':
            return self.enable(max_rows=None)
        return self.enable(**self.options)


__all__ = (
'DataTransformerRegistry',
'MaxRowsError',
Expand Down
33 changes: 33 additions & 0 deletions altair/vegalite/v2/tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os

import pandas as pd
import pytest

from .. import data as alt


@pytest.fixture
def sample_data():
    """Ten-row DataFrame with identical integer columns 'x' and 'y'."""
    values = list(range(10))
    return pd.DataFrame({'x': values, 'y': values})


def test_disable_max_rows(sample_data):
    """disable_max_rows() must lift the row cap on the default transformer
    and be a harmless no-op for transformers without a max_rows option."""
    with alt.data_transformers.enable('default', max_rows=5):
        # With the cap at 5 rows, a 10-row frame must raise.
        with pytest.raises(alt.MaxRowsError):
            alt.data_transformers.get()(sample_data)

        # Inside the disabling context the same call must succeed.
        with alt.data_transformers.disable_max_rows():
            alt.data_transformers.get()(sample_data)

    # The 'json' transformer takes no max_rows option; disable_max_rows()
    # must not raise a TypeError for it.  NOTE: the original bare
    # ``except:`` here swallowed any such TypeError, so the test could
    # never fail — use try/finally for cleanup only and let errors surface.
    jsonfile = None
    try:
        with alt.data_transformers.enable('json'):
            with alt.data_transformers.disable_max_rows():
                jsonfile = alt.data_transformers.get()(sample_data)
    finally:
        if jsonfile:
            os.remove(jsonfile['url'])
8 changes: 4 additions & 4 deletions altair/vegalite/v3/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,9 +445,9 @@ def save(self, fp, format=None, override_data_transformer=True,
the format to write: one of ['json', 'html', 'png', 'svg'].
If not specified, the format will be determined from the filename.
override_data_transformer : boolean (optional)
If True (default), then the save action will be done with the
default data_transformer with max_rows set to None. If False,
then use the currently active data transformer.
If True (default), then the save action will be done with
the MaxRowsError disabled. If False, then do not change the data
transformer.
scale_factor : float
For svg or png formats, scale the image by this factor when saving.
This can be used to control the size or resolution of the output.
Expand All @@ -470,7 +470,7 @@ def save(self, fp, format=None, override_data_transformer=True,
# that save() will succeed even for large datasets that would
# normally trigger a MaxRowsError
if override_data_transformer:
with data_transformers.enable('default', max_rows=None):
with data_transformers.disable_max_rows():
result = save(**kwds)
else:
result = save(**kwds)
Expand Down
33 changes: 33 additions & 0 deletions altair/vegalite/v3/tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os

import pandas as pd
import pytest

from .. import data as alt


@pytest.fixture
def sample_data():
    """Ten-row DataFrame with identical integer columns 'x' and 'y'."""
    values = list(range(10))
    return pd.DataFrame({'x': values, 'y': values})


def test_disable_max_rows(sample_data):
    """disable_max_rows() must lift the row cap on the default transformer
    and be a harmless no-op for transformers without a max_rows option."""
    with alt.data_transformers.enable('default', max_rows=5):
        # With the cap at 5 rows, a 10-row frame must raise.
        with pytest.raises(alt.MaxRowsError):
            alt.data_transformers.get()(sample_data)

        # Inside the disabling context the same call must succeed.
        with alt.data_transformers.disable_max_rows():
            alt.data_transformers.get()(sample_data)

    # The 'json' transformer takes no max_rows option; disable_max_rows()
    # must not raise a TypeError for it.  NOTE: the original bare
    # ``except:`` here swallowed any such TypeError, so the test could
    # never fail — use try/finally for cleanup only and let errors surface.
    jsonfile = None
    try:
        with alt.data_transformers.enable('json'):
            with alt.data_transformers.disable_max_rows():
                jsonfile = alt.data_transformers.get()(sample_data)
    finally:
        if jsonfile:
            os.remove(jsonfile['url'])
27 changes: 13 additions & 14 deletions doc/user_guide/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,21 @@ error is a way of preventing that.

You can get around it in a few ways:

Disabling MaxRowsError
~~~~~~~~~~~~~~~~~~~~~~
If you are certain you would like to embed your dataset within the visualization
specification, you can disable the ``MaxRows`` check with the following::

alt.data_transformers.disable_max_rows()

If you choose this route, please be careful: if you are making multiple plots
with the dataset in a particular notebook, the notebook will grow very large
and performance may suffer.

Passing Data by URL
~~~~~~~~~~~~~~~~~~~
The preferred solution to working with large datasets is to not embed the data
in the notebook, but rather pass it to the chart by URL.
A better solution when working with large datasets is to not embed the data
in the notebook, but rather store it separately and pass it to the chart by URL.
This not only addresses the issue of large notebooks, but also leads to better
interactivity performance with large datasets.

Expand Down Expand Up @@ -199,15 +210,3 @@ And then enable the data transformer::
alt.data_transformers.enable('data_server')

Note that this approach may not work on some cloud-based Jupyter notebook services.

Disabling MaxRows
~~~~~~~~~~~~~~~~~
If you are certain you would like to embed your dataset within the visualization
specification, you can disable the ``MaxRows`` check by modifying the arguments
to the default data transformer::

alt.data_transformers.enable(max_rows=None)

If you choose this route, please be careful: if you are making multiple plots
with the dataset in a particular notebook, the notebook will grow very large
and performance may suffer.

0 comments on commit fc96dc6

Please sign in to comment.