Add disable_maxrows function & make serve() and save() respect data_transformer #1538

Merged 1 commit on May 29, 2019

13 changes: 12 additions & 1 deletion altair/vegalite/data.py
@@ -3,15 +3,26 @@
 from ..utils.core import sanitize_dataframe
 from ..utils.data import (
     MaxRowsError, limit_rows, sample, to_csv, to_json, to_values,
-    check_data_type, DataTransformerRegistry
+    check_data_type
 )
+from ..utils.data import DataTransformerRegistry as _DataTransformerRegistry
 
 
 @curry
 def default_data_transformer(data, max_rows=5000):
     return pipe(data, limit_rows(max_rows=max_rows), to_values)
 
 
+class DataTransformerRegistry(_DataTransformerRegistry):
+    def disable_max_rows(self):
+        """Disable the MaxRowsError."""
+        options = self.options
+        if self.active == 'default':
+            options = options.copy()
+            options['max_rows'] = None
+        return self.enable(**options)
+
+
 __all__ = (
     'DataTransformerRegistry',
     'MaxRowsError',
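
For reference, a minimal sketch of how the new method is meant to be used (this assumes the chart-facing ``alt.data_transformers`` registry is an instance of this subclass; the context-manager form relies on ``enable()`` returning a context manager, which the tests below exercise)::

    import altair as alt

    # Disable the MaxRowsError globally for the active transformer.
    alt.data_transformers.disable_max_rows()

    # Or scope the change: enable() returns a context manager that
    # restores the previous transformer state on exit.
    with alt.data_transformers.disable_max_rows():
        pass  # build charts from large dataframes here
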
33 changes: 33 additions & 0 deletions altair/vegalite/v2/tests/test_data.py
@@ -0,0 +1,33 @@
+import os
+
+import pandas as pd
+import pytest
+
+from .. import data as alt
+
+
+@pytest.fixture
+def sample_data():
+    return pd.DataFrame({'x': range(10), 'y': range(10)})
+
+
+def test_disable_max_rows(sample_data):
+    with alt.data_transformers.enable('default', max_rows=5):
+        # Ensure max rows error is raised.
+        with pytest.raises(alt.MaxRowsError):
+            alt.data_transformers.get()(sample_data)
+
+        # Ensure that max rows error is properly disabled.
+        with alt.data_transformers.disable_max_rows():
+            alt.data_transformers.get()(sample_data)
+
+    try:
+        with alt.data_transformers.enable('json'):
+            # Ensure that there is no TypeError for non-max_rows transformers.
+            with alt.data_transformers.disable_max_rows():
+                jsonfile = alt.data_transformers.get()(sample_data)
+    except:
+        jsonfile = {}
+    finally:
+        if jsonfile:
+            os.remove(jsonfile['url'])
8 changes: 4 additions & 4 deletions altair/vegalite/v3/api.py
@@ -445,9 +445,9 @@ def save(self, fp, format=None, override_data_transformer=True,
             the format to write: one of ['json', 'html', 'png', 'svg'].
             If not specified, the format will be determined from the filename.
         override_data_transformer : boolean (optional)
-            If True (default), then the save action will be done with the
-            default data_transformer with max_rows set to None. If False,
-            then use the currently active data transformer.
+            If True (default), then the save action will be done with
+            the MaxRowsError disabled. If False, then do not change the data
+            transformer.
         scale_factor : float
             For svg or png formats, scale the image by this factor when saving.
             This can be used to control the size or resolution of the output.
@@ -470,7 +470,7 @@ def save(self, fp, format=None, override_data_transformer=True,
         # that save() will succeed even for large datasets that would
        # normally trigger a MaxRowsError
         if override_data_transformer:
-            with data_transformers.enable('default', max_rows=None):
+            with data_transformers.disable_max_rows():
                 result = save(**kwds)
         else:
             result = save(**kwds)
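
A hedged sketch of what this means for callers (the dataframe and filename here are hypothetical)::

    import pandas as pd
    import altair as alt

    big = pd.DataFrame({'x': range(100000), 'y': range(100000)})
    chart = alt.Chart(big).mark_point().encode(x='x:Q', y='y:Q')

    # Default: the MaxRowsError is suppressed only for the duration
    # of the save, regardless of the active transformer's limit.
    chart.save('chart.html')

    # Opt out: the currently active transformer applies, so this may
    # raise MaxRowsError if the data exceeds its max_rows setting.
    chart.save('chart.html', override_data_transformer=False)
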
33 changes: 33 additions & 0 deletions altair/vegalite/v3/tests/test_data.py
@@ -0,0 +1,33 @@
+import os
+
+import pandas as pd
+import pytest
+
+from .. import data as alt
+
+
+@pytest.fixture
+def sample_data():
+    return pd.DataFrame({'x': range(10), 'y': range(10)})
+
+
+def test_disable_max_rows(sample_data):
+    with alt.data_transformers.enable('default', max_rows=5):
+        # Ensure max rows error is raised.
+        with pytest.raises(alt.MaxRowsError):
+            alt.data_transformers.get()(sample_data)
+
+        # Ensure that max rows error is properly disabled.
+        with alt.data_transformers.disable_max_rows():
+            alt.data_transformers.get()(sample_data)
+
+    try:
+        with alt.data_transformers.enable('json'):
+            # Ensure that there is no TypeError for non-max_rows transformers.
+            with alt.data_transformers.disable_max_rows():
+                jsonfile = alt.data_transformers.get()(sample_data)
+    except:
+        jsonfile = {}
+    finally:
+        if jsonfile:
+            os.remove(jsonfile['url'])
27 changes: 13 additions & 14 deletions doc/user_guide/faq.rst
@@ -144,10 +144,21 @@ error is a way of preventing that.
 
 You can get around it in a few ways:
 
+Disabling MaxRowsError
+~~~~~~~~~~~~~~~~~~~~~~
+If you are certain you would like to embed your dataset within the visualization
+specification, you can disable the ``MaxRows`` check with the following::
+
+    alt.data_transformers.disable_max_rows()
+
+If you choose this route, please be careful: if you are making multiple plots
+with the dataset in a particular notebook, the notebook will grow very large
+and performance may suffer.
+
 Passing Data by URL
 ~~~~~~~~~~~~~~~~~~~
-The preferred solution to working with large datasets is to not embed the data
-in the notebook, but rather pass it to the chart by URL.
+A better solution when working with large datasets is to not embed the data
+in the notebook, but rather store it separately and pass it to the chart by URL.
 This not only addresses the issue of large notebooks, but also leads to better
 interactivity performance with large datasets.
 
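As an aside on the "pass by URL" suggestion above, a sketch of the approach (the filename is hypothetical; note that column types must be spelled out in the encoding shorthand, since Altair cannot inspect data behind a URL)::

    import pandas as pd
    import altair as alt

    df = pd.DataFrame({'x': range(100000), 'y': range(100000)})
    df.to_json('data.json', orient='records')  # store the data separately

    # The chart specification now embeds only the URL, not the rows.
    chart = alt.Chart('data.json').mark_point().encode(x='x:Q', y='y:Q')
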
@@ -199,15 +210,3 @@ And then enable the data transformer::
     alt.data_transformers.enable('data_server')
 
 Note that this approach may not work on some cloud-based Jupyter notebook services.
-
-Disabling MaxRows
-~~~~~~~~~~~~~~~~~
-If you are certain you would like to embed your dataset within the visualization
-specification, you can disable the ``MaxRows`` check by modifying the arguments
-to the default data transformer::
-
-    alt.data_transformers.enable(max_rows=None)
-
-If you choose this route, please be careful: if you are making multiple plots
-with the dataset in a particular notebook, the notebook will grow very large
-and performance may suffer.