diff --git a/altair/utils/schemapi.py b/altair/utils/schemapi.py index fab24a4ad..0c8f9b274 100644 --- a/altair/utils/schemapi.py +++ b/altair/utils/schemapi.py @@ -5,7 +5,7 @@ import inspect import json import textwrap -from typing import Any +from typing import Any, Sequence, List import jsonschema import jsonschema.exceptions @@ -66,9 +66,50 @@ def validate_jsonschema(spec, schema, rootschema=None): if hasattr(validator_cls, "FORMAT_CHECKER"): validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER validator = validator_cls(schema, **validator_kwargs) - error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) - if error is not None: - raise error + errors = list(validator.iter_errors(spec)) + if errors: + errors = _get_most_relevant_errors(errors) + main_error = errors[0] + # They can be used to craft more helpful error messages when this error + # is being converted to a SchemaValidationError + main_error._additional_errors = errors[1:] + raise main_error + + +def _get_most_relevant_errors( + errors: Sequence[jsonschema.exceptions.ValidationError], +) -> List[jsonschema.exceptions.ValidationError]: + if len(errors) == 0: + return [] + + # Go to lowest level in schema where an error happened as these give + # the most relevant error messages + lowest_level = errors[0] + while lowest_level.context: + lowest_level = lowest_level.context[0] + + most_relevant_errors = [] + if lowest_level.validator == "additionalProperties": + # For these errors, the message is already informative enough and the other + # errors on the lowest level are not helpful for a user but instead contain + # the same information in a more verbose way + most_relevant_errors = [lowest_level] + else: + parent = lowest_level.parent + if parent is None: + # In this case we are still at the top level and can return all errors + most_relevant_errors = errors + else: + # Return all errors of the lowest level out of which + # we can construct more informative error messages + most_relevant_errors = lowest_level.parent.context + + # This should never happen but might still be good to test for it as else + # the original error would just slip through without being raised + if len(most_relevant_errors) == 0: + raise Exception("Could not determine the most relevant errors") from errors[0] + + return most_relevant_errors def _subclasses(cls): @@ -82,18 +123,14 @@ def _subclasses(cls): yield cls -def _todict(obj, validate, context): +def _todict(obj, context): """Convert an object to a dict representation.""" if isinstance(obj, SchemaBase): - return obj.to_dict(validate=validate, context=context) + return obj.to_dict(validate=False, context=context) elif isinstance(obj, (list, tuple, np.ndarray)): - return [_todict(v, validate, context) for v in obj] + return [_todict(v, context) for v in obj] elif isinstance(obj, dict): - return { - k: _todict(v, validate, context) - for k, v in obj.items() - if v is not Undefined - } + return {k: _todict(v, context) for k, v in obj.items() if v is not Undefined} elif hasattr(obj, "to_dict"): return obj.to_dict() elif isinstance(obj, np.number): @@ -117,24 +154,9 @@ class SchemaValidationError(jsonschema.ValidationError): """A wrapper for jsonschema.ValidationError with friendlier traceback""" def __init__(self, obj, err): - super(SchemaValidationError, self).__init__(**self._get_contents(err)) + super(SchemaValidationError, self).__init__(**err._contents()) self.obj = obj - - @staticmethod - def _get_contents(err): - """Get a dictionary with the contents of a ValidationError""" - try: - # works in jsonschema 2.3 or later - contents = err._contents() - except AttributeError: - try: - # works in Python >=3.4 - spec = inspect.getfullargspec(err.__init__) - except AttributeError: - # works in Python <3.4 - spec = inspect.getargspec(err.__init__) - contents = {key: getattr(err, key) for key in spec.args[1:]} - return contents + self._additional_errors = getattr(err, "_additional_errors", []) def __str__(self): cls = self.obj.__class__ @@ -145,13 +167,18 @@ def __str__(self): for val in schema_path[:-1] if val not in ("properties", "additionalProperties", "patternProperties") ) + message = self.message + if self._additional_errors: + message += "\n " + "\n ".join( + [e.message for e in self._additional_errors] + ) return """Invalid specification {}, validating {!r} {} """.format( - schema_path, self.validator, self.message + schema_path, self.validator, message ) @@ -327,11 +354,9 @@ def to_dict(self, validate=True, ignore=None, context=None): Parameters ---------- - validate : boolean or string + validate : boolean If True (default), then validate the output dictionary - against the schema. If "deep" then recursively validate - all objects in the spec. This takes much more time, but - it results in friendlier tracebacks for large objects. + against the schema. ignore : list A list of keys to ignore. This will *not* passed to child to_dict function calls. @@ -353,10 +378,9 @@ def to_dict(self, validate=True, ignore=None, context=None): context = {} if ignore is None: ignore = [] - sub_validate = "deep" if validate == "deep" else False if self._args and not self._kwds: - result = _todict(self._args[0], validate=sub_validate, context=context) + result = _todict(self._args[0], context=context) elif not self._args: kwds = self._kwds.copy() # parsed_shorthand is added by FieldChannelMixin. @@ -384,7 +408,6 @@ def to_dict(self, validate=True, ignore=None, context=None): } result = _todict( kwds, - validate=sub_validate, context=context, ) else: @@ -406,11 +429,9 @@ def to_json( Parameters ---------- - validate : boolean or string + validate : boolean If True (default), then validate the output dictionary - against the schema. If "deep" then recursively validate - all objects in the spec. This takes much more time, but - it results in friendlier tracebacks for large objects. + against the schema. ignore : list A list of keys to ignore. This will *not* passed to child to_dict function calls. @@ -516,7 +537,7 @@ def validate_property(cls, name, value, schema=None): Validate a property against property schema in the context of the rootschema """ - value = _todict(value, validate=False, context={}) + value = _todict(value, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) return validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema diff --git a/altair/vegalite/v5/api.py b/altair/vegalite/v5/api.py index 4a0ddad5e..5b11d0298 100644 --- a/altair/vegalite/v5/api.py +++ b/altair/vegalite/v5/api.py @@ -556,17 +556,7 @@ def to_dict(self, *args, **kwargs): context["top_level"] = False kwargs["context"] = context - try: - dct = super(TopLevelMixin, copy).to_dict(*args, **kwargs) - except jsonschema.ValidationError: - dct = None - - # If we hit an error, then re-convert with validate='deep' to get - # a more useful traceback. We don't do this by default because it's - # much slower in the case that there are no errors. - if dct is None: - kwargs["validate"] = "deep" - dct = super(TopLevelMixin, copy).to_dict(*args, **kwargs) + dct = super(TopLevelMixin, copy).to_dict(*args, **kwargs) # TODO: following entries are added after validation. Should they be validated? if is_top_level: diff --git a/doc/releases/changes.rst b/doc/releases/changes.rst index 09ed7e7d7..02978562e 100644 --- a/doc/releases/changes.rst +++ b/doc/releases/changes.rst @@ -21,6 +21,7 @@ Enhancements - Saving charts with HTML inline is now supported without having altair_saver installed (#2807). - The documentation page has been revamped, both in terms of appearance and content. - More informative autocompletion by removing deprecated methods (#2814) and adding support for completion in method chains for editors that rely on type hints (e.g. VS Code) (#2846) +- Improved error messages (#2842) Grammar Changes ~~~~~~~~~~~~~~~ diff --git a/tests/utils/tests/test_schemapi.py b/tests/utils/tests/test_schemapi.py index 4c82839b0..d7263404e 100644 --- a/tests/utils/tests/test_schemapi.py +++ b/tests/utils/tests/test_schemapi.py @@ -4,11 +4,14 @@ import io import json import jsonschema +import re import pickle -import pytest +import warnings import numpy as np import pandas as pd +import pytest +from vega_datasets import data import altair as alt from altair import load_schema @@ -380,6 +383,131 @@ def test_schema_validation_error(): assert the_err.message in message +def chart_example_layer(): + points = ( + alt.Chart(data.cars.url) + .mark_point() + .encode( + x="Horsepower:Q", + y="Miles_per_Gallon:Q", + ) + ) + return (points & points).properties(width=400) + + +def chart_example_hconcat(): + source = data.cars() + points = ( + alt.Chart(source) + .mark_point() + .encode( + x="Horsepower", + y="Miles_per_Gallon", + ) + ) + + text = ( + alt.Chart(source) + .mark_text(align="right") + .encode(alt.Text("Horsepower:N", title=dict(text="Horsepower", align="right"))) + ) + + return points | text + + +def chart_example_invalid_channel_and_condition(): + selection = alt.selection_point() + return ( + alt.Chart(data.barley()) + .mark_circle() + .add_params(selection) + .encode( + color=alt.condition(selection, alt.value("red"), alt.value("green")), + invalidChannel=None, + ) + ) + + +def chart_example_invalid_y_option(): + return ( + alt.Chart(data.barley()) + .mark_bar() + .encode( + x=alt.X("variety", unknown=2), + y=alt.Y("sum(yield)", stack="asdf"), + ) + ) + + +def chart_example_invalid_y_option_value(): + return ( + alt.Chart(data.barley()) + .mark_bar() + .encode( + x=alt.X("variety"), + y=alt.Y("sum(yield)", stack="asdf"), + ) + ) + + +def chart_example_invalid_y_option_value_with_condition(): + return ( + alt.Chart(data.barley()) + .mark_bar() + .encode( + x="variety", + y=alt.Y("sum(yield)", stack="asdf"), + opacity=alt.condition("datum.yield > 0", alt.value(1), alt.value(0.2)), + ) + ) + + +@pytest.mark.parametrize( + "chart_func, expected_error_message", + [ + ( + chart_example_invalid_y_option, + r"Additional properties are not allowed \('unknown' was unexpected\)", + ), + ( + chart_example_invalid_y_option_value, + r"'asdf' is not one of \['zero', 'center', 'normalize'\].*" + + r"'asdf' is not of type 'null'.*'asdf' is not of type 'boolean'", + ), + ( + chart_example_layer, + r"Additional properties are not allowed \('width' was unexpected\)", + ), + ( + chart_example_invalid_y_option_value_with_condition, + r"'asdf' is not one of \['zero', 'center', 'normalize'\].*" + + r"'asdf' is not of type 'null'.*'asdf' is not of type 'boolean'", + ), + ( + chart_example_hconcat, + r"\{'text': 'Horsepower', 'align': 'right'\} is not of type 'string'.*" + + r"\{'text': 'Horsepower', 'align': 'right'\} is not of type 'array'", + ), + ( + chart_example_invalid_channel_and_condition, + r"Additional properties are not allowed \('invalidChannel' was unexpected\)", + ), + ], +) +def test_chart_validation_errors(chart_func, expected_error_message): + # DOTALL flag makes that a dot (.) also matches new lines + pattern = re.compile(expected_error_message, re.DOTALL) + # For some wrong chart specifications such as an unknown encoding channel, + # Altair already raises a warning before the chart specifications are validated. + # We can ignore these warnings as we are interested in the errors being raised + # during validation which is triggered by to_dict + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + chart = chart_func() + with pytest.raises(SchemaValidationError, match=pattern): + chart.to_dict() + + def test_serialize_numpy_types(): m = MySchema( a={"date": np.datetime64("2019-01-01")}, diff --git a/tools/schemapi/schemapi.py b/tools/schemapi/schemapi.py index d04bdf2df..190446bbf 100644 --- a/tools/schemapi/schemapi.py +++ b/tools/schemapi/schemapi.py @@ -3,7 +3,7 @@ import inspect import json import textwrap -from typing import Any +from typing import Any, Sequence, List import jsonschema import jsonschema.exceptions @@ -64,9 +64,50 @@ def validate_jsonschema(spec, schema, rootschema=None): if hasattr(validator_cls, "FORMAT_CHECKER"): validator_kwargs["format_checker"] = validator_cls.FORMAT_CHECKER validator = validator_cls(schema, **validator_kwargs) - error = jsonschema.exceptions.best_match(validator.iter_errors(spec)) - if error is not None: - raise error + errors = list(validator.iter_errors(spec)) + if errors: + errors = _get_most_relevant_errors(errors) + main_error = errors[0] + # They can be used to craft more helpful error messages when this error + # is being converted to a SchemaValidationError + main_error._additional_errors = errors[1:] + raise main_error + + +def _get_most_relevant_errors( + errors: Sequence[jsonschema.exceptions.ValidationError], +) -> List[jsonschema.exceptions.ValidationError]: + if len(errors) == 0: + return [] + + # Go to lowest level in schema where an error happened as these give + # the most relevant error messages + lowest_level = errors[0] + while lowest_level.context: + lowest_level = lowest_level.context[0] + + most_relevant_errors = [] + if lowest_level.validator == "additionalProperties": + # For these errors, the message is already informative enough and the other + # errors on the lowest level are not helpful for a user but instead contain + # the same information in a more verbose way + most_relevant_errors = [lowest_level] + else: + parent = lowest_level.parent + if parent is None: + # In this case we are still at the top level and can return all errors + most_relevant_errors = errors + else: + # Return all errors of the lowest level out of which + # we can construct more informative error messages + most_relevant_errors = lowest_level.parent.context + + # This should never happen but might still be good to test for it as else + # the original error would just slip through without being raised + if len(most_relevant_errors) == 0: + raise Exception("Could not determine the most relevant errors") from errors[0] + + return most_relevant_errors def _subclasses(cls): @@ -80,18 +121,14 @@ def _subclasses(cls): yield cls -def _todict(obj, validate, context): +def _todict(obj, context): """Convert an object to a dict representation.""" if isinstance(obj, SchemaBase): - return obj.to_dict(validate=validate, context=context) + return obj.to_dict(validate=False, context=context) elif isinstance(obj, (list, tuple, np.ndarray)): - return [_todict(v, validate, context) for v in obj] + return [_todict(v, context) for v in obj] elif isinstance(obj, dict): - return { - k: _todict(v, validate, context) - for k, v in obj.items() - if v is not Undefined - } + return {k: _todict(v, context) for k, v in obj.items() if v is not Undefined} elif hasattr(obj, "to_dict"): return obj.to_dict() elif isinstance(obj, np.number): @@ -115,24 +152,9 @@ class SchemaValidationError(jsonschema.ValidationError): """A wrapper for jsonschema.ValidationError with friendlier traceback""" def __init__(self, obj, err): - super(SchemaValidationError, self).__init__(**self._get_contents(err)) + super(SchemaValidationError, self).__init__(**err._contents()) self.obj = obj - - @staticmethod - def _get_contents(err): - """Get a dictionary with the contents of a ValidationError""" - try: - # works in jsonschema 2.3 or later - contents = err._contents() - except AttributeError: - try: - # works in Python >=3.4 - spec = inspect.getfullargspec(err.__init__) - except AttributeError: - # works in Python <3.4 - spec = inspect.getargspec(err.__init__) - contents = {key: getattr(err, key) for key in spec.args[1:]} - return contents + self._additional_errors = getattr(err, "_additional_errors", []) def __str__(self): cls = self.obj.__class__ @@ -143,13 +165,18 @@ def __str__(self): for val in schema_path[:-1] if val not in ("properties", "additionalProperties", "patternProperties") ) + message = self.message + if self._additional_errors: + message += "\n " + "\n ".join( + [e.message for e in self._additional_errors] + ) return """Invalid specification {}, validating {!r} {} """.format( - schema_path, self.validator, self.message + schema_path, self.validator, message ) @@ -325,11 +352,9 @@ def to_dict(self, validate=True, ignore=None, context=None): Parameters ---------- - validate : boolean or string + validate : boolean If True (default), then validate the output dictionary - against the schema. If "deep" then recursively validate - all objects in the spec. This takes much more time, but - it results in friendlier tracebacks for large objects. + against the schema. ignore : list A list of keys to ignore. This will *not* passed to child to_dict function calls. @@ -351,10 +376,9 @@ def to_dict(self, validate=True, ignore=None, context=None): context = {} if ignore is None: ignore = [] - sub_validate = "deep" if validate == "deep" else False if self._args and not self._kwds: - result = _todict(self._args[0], validate=sub_validate, context=context) + result = _todict(self._args[0], context=context) elif not self._args: kwds = self._kwds.copy() # parsed_shorthand is added by FieldChannelMixin. @@ -382,7 +406,6 @@ def to_dict(self, validate=True, ignore=None, context=None): } result = _todict( kwds, - validate=sub_validate, context=context, ) else: @@ -404,11 +427,9 @@ def to_json( Parameters ---------- - validate : boolean or string + validate : boolean If True (default), then validate the output dictionary - against the schema. If "deep" then recursively validate - all objects in the spec. This takes much more time, but - it results in friendlier tracebacks for large objects. + against the schema. ignore : list A list of keys to ignore. This will *not* passed to child to_dict function calls. @@ -514,7 +535,7 @@ def validate_property(cls, name, value, schema=None): Validate a property against property schema in the context of the rootschema """ - value = _todict(value, validate=False, context={}) + value = _todict(value, context={}) props = cls.resolve_references(schema or cls._schema).get("properties", {}) return validate_jsonschema( value, props.get(name, {}), rootschema=cls._rootschema or cls._schema diff --git a/tools/schemapi/tests/test_schemapi.py b/tools/schemapi/tests/test_schemapi.py index f5e7469e7..d26b127fb 100644 --- a/tools/schemapi/tests/test_schemapi.py +++ b/tools/schemapi/tests/test_schemapi.py @@ -2,11 +2,14 @@ import io import json import jsonschema +import re import pickle -import pytest +import warnings import numpy as np import pandas as pd +import pytest +from vega_datasets import data import altair as alt from altair import load_schema @@ -378,6 +381,131 @@ def test_schema_validation_error(): assert the_err.message in message +def chart_example_layer(): + points = ( + alt.Chart(data.cars.url) + .mark_point() + .encode( + x="Horsepower:Q", + y="Miles_per_Gallon:Q", + ) + ) + return (points & points).properties(width=400) + + +def chart_example_hconcat(): + source = data.cars() + points = ( + alt.Chart(source) + .mark_point() + .encode( + x="Horsepower", + y="Miles_per_Gallon", + ) + ) + + text = ( + alt.Chart(source) + .mark_text(align="right") + .encode(alt.Text("Horsepower:N", title=dict(text="Horsepower", align="right"))) + ) + + return points | text + + +def chart_example_invalid_channel_and_condition(): + selection = alt.selection_point() + return ( + alt.Chart(data.barley()) + .mark_circle() + .add_params(selection) + .encode( + color=alt.condition(selection, alt.value("red"), alt.value("green")), + invalidChannel=None, + ) + ) + + +def chart_example_invalid_y_option(): + return ( + alt.Chart(data.barley()) + .mark_bar() + .encode( + x=alt.X("variety", unknown=2), + y=alt.Y("sum(yield)", stack="asdf"), + ) + ) + + +def chart_example_invalid_y_option_value(): + return ( + alt.Chart(data.barley()) + .mark_bar() + .encode( + x=alt.X("variety"), + y=alt.Y("sum(yield)", stack="asdf"), + ) + ) + + +def chart_example_invalid_y_option_value_with_condition(): + return ( + alt.Chart(data.barley()) + .mark_bar() + .encode( + x="variety", + y=alt.Y("sum(yield)", stack="asdf"), + opacity=alt.condition("datum.yield > 0", alt.value(1), alt.value(0.2)), + ) + ) + + +@pytest.mark.parametrize( + "chart_func,expected_error_message", + [ + ( + chart_example_invalid_y_option, + r"Additional properties are not allowed \('unknown' was unexpected\)", + ), + ( + chart_example_invalid_y_option_value, + r"'asdf' is not one of \['zero', 'center', 'normalize'\].*" + + r"'asdf' is not of type 'null'.*'asdf' is not of type 'boolean'", + ), + ( + chart_example_layer, + r"Additional properties are not allowed \('width' was unexpected\)", + ), + ( + chart_example_invalid_y_option_value_with_condition, + r"'asdf' is not one of \['zero', 'center', 'normalize'\].*" + + r"'asdf' is not of type 'null'.*'asdf' is not of type 'boolean'", + ), + ( + chart_example_hconcat, + r"\{'text': 'Horsepower', 'align': 'right'\} is not of type 'string'.*" + + r"\{'text': 'Horsepower', 'align': 'right'\} is not of type 'array'", + ), + ( + chart_example_invalid_channel_and_condition, + r"Additional properties are not allowed \('invalidChannel' was unexpected\)", + ), + ], +) +def test_chart_validation_errors(chart_func, expected_error_message): + # DOTALL flag makes that a dot (.) also matches new lines + pattern = re.compile(expected_error_message, re.DOTALL) + # For some wrong chart specifications such as an unknown encoding channel, + # Altair already raises a warning before the chart specifications are validated. + # We can ignore these warnings as we are interested in the errors being raised + # during validation which is triggered by to_dict + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + chart = chart_func() + with pytest.raises(SchemaValidationError, match=pattern): + chart.to_dict() + + def test_serialize_numpy_types(): m = MySchema( a={"date": np.datetime64("2019-01-01")},