feat: add support for generic series limit #16660

Merged: 7 commits, Sep 16, 2021
1 change: 1 addition & 0 deletions UPDATING.md
@@ -26,6 +26,7 @@ assists people when migrating to a new version.

### Breaking Changes

- [16660](https://github.com/apache/incubator-superset/pull/16660): The `columns` Jinja parameter has been renamed to `table_columns` to make the `columns` query object parameter available in the Jinja context.
- [16711](https://github.com/apache/incubator-superset/pull/16711): The `url_param` Jinja function will now by default escape the result. For instance, the value `O'Brien` will now be changed to `O''Brien`. To disable this behavior, call `url_param` with `escape_result` set to `False`: `url_param("my_key", "my default", escape_result=False)`.

### Potential Downtime
5 changes: 3 additions & 2 deletions docs/src/pages/docs/installation/sql_templating.mdx
@@ -16,14 +16,15 @@ To enable templating, the `ENABLE_TEMPLATE_PROCESSING` feature flag needs to be
in Custom SQL in the filter and metric controls in Explore. By default, the following variables are
made available in the Jinja context:

- `columns`: columns available in the dataset
- `columns`: columns to group by in the query
- `filter`: filters applied in the query
- `from_dttm`: start `datetime` value from the selected time range (`None` if undefined)
- `to_dttm`: end `datetime` value from the selected time range (`None` if undefined)
- `groupby`: columns which to group by in the query
- `groupby`: columns to group by in the query (deprecated)
- `metrics`: aggregate expressions in the query
- `row_limit`: row limit of the query
- `row_offset`: row offset of the query
- `table_columns`: columns available in the dataset
- `time_column`: temporal column of the query (`None` if undefined)
- `time_grain`: selected time grain (`None` if undefined)
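
As a quick illustration of the two renamed variables, here is a standalone sketch using plain `jinja2` with made-up dataset values (Superset's template processor injects these values automatically when `ENABLE_TEMPLATE_PROCESSING` is on):

from jinja2 import Template

# Hypothetical context values mimicking what Superset exposes.
context = {
    "columns": ["gender"],  # group-by columns of the current query
    "table_columns": ["name", "gender", "state"],  # all columns in the dataset
}

sql = Template(
    "SELECT {{ columns | join(', ') }}, COUNT(*) AS cnt "
    "FROM birth_names GROUP BY {{ columns | join(', ') }}"
).render(**context)
# sql == "SELECT gender, COUNT(*) AS cnt FROM birth_names GROUP BY gender"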

604 changes: 302 additions & 302 deletions superset-frontend/package-lock.json

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions superset-frontend/package.json
@@ -68,35 +68,35 @@
"@emotion/cache": "^11.4.0",
"@emotion/react": "^11.4.1",
"@emotion/styled": "^11.3.0",
"@superset-ui/chart-controls": "^0.18.2",
"@superset-ui/core": "^0.18.2",
"@superset-ui/legacy-plugin-chart-calendar": "^0.18.2",
"@superset-ui/legacy-plugin-chart-chord": "^0.18.2",
"@superset-ui/legacy-plugin-chart-country-map": "^0.18.2",
"@superset-ui/legacy-plugin-chart-event-flow": "^0.18.2",
"@superset-ui/legacy-plugin-chart-force-directed": "^0.18.2",
"@superset-ui/legacy-plugin-chart-heatmap": "^0.18.2",
"@superset-ui/legacy-plugin-chart-histogram": "^0.18.2",
"@superset-ui/legacy-plugin-chart-horizon": "^0.18.2",
"@superset-ui/legacy-plugin-chart-map-box": "^0.18.2",
"@superset-ui/legacy-plugin-chart-paired-t-test": "^0.18.2",
"@superset-ui/legacy-plugin-chart-parallel-coordinates": "^0.18.2",
"@superset-ui/legacy-plugin-chart-partition": "^0.18.2",
"@superset-ui/legacy-plugin-chart-pivot-table": "^0.18.2",
"@superset-ui/legacy-plugin-chart-rose": "^0.18.2",
"@superset-ui/legacy-plugin-chart-sankey": "^0.18.2",
"@superset-ui/legacy-plugin-chart-sankey-loop": "^0.18.2",
"@superset-ui/legacy-plugin-chart-sunburst": "^0.18.2",
"@superset-ui/legacy-plugin-chart-treemap": "^0.18.2",
"@superset-ui/legacy-plugin-chart-world-map": "^0.18.2",
"@superset-ui/legacy-preset-chart-big-number": "^0.18.2",
"@superset-ui/chart-controls": "^0.18.4",
"@superset-ui/core": "^0.18.4",
"@superset-ui/legacy-plugin-chart-calendar": "^0.18.4",
"@superset-ui/legacy-plugin-chart-chord": "^0.18.4",
"@superset-ui/legacy-plugin-chart-country-map": "^0.18.4",
"@superset-ui/legacy-plugin-chart-event-flow": "^0.18.4",
"@superset-ui/legacy-plugin-chart-force-directed": "^0.18.4",
"@superset-ui/legacy-plugin-chart-heatmap": "^0.18.4",
"@superset-ui/legacy-plugin-chart-histogram": "^0.18.4",
"@superset-ui/legacy-plugin-chart-horizon": "^0.18.4",
"@superset-ui/legacy-plugin-chart-map-box": "^0.18.4",
"@superset-ui/legacy-plugin-chart-paired-t-test": "^0.18.4",
"@superset-ui/legacy-plugin-chart-parallel-coordinates": "^0.18.4",
"@superset-ui/legacy-plugin-chart-partition": "^0.18.4",
"@superset-ui/legacy-plugin-chart-pivot-table": "^0.18.4",
"@superset-ui/legacy-plugin-chart-rose": "^0.18.4",
"@superset-ui/legacy-plugin-chart-sankey": "^0.18.4",
"@superset-ui/legacy-plugin-chart-sankey-loop": "^0.18.4",
"@superset-ui/legacy-plugin-chart-sunburst": "^0.18.4",
"@superset-ui/legacy-plugin-chart-treemap": "^0.18.4",
"@superset-ui/legacy-plugin-chart-world-map": "^0.18.4",
"@superset-ui/legacy-preset-chart-big-number": "^0.18.4",
"@superset-ui/legacy-preset-chart-deckgl": "^0.4.12",
"@superset-ui/legacy-preset-chart-nvd3": "^0.18.2",
"@superset-ui/plugin-chart-echarts": "^0.18.2",
"@superset-ui/plugin-chart-pivot-table": "^0.18.2",
"@superset-ui/plugin-chart-table": "^0.18.2",
"@superset-ui/plugin-chart-word-cloud": "^0.18.2",
"@superset-ui/preset-chart-xy": "^0.18.2",
"@superset-ui/legacy-preset-chart-nvd3": "^0.18.4",
"@superset-ui/plugin-chart-echarts": "^0.18.4",
"@superset-ui/plugin-chart-pivot-table": "^0.18.4",
"@superset-ui/plugin-chart-table": "^0.18.4",
"@superset-ui/plugin-chart-word-cloud": "^0.18.4",
"@superset-ui/preset-chart-xy": "^0.18.4",
"@vx/responsive": "^0.0.195",
"abortcontroller-polyfill": "^1.1.9",
"antd": "^4.9.4",
40 changes: 30 additions & 10 deletions superset/charts/schemas.py
@@ -639,18 +639,15 @@ class ChartDataPivotOptionsSchema(ChartDataPostProcessingOperationOptionsSchema)

index = (
fields.List(
fields.String(
allow_none=False,
description="Columns to group by on the table index (=rows)",
),
fields.String(allow_none=False),
description="Columns to group by on the table index (=rows)",
minLength=1,
required=True,
),
)
columns = fields.List(
fields.String(
allow_none=False, description="Columns to group by on the table columns",
),
fields.String(allow_none=False),
description="Columns to group by on the table columns",
)
metric_fill_value = fields.Number(
description="Value to replace missing values with in aggregate calculations.",
@@ -964,7 +961,9 @@ class Meta: # pylint: disable=too-few-public-methods
deprecated=True,
)
groupby = fields.List(
fields.String(description="Columns by which to group the query.",),
fields.String(),
description="Columns by which to group the query. "
"This field is deprecated, use `columns` instead.",
allow_none=True,
)
metrics = fields.List(
@@ -1012,12 +1011,33 @@ class Meta: # pylint: disable=too-few-public-methods
is_timeseries = fields.Boolean(
description="Is the `query_object` a timeseries.", allow_none=True,
)
series_columns = fields.List(
fields.String(),
description="Columns to use when limiting series count. "
"All columns must be present in the `columns` property. "
"Requires `series_limit` and `series_limit_metric` to be set.",
allow_none=True,
)
series_limit = fields.Integer(
description="Maximum number of series. "
"Requires `series` and `series_limit_metric` to be set.",
allow_none=True,
)
series_limit_metric = fields.Raw(
description="Metric used to limit timeseries queries by. "
"Requires `series` and `series_limit` to be set.",
allow_none=True,
)
timeseries_limit = fields.Integer(
description="Maximum row count for timeseries queries. Default: `0`",
description="Maximum row count for timeseries queries. "
"This field is deprecated, use `series_limit` instead."
"Default: `0`",
allow_none=True,
)
timeseries_limit_metric = fields.Raw(
description="Metric used to limit timeseries queries by.", allow_none=True,
description="Metric used to limit timeseries queries by. "
"This field is deprecated, use `series_limit_metric` instead.",
allow_none=True,
)
row_limit = fields.Integer(
description='Maximum row count (0=disabled). Default: `config["ROW_LIMIT"]`',
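For context, a hypothetical chart-data `query_object` payload exercising the new fields might look like this (values are invented; per the schema descriptions, `series_columns` must be a subset of `columns`):

# Hypothetical query_object using the new generic series limit fields.
query_object = {
    "columns": ["country", "product"],  # all group-by columns
    "metrics": ["count"],
    "series_columns": ["country"],      # subset of `columns` used for limiting
    "series_limit": 5,                  # keep at most the top 5 series
    "series_limit_metric": "count",     # metric used to rank series
}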
1 change: 0 additions & 1 deletion superset/common/query_actions.py
@@ -135,7 +135,6 @@ def _get_samples(
query_obj = copy.copy(query_obj)
query_obj.is_timeseries = False
query_obj.orderby = []
query_obj.groupby = []
query_obj.metrics = []
query_obj.post_processing = []
query_obj.row_limit = min(row_limit, config["SAMPLES_ROW_LIMIT"])
1 change: 0 additions & 1 deletion superset/common/query_context.py
@@ -451,7 +451,6 @@ def get_df_payload(
invalid_columns = [
col
for col in query_obj.columns
+ query_obj.groupby
+ get_column_names_from_metrics(query_obj.metrics or [])
if col not in self.datasource.column_names and col != DTTM_ALIAS
]
110 changes: 64 additions & 46 deletions superset/common/query_object.py
@@ -55,6 +55,9 @@ class DeprecatedField(NamedTuple):

DEPRECATED_FIELDS = (
DeprecatedField(old_name="granularity_sqla", new_name="granularity"),
DeprecatedField(old_name="groupby", new_name="columns"),
DeprecatedField(old_name="timeseries_limit", new_name="series_limit"),
DeprecatedField(old_name="timeseries_limit_metric", new_name="series_limit_metric"),
)

DEPRECATED_EXTRAS_FIELDS = (
@@ -74,63 +77,68 @@ class QueryObject: # pylint: disable=too-many-instance-attributes
annotation_layers: List[Dict[str, Any]]
applied_time_extras: Dict[str, str]
apply_fetch_values_predicate: bool
granularity: Optional[str]
columns: List[str]
datasource: Optional[BaseDatasource]
extras: Dict[str, Any]
filter: List[QueryObjectFilterClause]
from_dttm: Optional[datetime]
to_dttm: Optional[datetime]
granularity: Optional[str]
inner_from_dttm: Optional[datetime]
inner_to_dttm: Optional[datetime]
is_rowcount: bool
is_timeseries: bool
time_shift: Optional[timedelta]
groupby: List[str]
metrics: Optional[List[Metric]]
row_limit: int
row_offset: int
filter: List[QueryObjectFilterClause]
timeseries_limit: int
timeseries_limit_metric: Optional[Metric]
order_desc: bool
extras: Dict[str, Any]
columns: List[str]
orderby: List[OrderBy]
post_processing: List[Dict[str, Any]]
datasource: Optional[BaseDatasource]
metrics: Optional[List[Metric]]
result_type: Optional[ChartDataResultType]
is_rowcount: bool
row_limit: int
row_offset: int
series_columns: List[str]
series_limit: int
series_limit_metric: Optional[Metric]
time_offsets: List[str]
time_shift: Optional[timedelta]
to_dttm: Optional[datetime]
post_processing: List[Dict[str, Any]]
Comment on lines -77 to +102

@villebro (Member, Author), Sep 10, 2021:

In hindsight, to keep this PR as easily reviewable as possible, I should not have reordered these attributes (my sincere apologies to reviewers!), but having them unordered annoyed me so much I couldn't help myself. The point here is that I've added series_columns, series_limit and series_limit_metric, and removed timeseries_limit and timeseries_limit_metric, as the latter are picked up by kwargs and handled in the deprecated-field mapping methods.
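
A rough sketch of what that deprecation handling means in practice (hypothetical values; `row_limit` is passed explicitly here only to avoid the `config["ROW_LIMIT"]` lookup in the constructor, which needs a Superset app context):

# Legacy payloads keep working: deprecated kwargs are renamed on construction
# by _rename_deprecated_fields below.
qobj = QueryObject(groupby=["country"], timeseries_limit=10, row_limit=100)
assert qobj.columns == ["country"]  # `groupby` mapped to `columns`
assert qobj.series_limit == 10      # `timeseries_limit` mapped to `series_limit`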


def __init__( # pylint: disable=too-many-arguments,too-many-locals
self,
datasource: Optional[DatasourceDict] = None,
result_type: Optional[ChartDataResultType] = None,
annotation_layers: Optional[List[Dict[str, Any]]] = None,
applied_time_extras: Optional[Dict[str, str]] = None,
apply_fetch_values_predicate: bool = False,
granularity: Optional[str] = None,
metrics: Optional[List[Metric]] = None,
groupby: Optional[List[str]] = None,
columns: Optional[List[str]] = None,
datasource: Optional[DatasourceDict] = None,
extras: Optional[Dict[str, Any]] = None,
filters: Optional[List[QueryObjectFilterClause]] = None,
time_range: Optional[str] = None,
time_shift: Optional[str] = None,
granularity: Optional[str] = None,
is_rowcount: bool = False,
is_timeseries: Optional[bool] = None,
timeseries_limit: int = 0,
row_limit: Optional[int] = None,
row_offset: Optional[int] = None,
timeseries_limit_metric: Optional[Metric] = None,
metrics: Optional[List[Metric]] = None,
order_desc: bool = True,
extras: Optional[Dict[str, Any]] = None,
columns: Optional[List[str]] = None,
orderby: Optional[List[OrderBy]] = None,
post_processing: Optional[List[Optional[Dict[str, Any]]]] = None,
is_rowcount: bool = False,
result_type: Optional[ChartDataResultType] = None,
row_limit: Optional[int] = None,
row_offset: Optional[int] = None,
series_columns: Optional[List[str]] = None,
series_limit: int = 0,
series_limit_metric: Optional[Metric] = None,
time_range: Optional[str] = None,
time_shift: Optional[str] = None,
**kwargs: Any,
):
columns = columns or []
groupby = groupby or []
extras = extras or {}
annotation_layers = annotation_layers or []
self.time_offsets = kwargs.get("time_offsets", [])
self.inner_from_dttm = kwargs.get("inner_from_dttm")
self.inner_to_dttm = kwargs.get("inner_to_dttm")
if series_columns:
self.series_columns = series_columns
elif is_timeseries and metrics:
self.series_columns = columns
else:
self.series_columns = []
@ktmud (Member), Sep 14, 2021:

self.series_columns = series_columns or (columns if is_timeseries and metrics else [])

Would this be more Pythonic?

I'm also wondering whether we should have another layer of parameter consolidation before QueryObject to handle all the special overrides & fallbacks for legacy/deprecated parameters (e.g. timeseries_limit vs series_limit, columns vs groupby, sortby vs orderby). By isolating parameter consolidation in the Flask view handler layer, we reduce the number of parameters for QueryObject itself and simplify all downstream functions, which may help clean up deprecated code faster, all without affecting backward compatibility.

@villebro (Member, Author):

> Would this be more Pythonic?

IMO the current implementation is more pythonic/readable, as it doesn't require unnesting the inline logic when reading it. But I'm ok with the proposed snippet, too. I'm curious to hear what @dpgaspar thinks; it would be nice to add something on this to the code style guide.

> I'm also wondering whether we should have another layer of parameter consolidation before QueryObject [...]

I wanted to restrict this PR to the bare minimum amount of changes to contain the blast radius (whenever we're touching this type of core logic there's serious risk of regressions). I absolutely agree we should consolidate more (and should finish the consolidation work before cutting 2.0), but I'd propose doing it across multiple PRs to avoid making one big-bang PR.

A Member:

I have no strong opinions here, but I do like to use simple ternaries. The current implementation seems more readable to me here, though, since the proposed alternative nests a ternary.
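
To spell out the fallback being discussed (hypothetical values; `row_limit` is again passed explicitly to sidestep the config lookup in the constructor):

# With no explicit series_columns, a timeseries query with metrics treats
# every group-by column as a series column; otherwise the list stays empty.
qobj = QueryObject(
    columns=["country"], metrics=["count"], is_timeseries=True, row_limit=100
)
assert qobj.series_columns == ["country"]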


self.is_rowcount = is_rowcount
self.datasource = None
@@ -161,9 +169,7 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
# is_timeseries is True if time column is in either columns or groupby
# (both are dimensions)
self.is_timeseries = (
is_timeseries
if is_timeseries is not None
else DTTM_ALIAS in columns + groupby
is_timeseries if is_timeseries is not None else DTTM_ALIAS in columns
)
self.time_range = time_range
self.time_shift = parse_human_timedelta(time_shift)
@@ -183,8 +189,8 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
self.row_limit = config["ROW_LIMIT"] if row_limit is None else row_limit
self.row_offset = row_offset or 0
self.filter = filters or []
self.timeseries_limit = timeseries_limit
self.timeseries_limit_metric = timeseries_limit_metric
self.series_limit = series_limit
self.series_limit_metric = series_limit_metric
self.order_desc = order_desc
self.extras = extras

@@ -194,9 +200,12 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
)

self.columns = columns
self.groupby = groupby or []
self.orderby = orderby or []

self._rename_deprecated_fields(kwargs)
self._move_deprecated_extra_fields(kwargs)

def _rename_deprecated_fields(self, kwargs: Dict[str, Any]) -> None:
# rename deprecated fields
for field in DEPRECATED_FIELDS:
if field.old_name in kwargs:
@@ -216,6 +225,7 @@ def __init__( # pylint: disable=too-many-arguments,too-many-locals
)
setattr(self, field.new_name, value)

def _move_deprecated_extra_fields(self, kwargs: Dict[str, Any]) -> None:
# move deprecated extras fields to extras
for field in DEPRECATED_EXTRAS_FIELDS:
if field.old_name in kwargs:
@@ -254,6 +264,14 @@ def validate(
"""Validate query object"""
error: Optional[QueryObjectValidationError] = None
all_labels = self.metric_names + self.column_names
missing_series = [col for col in self.series_columns if col not in self.columns]
if missing_series:
_(
"The following entries in `series_columns` are missing "
"in `columns`: %(columns)s. ",
columns=", ".join(f'"{x}"' for x in missing_series),
)

A Contributor commented on this block:

@villebro you forgot to raise the exception here; #16946 fixes it.
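
A minimal sketch of the intended check, assuming the message should surface as a `QueryObjectValidationError` like the other validations in this method (whether to raise immediately or collect it into `error` is a design choice; the actual fix landed in #16946):

missing_series = [col for col in self.series_columns if col not in self.columns]
if missing_series:
    raise QueryObjectValidationError(
        _(
            "The following entries in `series_columns` are missing "
            "in `columns`: %(columns)s.",
            columns=", ".join(f'"{x}"' for x in missing_series),
        )
    )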

if len(set(all_labels)) < len(all_labels):
dup_labels = find_duplicates(all_labels)
error = QueryObjectValidationError(
@@ -270,24 +288,24 @@ def validate(
def to_dict(self) -> Dict[str, Any]:
query_object_dict = {
"apply_fetch_values_predicate": self.apply_fetch_values_predicate,
"granularity": self.granularity,
"groupby": self.groupby,
"columns": self.columns,
"extras": self.extras,
"filter": self.filter,
"from_dttm": self.from_dttm,
"to_dttm": self.to_dttm,
"granularity": self.granularity,
"inner_from_dttm": self.inner_from_dttm,
"inner_to_dttm": self.inner_to_dttm,
"is_rowcount": self.is_rowcount,
"is_timeseries": self.is_timeseries,
"metrics": self.metrics,
"row_limit": self.row_limit,
"row_offset": self.row_offset,
"filter": self.filter,
"timeseries_limit": self.timeseries_limit,
"timeseries_limit_metric": self.timeseries_limit_metric,
"order_desc": self.order_desc,
"extras": self.extras,
"columns": self.columns,
"orderby": self.orderby,
"row_limit": self.row_limit,
"row_offset": self.row_offset,
"series_columns": self.series_columns,
"series_limit": self.series_limit,
"series_limit_metric": self.series_limit_metric,
"to_dttm": self.to_dttm,
}
return query_object_dict
