Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix sorting and ambiguous join issue #1127

Merged
merged 14 commits into from
Jan 10, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 69 additions & 0 deletions elide-core/src/main/java/com/yahoo/elide/utils/JoinTrieNode.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Copyright 2019, Yahoo Inc.
* Licensed under the Apache License, Version 2.0
* See LICENSE file in project root for terms.
*/
package com.yahoo.elide.utils;

import com.yahoo.elide.core.EntityDictionary;
import com.yahoo.elide.core.Path;

import lombok.Data;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;

/**
* This is a structure for storing and de-duplicating elide join paths.
* Basically, it is a Trie which uses relationship field names to navigate through the path.
*/
@Data
public class JoinTrieNode {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a unit test class (JoinTrieNodeTest) to get test coverage to a high percentage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

private final Class<?> type;
private final Map<String, JoinTrieNode> fields = new HashMap<>();

public JoinTrieNode(Class<?> type) {
this.type = type;
}

public void addPaths(Set<Path> paths, EntityDictionary dictionary) {
paths.forEach(path -> addPath(path, dictionary));
}

/**
* Add all path elements into this Trie, starting from the root.
*
* @param path full Elide join path, i.e. <code>foo.bar.baz</code>
* @param dictionary dictionary to use.
*/
public void addPath(Path path, EntityDictionary dictionary) {
JoinTrieNode node = this;

for (Path.PathElement pathElement : path.getPathElements()) {
Class<?> entityClass = pathElement.getType();
String fieldName = pathElement.getFieldName();

if (!dictionary.isRelation(entityClass, fieldName)) {
break;
}

if (!fields.containsKey(fieldName)) {
node.addField(fieldName, new JoinTrieNode(pathElement.getFieldType()));

}

node = fields.get(fieldName);
}
}

/**
* Attach a field to this node.
*
* @param fieldName field name
* @param node field node
*/
private void addField(String fieldName, JoinTrieNode node) {
fields.put(fieldName, node);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ public void close() throws IOException {
@VisibleForTesting
Query buildQuery(EntityProjection entityProjection, RequestScope scope) {
Table table = queryEngine.getTable(entityProjection.getType());

AggregationDataStoreHelper agHelper = new AggregationDataStoreHelper(table, entityProjection);
return agHelper.getQuery();
EntityProjectionTranslator translator = new EntityProjectionTranslator(table,
entityProjection, scope.getDictionary());
return translator.getQuery();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,9 @@
*/
package com.yahoo.elide.datastores.aggregation;

import com.yahoo.elide.core.Path;
import com.yahoo.elide.core.EntityDictionary;
import com.yahoo.elide.core.exceptions.InvalidOperationException;
import com.yahoo.elide.core.filter.FilterPredicate;
import com.yahoo.elide.core.filter.expression.AndFilterExpression;
import com.yahoo.elide.core.filter.expression.FilterExpression;
import com.yahoo.elide.core.filter.expression.NotFilterExpression;
import com.yahoo.elide.core.filter.expression.OrFilterExpression;
import com.yahoo.elide.datastores.aggregation.filter.visitor.FilterConstraints;
import com.yahoo.elide.datastores.aggregation.filter.visitor.SplitFilterExpressionVisitor;
import com.yahoo.elide.datastores.aggregation.metadata.metric.MetricFunctionInvocation;
Expand All @@ -24,6 +20,7 @@
import com.yahoo.elide.datastores.aggregation.query.Query;
import com.yahoo.elide.datastores.aggregation.query.TimeDimensionProjection;
import com.yahoo.elide.request.Argument;
import com.yahoo.elide.request.Attribute;
import com.yahoo.elide.request.EntityProjection;
import com.yahoo.elide.request.Relationship;

Expand All @@ -39,27 +36,28 @@
/**
* Helper for Aggregation Data Store which does the work associated with extracting {@link Query}.
*/
public class AggregationDataStoreHelper {
public class EntityProjectionTranslator {
private AnalyticView queriedTable;

private EntityProjection entityProjection;
private Set<ColumnProjection> dimensionProjections;
private Set<TimeDimensionProjection> timeDimensions;
private List<MetricFunctionInvocation> metrics;
private FilterExpression whereFilter;
private FilterExpression havingFilter;
private EntityDictionary dictionary;

public AggregationDataStoreHelper(Table table, EntityProjection entityProjection) {
public EntityProjectionTranslator(Table table, EntityProjection entityProjection, EntityDictionary dictionary) {
if (!(table instanceof AnalyticView)) {
throw new InvalidOperationException("Queried table is not analyticView: " + table.getName());
}

this.queriedTable = (AnalyticView) table;
this.entityProjection = entityProjection;

this.dictionary = dictionary;
dimensionProjections = resolveNonTimeDimensions();
timeDimensions = resolveTimeDimensions();
metrics = resolveMetrics();

splitFilters();
}

Expand All @@ -68,7 +66,7 @@ public AggregationDataStoreHelper(Table table, EntityProjection entityProjection
* @return {@link Query} query object with all the parameters provided by user.
*/
public Query getQuery() {
return Query.builder()
Query query = Query.builder()
.analyticView(queriedTable)
.metrics(metrics)
.groupByDimensions(dimensionProjections)
Expand All @@ -78,6 +76,9 @@ public Query getQuery() {
.sorting(entityProjection.getSorting())
.pagination(entityProjection.getPagination())
.build();
QueryValidator validator = new QueryValidator(query, getAllFields(), dictionary, entityProjection.getType());
validator.validate();
return query;
}

/**
Expand All @@ -94,10 +95,6 @@ private void splitFilters() {
FilterConstraints constraints = filterExpression.accept(visitor);
whereFilter = constraints.getWhereExpression();
havingFilter = constraints.getHavingExpression();

if (havingFilter != null) {
validateHavingClause(havingFilter);
}
}

/**
Expand Down Expand Up @@ -187,55 +184,21 @@ private Set<String> getRelationships() {
}

/**
* Validate the having clause before execution. Having clause is not as flexible as where clause,
* the fields in having clause must be either or these two:
* 1. A grouped by dimension in this query
* 2. An aggregated metric in this query
*
* All grouped by dimensions are defined in the entity bean, so the last entity class of a filter path
* must match entity class of the query.
*
* @param havingClause having clause generated from this query
* Gets attribute names from {@link EntityProjection}.
* @return relationships list of {@link Attribute} names
*/
private void validateHavingClause(FilterExpression havingClause) {
// TODO: support having clause for alias
if (havingClause instanceof FilterPredicate) {
Path path = ((FilterPredicate) havingClause).getPath();
Path.PathElement last = path.lastElement().get();
Class<?> cls = last.getType();
String fieldName = last.getFieldName();

if (cls != queriedTable.getCls()) {
throw new InvalidOperationException(
String.format(
"Classes don't match when try filtering on %s in having clause of %s.",
cls.getSimpleName(),
queriedTable.getCls().getSimpleName()));
}

if (queriedTable.isMetric(fieldName)) {
if (metrics.stream().noneMatch(m -> m.getAlias().equals(fieldName))) {
throw new InvalidOperationException(
String.format(
"Metric field %s must be aggregated before filtering in having clause.",
fieldName));
}
} else {
if (dimensionProjections.stream().noneMatch(dim -> dim.getAlias().equals(fieldName))) {
throw new InvalidOperationException(
String.format(
"Dimension field %s must be grouped before filtering in having clause.",
fieldName));
}
}
} else if (havingClause instanceof AndFilterExpression) {
validateHavingClause(((AndFilterExpression) havingClause).getLeft());
validateHavingClause(((AndFilterExpression) havingClause).getRight());
} else if (havingClause instanceof OrFilterExpression) {
validateHavingClause(((OrFilterExpression) havingClause).getLeft());
validateHavingClause(((OrFilterExpression) havingClause).getRight());
} else if (havingClause instanceof NotFilterExpression) {
validateHavingClause(((NotFilterExpression) havingClause).getNegated());
}
private Set<String> getAttributes() {
return entityProjection.getAttributes().stream()
.map(Attribute::getName).collect(Collectors.toCollection(LinkedHashSet::new));
}

/**
* Helper method to get all field names from the {@link EntityProjection}.
* @return allFields set of all field names
*/
private Set<String> getAllFields() {
Set<String> allFields = getAttributes();
allFields.addAll(getRelationships());
return allFields;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
/*
* Copyright 2019, Yahoo Inc.
* Licensed under the Apache License, Version 2.0
* See LICENSE file in project root for terms.
*/
package com.yahoo.elide.datastores.aggregation;

import com.yahoo.elide.core.EntityDictionary;
import com.yahoo.elide.core.Path;
import com.yahoo.elide.core.exceptions.InvalidOperationException;
import com.yahoo.elide.core.filter.FilterPredicate;
import com.yahoo.elide.core.filter.expression.AndFilterExpression;
import com.yahoo.elide.core.filter.expression.FilterExpression;
import com.yahoo.elide.core.filter.expression.NotFilterExpression;
import com.yahoo.elide.core.filter.expression.OrFilterExpression;
import com.yahoo.elide.core.sort.Sorting;
import com.yahoo.elide.datastores.aggregation.metadata.metric.MetricFunctionInvocation;
import com.yahoo.elide.datastores.aggregation.metadata.models.AnalyticView;
import com.yahoo.elide.datastores.aggregation.query.ColumnProjection;
import com.yahoo.elide.datastores.aggregation.query.Query;
import org.apache.commons.collections.CollectionUtils;

import java.util.List;
import java.util.Map;
import java.util.Set;

public class QueryValidator {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's create a Unit Test class (QueryValidatorTest) for this class and verify all of the corner cases.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

QueryValidator and EntityProjectionTranslator are tested in integration test.

Copy link
Member

@aklish aklish Jan 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be doing more testing in unit tests. Unit test are easer to maintain. They also execute faster and keep the build times more reasonable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍


private Query query;
private Set<String> allFields;
private EntityDictionary dictionary;
private Class<?> type;
private AnalyticView queriedTable;
private List<MetricFunctionInvocation> metrics;
private Set<ColumnProjection> dimensionProjections;

public QueryValidator(Query query, Set<String> allFields, EntityDictionary dictionary, Class<?> type) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The set of all fields should be contained within the query itself. Instead of passing this in as an argument, the QueryValidator can build the list of all the fields from the query.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment applies to the type argument.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type is fixed, for the fields one, the projections in the query are using alias, but the sorting clauses is still using fieldname, so I think it is ok to keep allFields for now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

this.query = query;
this.allFields = allFields;
this.dictionary = dictionary;
this.type = type;
this.queriedTable = query.getAnalyticView();
this.metrics = query.getMetrics();
this.dimensionProjections = query.getDimensions();
}

/**
* Method that handles all checks to make sure query is valid before we attempt to execute the query.
*/
public void validate() {
FilterExpression havingClause = query.getHavingFilter();
validateHavingClause(havingClause);
validateSorting();
validateMetricFunction();
}

/**
* Checks to make sure at least one metric is being aggregated on.
*/
private void validateMetricFunction() {
if (CollectionUtils.isEmpty(metrics)) {
throw new InvalidOperationException("Must provide at least one metric in query");
}
}

/**
* Validate the having clause before execution. Having clause is not as flexible as where clause,
* the fields in having clause must be either or these two:
* 1. A grouped by dimension in this query
* 2. An aggregated metric in this query
*
* All grouped by dimensions are defined in the entity bean, so the last entity class of a filter path
* must match entity class of the query.
*
* @param havingClause having clause generated from this query
*/
private void validateHavingClause(FilterExpression havingClause) {
// TODO: support having clause for alias
if (havingClause instanceof FilterPredicate) {
Path path = ((FilterPredicate) havingClause).getPath();
Path.PathElement last = path.lastElement().get();
Class<?> cls = last.getType();
String fieldName = last.getFieldName();

if (cls != queriedTable.getCls()) {
throw new InvalidOperationException(
String.format(
"Classes don't match when try filtering on %s in having clause of %s.",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should try to make the errors friendly to a client of Elide - so we should remove things like "classes".
Let's rephrase in terms of what is wrong with the API request.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

cls.getSimpleName(),
queriedTable.getCls().getSimpleName()));
}

if (queriedTable.isMetric(fieldName)) {
if (metrics.stream().noneMatch(m -> m.getAlias().equals(fieldName))) {
throw new InvalidOperationException(
String.format(
"Metric field %s must be aggregated before filtering in having clause.",
fieldName));
}
} else {
if (dimensionProjections.stream().noneMatch(dim -> dim.getAlias().equals(fieldName))) {
throw new InvalidOperationException(
String.format(
"Dimension field %s must be grouped before filtering in having clause.",
fieldName));
}
}
} else if (havingClause instanceof AndFilterExpression) {
validateHavingClause(((AndFilterExpression) havingClause).getLeft());
validateHavingClause(((AndFilterExpression) havingClause).getRight());
} else if (havingClause instanceof OrFilterExpression) {
validateHavingClause(((OrFilterExpression) havingClause).getLeft());
validateHavingClause(((OrFilterExpression) havingClause).getRight());
} else if (havingClause instanceof NotFilterExpression) {
validateHavingClause(((NotFilterExpression) havingClause).getNegated());
}
}

/**
* Method to verify that all the sorting options provided
* by the user are valid and supported.
*/
public void validateSorting() {
Sorting sorting = query.getSorting();
if (sorting == null) {
return;
}
Map<Path, Sorting.SortOrder> sortClauses = sorting.getValidSortingRules(type, dictionary);
sortClauses.keySet().forEach((path) -> validateSortingPath(path, allFields));
}

/**
* Verifies that the current path can be sorted on
* @param path The path that we are validating
* @param allFields Set of all field names included in initial query
*/
private void validateSortingPath(Path path, Set<String> allFields) {
List<Path.PathElement> pathElemenets = path.getPathElements();

//TODO add support for double nested sorting
if (pathElemenets.size() > 2) {
throw new UnsupportedOperationException(
"Currently sorting on double nested fields is not supported");
}
Path.PathElement currentElement = pathElemenets.get(0);
String currentField = currentElement.getFieldName();
Class<?> currentClass = currentElement.getType();
if (!allFields.stream().anyMatch(field -> field.equals(currentField))) {
throw new InvalidOperationException("Can't sort on " + currentField + " as it is not present in query");
}
if (dictionary.getIdFieldName(currentClass).equals(currentField)
|| currentField.equals(EntityDictionary.REGULAR_ID_NAME)) {
throw new InvalidOperationException("Sorting on id field is not permitted");
}
}
}
Loading