-
Notifications
You must be signed in to change notification settings - Fork 230
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix sorting and ambiguous join issue #1127
Changes from 6 commits
75fb02c
6c5760b
0e29d41
d74a853
1a9857c
84dfe53
3e6577d
2098cf8
e059910
b26bf25
0dc6072
5c0503f
221ff8a
c05c595
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* | ||
* Copyright 2019, Yahoo Inc. | ||
* Licensed under the Apache License, Version 2.0 | ||
* See LICENSE file in project root for terms. | ||
*/ | ||
package com.yahoo.elide.utils; | ||
|
||
import com.yahoo.elide.core.EntityDictionary; | ||
import com.yahoo.elide.core.Path; | ||
|
||
import lombok.Data; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
/** | ||
* This is a structure for storing and de-duplicating elide join paths. | ||
* Basically, it is a Trie which uses relationship field names to navigate through the path. | ||
*/ | ||
@Data | ||
public class JoinTrieNode { | ||
private final Class<?> type; | ||
private final Map<String, JoinTrieNode> fields = new HashMap<>(); | ||
|
||
public JoinTrieNode(Class<?> type) { | ||
this.type = type; | ||
} | ||
|
||
public void addPaths(Set<Path> paths, EntityDictionary dictionary) { | ||
paths.forEach(path -> addPath(path, dictionary)); | ||
} | ||
|
||
/** | ||
* Add all path elements into this Trie, starting from the root. | ||
* | ||
* @param path full Elide join path, i.e. <code>foo.bar.baz</code> | ||
* @param dictionary dictionary to use. | ||
*/ | ||
public void addPath(Path path, EntityDictionary dictionary) { | ||
JoinTrieNode node = this; | ||
|
||
for (Path.PathElement pathElement : path.getPathElements()) { | ||
Class<?> entityClass = pathElement.getType(); | ||
String fieldName = pathElement.getFieldName(); | ||
|
||
if (!dictionary.isRelation(entityClass, fieldName)) { | ||
break; | ||
} | ||
|
||
if (!fields.containsKey(fieldName)) { | ||
node.addField(fieldName, new JoinTrieNode(pathElement.getFieldType())); | ||
|
||
} | ||
|
||
node = fields.get(fieldName); | ||
} | ||
} | ||
|
||
/** | ||
* Attach a field to this node. | ||
* | ||
* @param fieldName field name | ||
* @param node field node | ||
*/ | ||
private void addField(String fieldName, JoinTrieNode node) { | ||
fields.put(fieldName, node); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
/* | ||
* Copyright 2019, Yahoo Inc. | ||
* Licensed under the Apache License, Version 2.0 | ||
* See LICENSE file in project root for terms. | ||
*/ | ||
package com.yahoo.elide.datastores.aggregation; | ||
|
||
import com.yahoo.elide.core.EntityDictionary; | ||
import com.yahoo.elide.core.Path; | ||
import com.yahoo.elide.core.exceptions.InvalidOperationException; | ||
import com.yahoo.elide.core.filter.FilterPredicate; | ||
import com.yahoo.elide.core.filter.expression.AndFilterExpression; | ||
import com.yahoo.elide.core.filter.expression.FilterExpression; | ||
import com.yahoo.elide.core.filter.expression.NotFilterExpression; | ||
import com.yahoo.elide.core.filter.expression.OrFilterExpression; | ||
import com.yahoo.elide.core.sort.Sorting; | ||
import com.yahoo.elide.datastores.aggregation.metadata.metric.MetricFunctionInvocation; | ||
import com.yahoo.elide.datastores.aggregation.metadata.models.AnalyticView; | ||
import com.yahoo.elide.datastores.aggregation.query.ColumnProjection; | ||
import com.yahoo.elide.datastores.aggregation.query.Query; | ||
import org.apache.commons.collections.CollectionUtils; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
public class QueryValidator { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's create a Unit Test class (QueryValidatorTest) for this class and verify all of the corner cases. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should be doing more testing in unit tests. Unit test are easer to maintain. They also execute faster and keep the build times more reasonable. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
|
||
private Query query; | ||
private Set<String> allFields; | ||
private EntityDictionary dictionary; | ||
private Class<?> type; | ||
private AnalyticView queriedTable; | ||
private List<MetricFunctionInvocation> metrics; | ||
private Set<ColumnProjection> dimensionProjections; | ||
|
||
public QueryValidator(Query query, Set<String> allFields, EntityDictionary dictionary, Class<?> type) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The set of all fields should be contained within the query itself. Instead of passing this in as an argument, the QueryValidator can build the list of all the fields from the query. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same comment applies to the type argument. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Type is fixed, for the fields one, the projections in the query are using alias, but the sorting clauses is still using fieldname, so I think it is ok to keep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
this.query = query; | ||
this.allFields = allFields; | ||
this.dictionary = dictionary; | ||
this.type = type; | ||
this.queriedTable = query.getAnalyticView(); | ||
this.metrics = query.getMetrics(); | ||
this.dimensionProjections = query.getDimensions(); | ||
} | ||
|
||
/** | ||
* Method that handles all checks to make sure query is valid before we attempt to execute the query. | ||
*/ | ||
public void validate() { | ||
FilterExpression havingClause = query.getHavingFilter(); | ||
validateHavingClause(havingClause); | ||
validateSorting(); | ||
validateMetricFunction(); | ||
} | ||
|
||
/** | ||
* Checks to make sure at least one metric is being aggregated on. | ||
*/ | ||
private void validateMetricFunction() { | ||
if (CollectionUtils.isEmpty(metrics)) { | ||
throw new InvalidOperationException("Must provide at least one metric in query"); | ||
} | ||
} | ||
|
||
/** | ||
* Validate the having clause before execution. Having clause is not as flexible as where clause, | ||
* the fields in having clause must be either or these two: | ||
* 1. A grouped by dimension in this query | ||
* 2. An aggregated metric in this query | ||
* | ||
* All grouped by dimensions are defined in the entity bean, so the last entity class of a filter path | ||
* must match entity class of the query. | ||
* | ||
* @param havingClause having clause generated from this query | ||
*/ | ||
private void validateHavingClause(FilterExpression havingClause) { | ||
// TODO: support having clause for alias | ||
if (havingClause instanceof FilterPredicate) { | ||
Path path = ((FilterPredicate) havingClause).getPath(); | ||
Path.PathElement last = path.lastElement().get(); | ||
Class<?> cls = last.getType(); | ||
String fieldName = last.getFieldName(); | ||
|
||
if (cls != queriedTable.getCls()) { | ||
throw new InvalidOperationException( | ||
String.format( | ||
"Classes don't match when try filtering on %s in having clause of %s.", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we should try to make the errors friendly to a client of Elide - so we should remove things like "classes". There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 👍 |
||
cls.getSimpleName(), | ||
queriedTable.getCls().getSimpleName())); | ||
} | ||
|
||
if (queriedTable.isMetric(fieldName)) { | ||
if (metrics.stream().noneMatch(m -> m.getAlias().equals(fieldName))) { | ||
throw new InvalidOperationException( | ||
String.format( | ||
"Metric field %s must be aggregated before filtering in having clause.", | ||
fieldName)); | ||
} | ||
} else { | ||
if (dimensionProjections.stream().noneMatch(dim -> dim.getAlias().equals(fieldName))) { | ||
throw new InvalidOperationException( | ||
String.format( | ||
"Dimension field %s must be grouped before filtering in having clause.", | ||
fieldName)); | ||
} | ||
} | ||
} else if (havingClause instanceof AndFilterExpression) { | ||
validateHavingClause(((AndFilterExpression) havingClause).getLeft()); | ||
validateHavingClause(((AndFilterExpression) havingClause).getRight()); | ||
} else if (havingClause instanceof OrFilterExpression) { | ||
validateHavingClause(((OrFilterExpression) havingClause).getLeft()); | ||
validateHavingClause(((OrFilterExpression) havingClause).getRight()); | ||
} else if (havingClause instanceof NotFilterExpression) { | ||
validateHavingClause(((NotFilterExpression) havingClause).getNegated()); | ||
} | ||
} | ||
|
||
/** | ||
* Method to verify that all the sorting options provided | ||
* by the user are valid and supported. | ||
*/ | ||
public void validateSorting() { | ||
Sorting sorting = query.getSorting(); | ||
if (sorting == null) { | ||
return; | ||
} | ||
Map<Path, Sorting.SortOrder> sortClauses = sorting.getValidSortingRules(type, dictionary); | ||
sortClauses.keySet().forEach((path) -> validateSortingPath(path, allFields)); | ||
} | ||
|
||
/** | ||
* Verifies that the current path can be sorted on | ||
* @param path The path that we are validating | ||
* @param allFields Set of all field names included in initial query | ||
*/ | ||
private void validateSortingPath(Path path, Set<String> allFields) { | ||
List<Path.PathElement> pathElemenets = path.getPathElements(); | ||
|
||
//TODO add support for double nested sorting | ||
if (pathElemenets.size() > 2) { | ||
throw new UnsupportedOperationException( | ||
"Currently sorting on double nested fields is not supported"); | ||
} | ||
Path.PathElement currentElement = pathElemenets.get(0); | ||
String currentField = currentElement.getFieldName(); | ||
Class<?> currentClass = currentElement.getType(); | ||
if (!allFields.stream().anyMatch(field -> field.equals(currentField))) { | ||
throw new InvalidOperationException("Can't sort on " + currentField + " as it is not present in query"); | ||
} | ||
if (dictionary.getIdFieldName(currentClass).equals(currentField) | ||
|| currentField.equals(EntityDictionary.REGULAR_ID_NAME)) { | ||
throw new InvalidOperationException("Sorting on id field is not permitted"); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Let's add a unit test class (JoinTrieNodeTest) to get test coverage to a high percentage.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍