-
Notifications
You must be signed in to change notification settings - Fork 3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
fix(browse): Fix browse pagination and multi-browse path issue #2984
Changes from 1 commit
41b28dc
96d9d6d
5d90a33
096a1e0
b949cf4
8a0e921
9c70b0d
983d7fc
07599d8
1cdf6fa
7c953ac
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,6 +20,13 @@ public interface SearchService { | |
*/ | ||
void clear(); | ||
|
||
/** | ||
* Get the number of documents corresponding to the entity | ||
* | ||
* @param entityName name of the entity | ||
*/ | ||
long docCount(@Nonnull String entityName); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does this hold up given Timeseries aspects? Can we call this getSnapshotDocCount or something of the type to illustrate this divergence? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Timeseries service doesn't implement SearchService. This is specific to search backend (which is only used by versioned aspects) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh okay got it |
||
|
||
/** | ||
* Updates or inserts the given search document. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,7 +18,6 @@ | |
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
|
@@ -37,7 +36,7 @@ | |
import org.elasticsearch.search.SearchHit; | ||
import org.elasticsearch.search.aggregations.AggregationBuilder; | ||
import org.elasticsearch.search.aggregations.AggregationBuilders; | ||
import org.elasticsearch.search.aggregations.BucketOrder; | ||
import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; | ||
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; | ||
import org.elasticsearch.search.aggregations.bucket.terms.ParsedTerms; | ||
import org.elasticsearch.search.builder.SearchSourceBuilder; | ||
|
@@ -57,6 +56,9 @@ public class ESBrowseDAO { | |
private static final String URN = "urn"; | ||
private static final String REMOVED = "removed"; | ||
|
||
private static final String GROUP_AGG = "groups"; | ||
private static final String ALL_PATHS = "allPaths"; | ||
|
||
/** | ||
* Gets a list of groups/entities that match given browse request. | ||
* | ||
|
@@ -74,14 +76,26 @@ public BrowseResult browse(@Nonnull String entityName, @Nonnull String path, @Nu | |
|
||
try { | ||
final String indexName = indexConvention.getIndexName(entityRegistry.getEntitySpec(entityName)); | ||
|
||
final SearchResponse groupsResponse = | ||
client.search(constructGroupsSearchRequest(indexName, path, requestMap, 0, 1000), RequestOptions.DEFAULT); | ||
client.search(constructGroupsSearchRequest(indexName, path, requestMap), RequestOptions.DEFAULT); | ||
final BrowseResultMetadata browseResultMetadata = extractGroupsResponse(groupsResponse, path, from, size); | ||
final int numGroups = browseResultMetadata.getTotalNumEntities().intValue(); | ||
|
||
// Based on the number of groups returned, compute the from and size to query for entities | ||
int entityFrom = Math.max(from - numGroups, 0); | ||
int entitySize = Math.min(Math.max(from + size - numGroups, 0), size); | ||
final SearchResponse entitiesResponse = | ||
client.search(constructEntitiesSearchRequest(indexName, path, requestMap, from, size), | ||
client.search(constructEntitiesSearchRequest(indexName, path, requestMap, entityFrom, entitySize), | ||
RequestOptions.DEFAULT); | ||
Comment on lines
99
to
101
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. small optimization available here- if entitySize is 0 we don't have to do this query There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. or do we need to for counts? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. I was going to do that until you mentioned we need the whole counts |
||
final BrowseResult result = extractQueryResult(groupsResponse, entitiesResponse, path, from); | ||
result.getMetadata().setPath(path); | ||
return result; | ||
final int numEntities = (int) entitiesResponse.getHits().getTotalHits().value; | ||
final List<BrowseResultEntity> browseResultEntityList = extractEntitiesResponse(entitiesResponse, path); | ||
|
||
return new BrowseResult().setMetadata(browseResultMetadata) | ||
.setEntities(new BrowseResultEntityArray(browseResultEntityList)) | ||
.setNumEntities(numGroups + numEntities) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be numElements or total or some other name that indicates it is a sum of entities + groups? I would expect numEntities to be numEntities + group.map(group -> group.count).sum() There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah the naming here is confusing... I'd agree I'd think num entities would be either a. The number of concrete entities matching the exact path OR In fact, it may be useful to include both of these in the response separately. Can we nip this in the bud with this PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unfortunately BrowseResult PDL is in datahub-gms right now. We can make the above changes after moving them over here. |
||
.setFrom(from) | ||
.setPageSize(size); | ||
} catch (Exception e) { | ||
log.error("Browse query failed: " + e.getMessage()); | ||
throw new ESQueryException("Browse query failed: ", e); | ||
|
@@ -99,11 +113,11 @@ private AggregationBuilder buildAggregations(@Nonnull String path) { | |
final String includeFilter = ESUtils.escapeReservedCharacters(path) + "/.*"; | ||
final String excludeFilter = ESUtils.escapeReservedCharacters(path) + "/.*/.*"; | ||
|
||
return AggregationBuilders.terms("groups") | ||
return AggregationBuilders.terms(GROUP_AGG) | ||
.field(BROWSE_PATH) | ||
.size(Integer.MAX_VALUE) | ||
.order(BucketOrder.count(true)) // Ascending order | ||
.includeExclude(new IncludeExclude(includeFilter, excludeFilter)); | ||
.includeExclude(new IncludeExclude(includeFilter, excludeFilter)) | ||
.subAggregation(AggregationBuilders.terms(ALL_PATHS).field(BROWSE_PATH).size(Integer.MAX_VALUE)); | ||
} | ||
|
||
/** | ||
|
@@ -114,11 +128,10 @@ private AggregationBuilder buildAggregations(@Nonnull String path) { | |
*/ | ||
@Nonnull | ||
protected SearchRequest constructGroupsSearchRequest(@Nonnull String indexName, @Nonnull String path, | ||
@Nonnull Map<String, String> requestMap, int from, int size) { | ||
@Nonnull Map<String, String> requestMap) { | ||
final SearchRequest searchRequest = new SearchRequest(indexName); | ||
final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder(); | ||
searchSourceBuilder.from(from); | ||
searchSourceBuilder.size(size); | ||
searchSourceBuilder.size(0); | ||
searchSourceBuilder.query(buildQueryString(path, requestMap, true)); | ||
searchSourceBuilder.aggregation(buildAggregations(path)); | ||
searchRequest.source(searchSourceBuilder); | ||
|
@@ -180,29 +193,6 @@ SearchRequest constructEntitiesSearchRequest(@Nonnull String indexName, @Nonnull | |
return searchRequest; | ||
} | ||
|
||
/** | ||
* Extracts search responses into browse result. | ||
* | ||
* @param groupsResponse groups search response | ||
* @param entitiesResponse entity search response | ||
* @param path the path which is being browsed | ||
* @param from index of first entity | ||
* @return {@link BrowseResult} | ||
*/ | ||
@Nonnull | ||
private BrowseResult extractQueryResult(@Nonnull SearchResponse groupsResponse, | ||
@Nonnull SearchResponse entitiesResponse, @Nonnull String path, int from) { | ||
final List<BrowseResultEntity> browseResultEntityList = extractEntitiesResponse(entitiesResponse, path); | ||
final BrowseResultMetadata browseResultMetadata = extractGroupsResponse(groupsResponse, path); | ||
browseResultMetadata.setTotalNumEntities( | ||
browseResultMetadata.getTotalNumEntities() + entitiesResponse.getHits().getTotalHits().value); | ||
return new BrowseResult().setEntities(new BrowseResultEntityArray(browseResultEntityList)) | ||
.setMetadata(browseResultMetadata) | ||
.setFrom(from) | ||
.setPageSize(browseResultEntityList.size()) | ||
.setNumEntities((int) entitiesResponse.getHits().getTotalHits().value); | ||
} | ||
|
||
/** | ||
* Extracts group search response into browse result metadata. | ||
* | ||
|
@@ -211,21 +201,35 @@ private BrowseResult extractQueryResult(@Nonnull SearchResponse groupsResponse, | |
* @return {@link BrowseResultMetadata} | ||
*/ | ||
@Nonnull | ||
private BrowseResultMetadata extractGroupsResponse(@Nonnull SearchResponse groupsResponse, @Nonnull String path) { | ||
final ParsedTerms groups = (ParsedTerms) groupsResponse.getAggregations().getAsMap().get("groups"); | ||
private BrowseResultMetadata extractGroupsResponse(@Nonnull SearchResponse groupsResponse, @Nonnull String path, | ||
int from, int size) { | ||
final ParsedTerms groups = groupsResponse.getAggregations().get(GROUP_AGG); | ||
final List<BrowseResultGroup> groupsAgg = groups.getBuckets() | ||
.stream() | ||
.filter(this::validateBucket) | ||
.map(group -> new BrowseResultGroup().setName(getSimpleName(group.getKeyAsString())) | ||
.setCount(group.getDocCount())) | ||
// Sort by document count desc and then by name | ||
.sorted(Comparator.<BrowseResultGroup, Long>comparing(BrowseResultGroup::getCount).reversed() | ||
.thenComparing(Comparator.comparing(BrowseResultGroup::getName))) | ||
.collect(Collectors.toList()); | ||
return new BrowseResultMetadata().setGroups(new BrowseResultGroupArray(groupsAgg)) | ||
.setTotalNumEntities(groupsResponse.getHits().getTotalHits().value) | ||
// Get the groups that are in the from to from + size range | ||
final List<BrowseResultGroup> paginatedGroups = | ||
groupsAgg.size() <= from ? Collections.emptyList() : groupsAgg.subList(from, Math.min(from + size, groupsAgg.size())); | ||
return new BrowseResultMetadata().setGroups(new BrowseResultGroupArray(paginatedGroups)) | ||
.setTotalNumEntities(groupsAgg.size()) | ||
.setPath(path); | ||
} | ||
|
||
/** | ||
* Check if there are any paths that extends the matchedPath signifying that the path does not point to an entity | ||
*/ | ||
private boolean validateBucket(@Nonnull MultiBucketsAggregation.Bucket bucket) { | ||
final ParsedTerms groups = bucket.getAggregations().get(ALL_PATHS); | ||
final String matchedPath = bucket.getKeyAsString(); | ||
return groups.getBuckets() | ||
.stream() | ||
.map(MultiBucketsAggregation.Bucket::getKeyAsString) | ||
.anyMatch(bucketPath -> (bucketPath.length() > matchedPath.length() && bucketPath.startsWith(matchedPath))); | ||
} | ||
|
||
/** | ||
* Extracts entity search response into list of browse result entities. | ||
* | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't want to disrupt this PR too much - but what do you think about naming this getTotalEntityCount? getNumEntities is a bit too vague IMO
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done!