Skip to content

Commit

Permalink
search permissions: reduce findDvObject calls #2036
Browse files Browse the repository at this point in the history
The profiler shows significantly better performance when we reduce the
number of calls to findDvObject so we pass around the DvObject itself
more often, rather than looking it up over and over.

The profiler also suggests that batching the commits to Solr may also
yield a good performance gain but that's for a different commit.

The index API endpoint "missing" is removed as part of this commit since
it relies on code that was rewritten as part of this commit but it was
little used anyway since there's a newer "skipIndexed" boolean for other
index endpoints for catching up an index. We can revisit if we want to
re-add the "missing" endpoint.

Finally the createSolrDoc method was used to SearchUtil and tests were
written for it.

Here are the numbers for before and after this commit from running
scripts/issues/2036/grant-role-then-revoke

        | Solr Up   | Solr Down
------- | --------- | -------
grant   | 0m25.653s | 0m9.699s
revoke  | 0m23.539s | 0m9.968s

        | Solr Up   | Solr Down
------- | --------- | -------
grant   | 0m7.422s  | 0m10.348s
revoke  | 0m6.758s  | 0m10.341s
  • Loading branch information
pdurbin committed Jul 16, 2015
1 parent b4e3e74 commit 0d40bd4
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 87 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,8 @@ public DvObject updatePermissionIndexTime(DvObject dvObject) {
* dvObject before we try to set this timestamp? See
* https://github.com/IQSS/dataverse/commit/6ad0ebb272c8cb46368cb76784b55dbf33eea947
*/
DvObject dvObjectToModify = findDvObject(dvObject.getId());
dvObjectToModify.setPermissionIndexTime(new Timestamp(new Date().getTime()));
DvObject savedDvObject = em.merge(dvObjectToModify);
dvObject.setPermissionIndexTime(new Timestamp(new Date().getTime()));
DvObject savedDvObject = em.merge(dvObject);
return savedDvObject;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ public Future<String> indexDataverse(Dataverse dataverse) {
}

dvObjectService.updateContentIndexTime(dataverse);
IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse.getId());
IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse);
String msg = "indexed dataverse " + dataverse.getId() + ":" + dataverse.getAlias() + ". Response from permission indexing: " + indexResponse.getMessage();
return new AsyncResult<>(msg);

Expand Down
30 changes: 12 additions & 18 deletions src/main/java/edu/harvard/iq/dataverse/api/Index.java
Original file line number Diff line number Diff line change
Expand Up @@ -251,21 +251,6 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L
}
}

/**
* Note that this method is best used in a migration scenario because it
* skips the normal Solr doc cleanup of deleting drafts and other versions
* from Solr.
*/
@GET
@Path("missing")
public Response indexMissing() {
/**
* @todo How can we display the result?
*/
Future<String> result = solrIndexService.indexMissing();
return okResponse("index missing started, Solr doc cleanup operations will be skipped");
}

/**
* This is just a demo of the modular math logic we use for indexAll.
*/
Expand Down Expand Up @@ -295,8 +280,13 @@ public Response indexAllPermissions() {
@GET
@Path("perms/{id}")
public Response indexPermissions(@PathParam("id") Long id) {
IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(id);
return okResponse(indexResponse.getMessage());
DvObject dvObject = dvObjectService.findDvObject(id);
if (dvObject == null) {
return errorResponse(Status.BAD_REQUEST, "Could not find DvObject based on id " + id);
} else {
IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dvObject);
return okResponse(indexResponse.getMessage());
}
}

@GET
Expand Down Expand Up @@ -543,7 +533,11 @@ public Response searchPermsDebug(
return errorResponse(Response.Status.UNAUTHORIZED, "Invalid apikey '" + apiToken + "'");
}

List<DvObjectSolrDoc> solrDocs = SolrIndexService.determineSolrDocs(dvObjectId);
DvObject dvObjectToLookUp = dvObjectService.findDvObject(dvObjectId);
if (dvObjectToLookUp == null) {
return errorResponse(Status.BAD_REQUEST, "Could not find DvObject based on id " + dvObjectId);
}
List<DvObjectSolrDoc> solrDocs = SolrIndexService.determineSolrDocs(dvObjectToLookUp);

JsonObjectBuilder data = Json.createObjectBuilder();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException {
/**
* @todo what should we do with the indexRespose?
*/
IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataset.getId());
IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataset);

// set the subject of the parent (all the way up) Dataverses
DatasetField subject = null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public Dataverse execute(CommandContext ctxt) throws CommandException {
/**
* @todo what should we do with the indexRespose?
*/
IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataverse.getId());
IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataverse);
return savedDataverse;

}
Expand Down
15 changes: 15 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package edu.harvard.iq.dataverse.search;

import edu.harvard.iq.dataverse.IndexServiceBean;
import org.apache.solr.common.SolrInputDocument;

public class SearchUtil {

/**
Expand Down Expand Up @@ -37,4 +40,16 @@ public static String sanitizeQuery(String query) {
return query;
}

public static SolrInputDocument createSolrDoc(DvObjectSolrDoc dvObjectSolrDoc) {
if (dvObjectSolrDoc == null) {
return null;
}
SolrInputDocument solrInputDocument = new SolrInputDocument();
solrInputDocument.addField(SearchFields.ID, dvObjectSolrDoc.getSolrId() + IndexServiceBean.discoverabilityPermissionSuffix);
solrInputDocument.addField(SearchFields.DEFINITION_POINT, dvObjectSolrDoc.getSolrId());
solrInputDocument.addField(SearchFields.DEFINITION_POINT_DVOBJECT_ID, dvObjectSolrDoc.getDvObjectId());
solrInputDocument.addField(SearchFields.DISCOVERABLE_BY, dvObjectSolrDoc.getPermissions());
return solrInputDocument;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -77,13 +77,12 @@ public class SolrIndexServiceBean {
*/
private boolean unpublishedDataRelatedToMeModeEnabled = true;

public List<DvObjectSolrDoc> determineSolrDocs(Long dvObjectId) {
public List<DvObjectSolrDoc> determineSolrDocs(DvObject dvObject) {
List<DvObjectSolrDoc> emptyList = new ArrayList<>();
List<DvObjectSolrDoc> solrDocs = emptyList;
DvObject dvObject = dvObjectService.findDvObject(dvObjectId);
if (dvObject == null) {
return emptyList;
}
List<DvObjectSolrDoc> solrDocs = emptyList;
if (dvObject.isInstanceofDataverse()) {
DvObjectSolrDoc dataverseSolrDoc = constructDataverseSolrDoc((Dataverse) dvObject);
solrDocs.add(dataverseSolrDoc);
Expand Down Expand Up @@ -270,7 +269,7 @@ public IndexResponse indexAllPermissions() {
}
files.add(datafile);
} else {
definitionPoints.addAll(determineSolrDocs(dvObject.getId()));
definitionPoints.addAll(determineSolrDocs(dvObject));
}
}

Expand All @@ -287,7 +286,7 @@ public IndexResponse indexAllPermissions() {

for (DvObjectSolrDoc dvObjectSolrDoc : definitionPoints) {
logger.info("creating solr doc in memory for " + dvObjectSolrDoc.getSolrId());
SolrInputDocument solrInputDocument = createSolrDoc(dvObjectSolrDoc);
SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(dvObjectSolrDoc);
logger.info("adding to list of docs to index " + dvObjectSolrDoc.getSolrId());
docs.add(solrInputDocument);
}
Expand All @@ -307,21 +306,24 @@ public IndexResponse indexAllPermissions() {

}

public IndexResponse indexPermissionsForOneDvObject(long dvObjectId) {
public IndexResponse indexPermissionsForOneDvObject(DvObject dvObject) {
if (dvObject == null) {
return new IndexResponse("problem indexing... null DvObject passed in");
}
long dvObjectId = dvObject.getId();
Collection<SolrInputDocument> docs = new ArrayList<>();

List<DvObjectSolrDoc> definitionPoints = determineSolrDocs(dvObjectId);
List<DvObjectSolrDoc> definitionPoints = determineSolrDocs(dvObject);

for (DvObjectSolrDoc dvObjectSolrDoc : definitionPoints) {
SolrInputDocument solrInputDocument = createSolrDoc(dvObjectSolrDoc);
SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(dvObjectSolrDoc);
docs.add(solrInputDocument);
}
try {
persistToSolr(docs);
boolean updatePermissionTimeSuccessful = false;
DvObject dvObjectToUpdatePermissionTimeOn = dvObjectService.findDvObject(dvObjectId);
if (dvObjectToUpdatePermissionTimeOn != null) {
DvObject savedDvObject = dvObjectService.updatePermissionIndexTime(dvObjectService.findDvObject(dvObjectToUpdatePermissionTimeOn.getId()));
if (dvObject != null) {
DvObject savedDvObject = dvObjectService.updatePermissionIndexTime(dvObject);
if (savedDvObject != null) {
updatePermissionTimeSuccessful = true;
}
Expand All @@ -333,15 +335,6 @@ public IndexResponse indexPermissionsForOneDvObject(long dvObjectId) {

}

private SolrInputDocument createSolrDoc(DvObjectSolrDoc dvObjectSolrDoc) {
SolrInputDocument solrInputDocument = new SolrInputDocument();
solrInputDocument.addField(SearchFields.ID, dvObjectSolrDoc.getSolrId() + IndexServiceBean.discoverabilityPermissionSuffix);
solrInputDocument.addField(SearchFields.DEFINITION_POINT, dvObjectSolrDoc.getSolrId());
solrInputDocument.addField(SearchFields.DEFINITION_POINT_DVOBJECT_ID, dvObjectSolrDoc.getDvObjectId());
solrInputDocument.addField(SearchFields.DISCOVERABLE_BY, dvObjectSolrDoc.getPermissions());
return solrInputDocument;
}

private void persistToSolr(Collection<SolrInputDocument> docs) throws SolrServerException, IOException {
if (docs.isEmpty()) {
/**
Expand All @@ -364,7 +357,7 @@ private void persistToSolr(Collection<SolrInputDocument> docs) throws SolrServer
* inheritance
*/
public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) {
List<Long> dvObjectsToReindexPermissionsFor = new ArrayList<>();
List<DvObject> dvObjectsToReindexPermissionsFor = new ArrayList<>();
/**
* @todo Re-indexing the definition point itself seems to be necessary
* for revoke but not necessarily grant.
Expand All @@ -375,57 +368,57 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint)
if (definitionPoint.isInstanceofDataverse()) {
Dataverse selfDataverse = (Dataverse) definitionPoint;
if (!selfDataverse.equals(dataverseService.findRootDataverse())) {
dvObjectsToReindexPermissionsFor.add(definitionPoint.getId());
dvObjectsToReindexPermissionsFor.add(definitionPoint);
}
List<Dataset> directChildDatasetsOfDvDefPoint = datasetService.findByOwnerId(selfDataverse.getId());
for (Dataset dataset : directChildDatasetsOfDvDefPoint) {
dvObjectsToReindexPermissionsFor.add(dataset.getId());
for (long fileId : filesToReIndexPermissionsFor(dataset)) {
dvObjectsToReindexPermissionsFor.add(fileId);
dvObjectsToReindexPermissionsFor.add(dataset);
for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) {
dvObjectsToReindexPermissionsFor.add(datafile);
}
}
} else if (definitionPoint.isInstanceofDataset()) {
// index the dataset itself
indexPermissionsForOneDvObject(definitionPoint.getId());
indexPermissionsForOneDvObject(definitionPoint);
// index files
/**
* @todo make this faster, index files in batches
*/
Dataset dataset = (Dataset) definitionPoint;
for (long fileId : filesToReIndexPermissionsFor(dataset)) {
dvObjectsToReindexPermissionsFor.add(fileId);
for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) {
dvObjectsToReindexPermissionsFor.add(datafile);
}
} else {
dvObjectsToReindexPermissionsFor.add(definitionPoint.getId());
dvObjectsToReindexPermissionsFor.add(definitionPoint);
}

List<String> updatePermissionTimeSuccessStatus = new ArrayList<>();
for (Long dvObjectId : dvObjectsToReindexPermissionsFor) {
for (DvObject dvObject : dvObjectsToReindexPermissionsFor) {
/**
* @todo do something with this response
*/
IndexResponse indexResponse = indexPermissionsForOneDvObject(dvObjectId);
IndexResponse indexResponse = indexPermissionsForOneDvObject(dvObject);
DvObject managedDefinitionPoint = dvObjectService.updatePermissionIndexTime(definitionPoint);
boolean updatePermissionTimeSuccessful = false;
if (managedDefinitionPoint != null) {
updatePermissionTimeSuccessful = true;
}
updatePermissionTimeSuccessStatus.add(dvObjectId + ":" + updatePermissionTimeSuccessful);
updatePermissionTimeSuccessStatus.add(dvObject + ":" + updatePermissionTimeSuccessful);
}
return new IndexResponse("Number of dvObject permissions indexed for " + definitionPoint
+ " (updatePermissionTimeSuccessful:" + updatePermissionTimeSuccessStatus
+ "): " + dvObjectsToReindexPermissionsFor.size()
);
}

private List<Long> filesToReIndexPermissionsFor(Dataset dataset) {
List<Long> filesToReindexPermissionsFor = new ArrayList<>();
private List<DataFile> filesToReIndexPermissionsFor(Dataset dataset) {
List<DataFile> filesToReindexPermissionsFor = new ArrayList<>();
Map<DatasetVersion.VersionState, Boolean> desiredCards = searchPermissionsService.getDesiredCards(dataset);
for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) {
boolean cardShouldExist = desiredCards.get(version.getVersionState());
if (cardShouldExist) {
for (FileMetadata fileMetadata : version.getFileMetadatas()) {
filesToReindexPermissionsFor.add(fileMetadata.getDataFile().getId());
filesToReindexPermissionsFor.add(fileMetadata.getDataFile());
}
}
}
Expand Down Expand Up @@ -453,34 +446,6 @@ public IndexResponse deleteMultipleSolrIds(List<String> solrIdsToDelete) {
return new IndexResponse("no known problem deleting the following documents from Solr:" + solrIdsToDelete);
}

@Asynchronous
public Future<String> indexMissing() {
StringBuilder status = new StringBuilder();
List<Dataverse> stateOrMissingDataverses = indexService.findStaleOrMissingDataverses();
int countOfIndexedDataverses = 0;
for (Dataverse staleOrMissingDataverse : stateOrMissingDataverses) {
Future<String> response = indexService.indexDataverseInNewTransaction(staleOrMissingDataverse);
countOfIndexedDataverses++;
}
status.append("indexed dataverses: " + countOfIndexedDataverses + ", ");
List<Dataset> staleOrMissingDatasets = indexService.findStaleOrMissingDatasets();
int countOfIndexedDatasets = 0;
for (Dataset staleOrMissingDataset : staleOrMissingDatasets) {
indexService.indexDatasetInNewTransaction(staleOrMissingDataset);
countOfIndexedDatasets++;
}
status.append("indexed datasets: " + countOfIndexedDatasets + ", ");
int countOfIndexedPermissions = 0;
List<Long> dvObjectsWithStaleOrMissingPermissions = findPermissionsInDatabaseButStaleInOrMissingFromSolr();
for (long dvObjectId : dvObjectsWithStaleOrMissingPermissions) {
indexPermissionsForOneDvObject(dvObjectId);
countOfIndexedPermissions++;
}
status.append("indexed permissions (DvObject IDs): " + countOfIndexedPermissions);
logger.info(status.toString());
return new AsyncResult<>(status.toString());
}

/**
* @todo Do we want to report the root dataverse (id 1, often) in
* permissionsInDatabaseButMissingFromSolr?
Expand Down
13 changes: 13 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
*/
package edu.harvard.iq.dataverse.search;

import edu.harvard.iq.dataverse.IndexServiceBean;
import java.util.Arrays;
import org.apache.solr.common.SolrInputDocument;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
Expand Down Expand Up @@ -45,4 +48,14 @@ public void testSanitizeQuery() {
assertEquals("datasetPersistentIdentifier:hdl\\:1902.1/21919", SearchUtil.sanitizeQuery("datasetPersistentIdentifier:hdl:1902.1/21919"));
}

@Test
public void testCreateSolrDoc() {
assertEquals(null, SearchUtil.createSolrDoc(null));
SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(new DvObjectSolrDoc("12345", "dataset_12345", "myNameOrTitleNotUsedHere", Arrays.asList(IndexServiceBean.getPublicGroupString())));
System.out.println(solrInputDocument.toString());
assertEquals(SearchFields.ID + "=" + IndexServiceBean.solrDocIdentifierDataset + "12345" + IndexServiceBean.discoverabilityPermissionSuffix, solrInputDocument.get(SearchFields.ID).toString());
assertEquals(SearchFields.DEFINITION_POINT + "=dataset_12345", solrInputDocument.get(SearchFields.DEFINITION_POINT).toString());
assertEquals(SearchFields.DEFINITION_POINT_DVOBJECT_ID + "=12345", solrInputDocument.get(SearchFields.DEFINITION_POINT_DVOBJECT_ID).toString());
assertEquals(SearchFields.DISCOVERABLE_BY + "=" + Arrays.asList(IndexServiceBean.getPublicGroupString()), solrInputDocument.get(SearchFields.DISCOVERABLE_BY).toString());
}
}

0 comments on commit 0d40bd4

Please sign in to comment.