From 0d40bd48cdcb75dc7eea7e87569322bd31290460 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 16 Jul 2015 15:59:48 -0400 Subject: [PATCH] search permissions: reduce findDvObject calls #2036 The profiler shows significantly better performance when we reduce the number of calls to findDvObject so we pass around the DvObject itself more often, rather than looking it up over and over. The profiler also suggests that batching the commits to Solr may also yield a good performance gain but that's for a different commit. The index API endpoint "missing" is removed as part of this commit since it relies on code that was rewritten as part of this commit but it was little used anyway since there's a newer "skipIndexed" boolean for other index endpoints for catching up an index. We can revisit if we want to re-add the "missing" endpoint. Finally the createSolrDoc method was used to SearchUtil and tests were written for it. Here are the numbers for before and after this commit from running scripts/issues/2036/grant-role-then-revoke | Solr Up | Solr Down ------- | --------- | ------- grant | 0m25.653s | 0m9.699s revoke | 0m23.539s | 0m9.968s | Solr Up | Solr Down ------- | --------- | ------- grant | 0m7.422s | 0m10.348s revoke | 0m6.758s | 0m10.341s --- .../iq/dataverse/DvObjectServiceBean.java | 5 +- .../iq/dataverse/IndexServiceBean.java | 2 +- .../edu/harvard/iq/dataverse/api/Index.java | 30 +++--- .../command/impl/PublishDatasetCommand.java | 2 +- .../command/impl/PublishDataverseCommand.java | 2 +- .../iq/dataverse/search/SearchUtil.java | 15 +++ .../search/SolrIndexServiceBean.java | 91 ++++++------------- .../iq/dataverse/search/SearchUtilTest.java | 13 +++ 8 files changed, 73 insertions(+), 87 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java index 0759be168a7..c054d796fb1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java @@ -75,9 +75,8 @@ public DvObject updatePermissionIndexTime(DvObject dvObject) { * dvObject before we try to set this timestamp? See * https://github.com/IQSS/dataverse/commit/6ad0ebb272c8cb46368cb76784b55dbf33eea947 */ - DvObject dvObjectToModify = findDvObject(dvObject.getId()); - dvObjectToModify.setPermissionIndexTime(new Timestamp(new Date().getTime())); - DvObject savedDvObject = em.merge(dvObjectToModify); + dvObject.setPermissionIndexTime(new Timestamp(new Date().getTime())); + DvObject savedDvObject = em.merge(dvObject); return savedDvObject; } diff --git a/src/main/java/edu/harvard/iq/dataverse/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/IndexServiceBean.java index dd1f2bf4427..58ccc39a44f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/IndexServiceBean.java @@ -207,7 +207,7 @@ public Future indexDataverse(Dataverse dataverse) { } dvObjectService.updateContentIndexTime(dataverse); - IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse.getId()); + IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse); String msg = "indexed dataverse " + dataverse.getId() + ":" + dataverse.getAlias() + ". Response from permission indexing: " + indexResponse.getMessage(); return new AsyncResult<>(msg); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Index.java b/src/main/java/edu/harvard/iq/dataverse/api/Index.java index f78213bc4c3..9cd8007f024 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Index.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Index.java @@ -251,21 +251,6 @@ public Response indexTypeById(@PathParam("type") String type, @PathParam("id") L } } - /** - * Note that this method is best used in a migration scenario because it - * skips the normal Solr doc cleanup of deleting drafts and other versions - * from Solr. - */ - @GET - @Path("missing") - public Response indexMissing() { - /** - * @todo How can we display the result? - */ - Future result = solrIndexService.indexMissing(); - return okResponse("index missing started, Solr doc cleanup operations will be skipped"); - } - /** * This is just a demo of the modular math logic we use for indexAll. */ @@ -295,8 +280,13 @@ public Response indexAllPermissions() { @GET @Path("perms/{id}") public Response indexPermissions(@PathParam("id") Long id) { - IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(id); - return okResponse(indexResponse.getMessage()); + DvObject dvObject = dvObjectService.findDvObject(id); + if (dvObject == null) { + return errorResponse(Status.BAD_REQUEST, "Could not find DvObject based on id " + id); + } else { + IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dvObject); + return okResponse(indexResponse.getMessage()); + } } @GET @@ -543,7 +533,11 @@ public Response searchPermsDebug( return errorResponse(Response.Status.UNAUTHORIZED, "Invalid apikey '" + apiToken + "'"); } - List solrDocs = SolrIndexService.determineSolrDocs(dvObjectId); + DvObject dvObjectToLookUp = dvObjectService.findDvObject(dvObjectId); + if (dvObjectToLookUp == null) { + return errorResponse(Status.BAD_REQUEST, "Could not find DvObject based on id " + dvObjectId); + } + List solrDocs = SolrIndexService.determineSolrDocs(dvObjectToLookUp); JsonObjectBuilder data = Json.createObjectBuilder(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java index 0b166f7cb36..e2f2c6c9851 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDatasetCommand.java @@ -153,7 +153,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { /** * @todo what should we do with the indexRespose? */ - IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataset.getId()); + IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataset); // set the subject of the parent (all the way up) Dataverses DatasetField subject = null; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDataverseCommand.java index c914f4e7ca4..8e1232cd1cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/PublishDataverseCommand.java @@ -47,7 +47,7 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { /** * @todo what should we do with the indexRespose? */ - IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataverse.getId()); + IndexResponse indexResponse = ctxt.solrIndex().indexPermissionsForOneDvObject(savedDataverse); return savedDataverse; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java index 1d043843cc2..f566d1065b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchUtil.java @@ -1,5 +1,8 @@ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.IndexServiceBean; +import org.apache.solr.common.SolrInputDocument; + public class SearchUtil { /** @@ -37,4 +40,16 @@ public static String sanitizeQuery(String query) { return query; } + public static SolrInputDocument createSolrDoc(DvObjectSolrDoc dvObjectSolrDoc) { + if (dvObjectSolrDoc == null) { + return null; + } + SolrInputDocument solrInputDocument = new SolrInputDocument(); + solrInputDocument.addField(SearchFields.ID, dvObjectSolrDoc.getSolrId() + IndexServiceBean.discoverabilityPermissionSuffix); + solrInputDocument.addField(SearchFields.DEFINITION_POINT, dvObjectSolrDoc.getSolrId()); + solrInputDocument.addField(SearchFields.DEFINITION_POINT_DVOBJECT_ID, dvObjectSolrDoc.getDvObjectId()); + solrInputDocument.addField(SearchFields.DISCOVERABLE_BY, dvObjectSolrDoc.getPermissions()); + return solrInputDocument; + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index 4a0396dccc9..aad14189733 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -77,13 +77,12 @@ public class SolrIndexServiceBean { */ private boolean unpublishedDataRelatedToMeModeEnabled = true; - public List determineSolrDocs(Long dvObjectId) { + public List determineSolrDocs(DvObject dvObject) { List emptyList = new ArrayList<>(); - List solrDocs = emptyList; - DvObject dvObject = dvObjectService.findDvObject(dvObjectId); if (dvObject == null) { return emptyList; } + List solrDocs = emptyList; if (dvObject.isInstanceofDataverse()) { DvObjectSolrDoc dataverseSolrDoc = constructDataverseSolrDoc((Dataverse) dvObject); solrDocs.add(dataverseSolrDoc); @@ -270,7 +269,7 @@ public IndexResponse indexAllPermissions() { } files.add(datafile); } else { - definitionPoints.addAll(determineSolrDocs(dvObject.getId())); + definitionPoints.addAll(determineSolrDocs(dvObject)); } } @@ -287,7 +286,7 @@ public IndexResponse indexAllPermissions() { for (DvObjectSolrDoc dvObjectSolrDoc : definitionPoints) { logger.info("creating solr doc in memory for " + dvObjectSolrDoc.getSolrId()); - SolrInputDocument solrInputDocument = createSolrDoc(dvObjectSolrDoc); + SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(dvObjectSolrDoc); logger.info("adding to list of docs to index " + dvObjectSolrDoc.getSolrId()); docs.add(solrInputDocument); } @@ -307,21 +306,24 @@ public IndexResponse indexAllPermissions() { } - public IndexResponse indexPermissionsForOneDvObject(long dvObjectId) { + public IndexResponse indexPermissionsForOneDvObject(DvObject dvObject) { + if (dvObject == null) { + return new IndexResponse("problem indexing... null DvObject passed in"); + } + long dvObjectId = dvObject.getId(); Collection docs = new ArrayList<>(); - List definitionPoints = determineSolrDocs(dvObjectId); + List definitionPoints = determineSolrDocs(dvObject); for (DvObjectSolrDoc dvObjectSolrDoc : definitionPoints) { - SolrInputDocument solrInputDocument = createSolrDoc(dvObjectSolrDoc); + SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(dvObjectSolrDoc); docs.add(solrInputDocument); } try { persistToSolr(docs); boolean updatePermissionTimeSuccessful = false; - DvObject dvObjectToUpdatePermissionTimeOn = dvObjectService.findDvObject(dvObjectId); - if (dvObjectToUpdatePermissionTimeOn != null) { - DvObject savedDvObject = dvObjectService.updatePermissionIndexTime(dvObjectService.findDvObject(dvObjectToUpdatePermissionTimeOn.getId())); + if (dvObject != null) { + DvObject savedDvObject = dvObjectService.updatePermissionIndexTime(dvObject); if (savedDvObject != null) { updatePermissionTimeSuccessful = true; } @@ -333,15 +335,6 @@ public IndexResponse indexPermissionsForOneDvObject(long dvObjectId) { } - private SolrInputDocument createSolrDoc(DvObjectSolrDoc dvObjectSolrDoc) { - SolrInputDocument solrInputDocument = new SolrInputDocument(); - solrInputDocument.addField(SearchFields.ID, dvObjectSolrDoc.getSolrId() + IndexServiceBean.discoverabilityPermissionSuffix); - solrInputDocument.addField(SearchFields.DEFINITION_POINT, dvObjectSolrDoc.getSolrId()); - solrInputDocument.addField(SearchFields.DEFINITION_POINT_DVOBJECT_ID, dvObjectSolrDoc.getDvObjectId()); - solrInputDocument.addField(SearchFields.DISCOVERABLE_BY, dvObjectSolrDoc.getPermissions()); - return solrInputDocument; - } - private void persistToSolr(Collection docs) throws SolrServerException, IOException { if (docs.isEmpty()) { /** @@ -364,7 +357,7 @@ private void persistToSolr(Collection docs) throws SolrServer * inheritance */ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) { - List dvObjectsToReindexPermissionsFor = new ArrayList<>(); + List dvObjectsToReindexPermissionsFor = new ArrayList<>(); /** * @todo Re-indexing the definition point itself seems to be necessary * for revoke but not necessarily grant. @@ -375,42 +368,42 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) if (definitionPoint.isInstanceofDataverse()) { Dataverse selfDataverse = (Dataverse) definitionPoint; if (!selfDataverse.equals(dataverseService.findRootDataverse())) { - dvObjectsToReindexPermissionsFor.add(definitionPoint.getId()); + dvObjectsToReindexPermissionsFor.add(definitionPoint); } List directChildDatasetsOfDvDefPoint = datasetService.findByOwnerId(selfDataverse.getId()); for (Dataset dataset : directChildDatasetsOfDvDefPoint) { - dvObjectsToReindexPermissionsFor.add(dataset.getId()); - for (long fileId : filesToReIndexPermissionsFor(dataset)) { - dvObjectsToReindexPermissionsFor.add(fileId); + dvObjectsToReindexPermissionsFor.add(dataset); + for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) { + dvObjectsToReindexPermissionsFor.add(datafile); } } } else if (definitionPoint.isInstanceofDataset()) { // index the dataset itself - indexPermissionsForOneDvObject(definitionPoint.getId()); + indexPermissionsForOneDvObject(definitionPoint); // index files /** * @todo make this faster, index files in batches */ Dataset dataset = (Dataset) definitionPoint; - for (long fileId : filesToReIndexPermissionsFor(dataset)) { - dvObjectsToReindexPermissionsFor.add(fileId); + for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) { + dvObjectsToReindexPermissionsFor.add(datafile); } } else { - dvObjectsToReindexPermissionsFor.add(definitionPoint.getId()); + dvObjectsToReindexPermissionsFor.add(definitionPoint); } List updatePermissionTimeSuccessStatus = new ArrayList<>(); - for (Long dvObjectId : dvObjectsToReindexPermissionsFor) { + for (DvObject dvObject : dvObjectsToReindexPermissionsFor) { /** * @todo do something with this response */ - IndexResponse indexResponse = indexPermissionsForOneDvObject(dvObjectId); + IndexResponse indexResponse = indexPermissionsForOneDvObject(dvObject); DvObject managedDefinitionPoint = dvObjectService.updatePermissionIndexTime(definitionPoint); boolean updatePermissionTimeSuccessful = false; if (managedDefinitionPoint != null) { updatePermissionTimeSuccessful = true; } - updatePermissionTimeSuccessStatus.add(dvObjectId + ":" + updatePermissionTimeSuccessful); + updatePermissionTimeSuccessStatus.add(dvObject + ":" + updatePermissionTimeSuccessful); } return new IndexResponse("Number of dvObject permissions indexed for " + definitionPoint + " (updatePermissionTimeSuccessful:" + updatePermissionTimeSuccessStatus @@ -418,14 +411,14 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) ); } - private List filesToReIndexPermissionsFor(Dataset dataset) { - List filesToReindexPermissionsFor = new ArrayList<>(); + private List filesToReIndexPermissionsFor(Dataset dataset) { + List filesToReindexPermissionsFor = new ArrayList<>(); Map desiredCards = searchPermissionsService.getDesiredCards(dataset); for (DatasetVersion version : datasetVersionsToBuildCardsFor(dataset)) { boolean cardShouldExist = desiredCards.get(version.getVersionState()); if (cardShouldExist) { for (FileMetadata fileMetadata : version.getFileMetadatas()) { - filesToReindexPermissionsFor.add(fileMetadata.getDataFile().getId()); + filesToReindexPermissionsFor.add(fileMetadata.getDataFile()); } } } @@ -453,34 +446,6 @@ public IndexResponse deleteMultipleSolrIds(List solrIdsToDelete) { return new IndexResponse("no known problem deleting the following documents from Solr:" + solrIdsToDelete); } - @Asynchronous - public Future indexMissing() { - StringBuilder status = new StringBuilder(); - List stateOrMissingDataverses = indexService.findStaleOrMissingDataverses(); - int countOfIndexedDataverses = 0; - for (Dataverse staleOrMissingDataverse : stateOrMissingDataverses) { - Future response = indexService.indexDataverseInNewTransaction(staleOrMissingDataverse); - countOfIndexedDataverses++; - } - status.append("indexed dataverses: " + countOfIndexedDataverses + ", "); - List staleOrMissingDatasets = indexService.findStaleOrMissingDatasets(); - int countOfIndexedDatasets = 0; - for (Dataset staleOrMissingDataset : staleOrMissingDatasets) { - indexService.indexDatasetInNewTransaction(staleOrMissingDataset); - countOfIndexedDatasets++; - } - status.append("indexed datasets: " + countOfIndexedDatasets + ", "); - int countOfIndexedPermissions = 0; - List dvObjectsWithStaleOrMissingPermissions = findPermissionsInDatabaseButStaleInOrMissingFromSolr(); - for (long dvObjectId : dvObjectsWithStaleOrMissingPermissions) { - indexPermissionsForOneDvObject(dvObjectId); - countOfIndexedPermissions++; - } - status.append("indexed permissions (DvObject IDs): " + countOfIndexedPermissions); - logger.info(status.toString()); - return new AsyncResult<>(status.toString()); - } - /** * @todo Do we want to report the root dataverse (id 1, often) in * permissionsInDatabaseButMissingFromSolr? diff --git a/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java index 9c1b6fbd279..348acae0f03 100644 --- a/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/search/SearchUtilTest.java @@ -5,6 +5,9 @@ */ package edu.harvard.iq.dataverse.search; +import edu.harvard.iq.dataverse.IndexServiceBean; +import java.util.Arrays; +import org.apache.solr.common.SolrInputDocument; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -45,4 +48,14 @@ public void testSanitizeQuery() { assertEquals("datasetPersistentIdentifier:hdl\\:1902.1/21919", SearchUtil.sanitizeQuery("datasetPersistentIdentifier:hdl:1902.1/21919")); } + @Test + public void testCreateSolrDoc() { + assertEquals(null, SearchUtil.createSolrDoc(null)); + SolrInputDocument solrInputDocument = SearchUtil.createSolrDoc(new DvObjectSolrDoc("12345", "dataset_12345", "myNameOrTitleNotUsedHere", Arrays.asList(IndexServiceBean.getPublicGroupString()))); + System.out.println(solrInputDocument.toString()); + assertEquals(SearchFields.ID + "=" + IndexServiceBean.solrDocIdentifierDataset + "12345" + IndexServiceBean.discoverabilityPermissionSuffix, solrInputDocument.get(SearchFields.ID).toString()); + assertEquals(SearchFields.DEFINITION_POINT + "=dataset_12345", solrInputDocument.get(SearchFields.DEFINITION_POINT).toString()); + assertEquals(SearchFields.DEFINITION_POINT_DVOBJECT_ID + "=12345", solrInputDocument.get(SearchFields.DEFINITION_POINT_DVOBJECT_ID).toString()); + assertEquals(SearchFields.DISCOVERABLE_BY + "=" + Arrays.asList(IndexServiceBean.getPublicGroupString()), solrInputDocument.get(SearchFields.DISCOVERABLE_BY).toString()); + } }