Skip to content

Commit

Permalink
dataset page performance improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
ErykKul committed May 11, 2023
1 parent 3bc24df commit 64743bf
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 48 deletions.
94 changes: 46 additions & 48 deletions src/main/java/edu/harvard/iq/dataverse/DatasetPage.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
import edu.harvard.iq.dataverse.metadataimport.ForeignMetadataImportServiceBean;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
import edu.harvard.iq.dataverse.privateurl.PrivateUrlUtil;
Expand Down Expand Up @@ -81,6 +82,8 @@
import java.util.Set;
import java.util.Collection;
import java.util.logging.Logger;
import java.util.stream.Collectors;

import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.faces.application.FacesMessage;
Expand Down Expand Up @@ -233,6 +236,8 @@ public enum DisplayMode {
ExternalToolServiceBean externalToolService;
@EJB
SolrClientService solrClientService;
@EJB
DvObjectServiceBean dvObjectService;
@Inject
DataverseRequestServiceBean dvRequestService;
@Inject
Expand Down Expand Up @@ -678,48 +683,43 @@ public void showAll(){
}

private List<FileMetadata> selectFileMetadatasForDisplay() {
Set<Long> searchResultsIdSet = null;

if (isIndexedVersion()) {
final Set<Long> searchResultsIdSet;
if (StringUtil.isEmpty(fileLabelSearchTerm) && StringUtil.isEmpty(fileTypeFacet) && StringUtil.isEmpty(fileAccessFacet) && StringUtil.isEmpty(fileTagsFacet)) {
// But, if no search terms were specified, we return the full
// list of the files in the version:
// Since the search results should include the full set of fmds if all the
// terms/facets are empty, setting them to null should just be
// an optimization for the loop below
searchResultsIdSet = null;
} else if (isIndexedVersion()) {
// We run the search even if no search term and/or facets are
// specified - to generate the facet labels list:
searchResultsIdSet = getFileIdsInVersionFromSolr(workingVersion.getId(), this.fileLabelSearchTerm);
// But, if no search terms were specified, we return the full
// list of the files in the version:
if (StringUtil.isEmpty(fileLabelSearchTerm)
&& StringUtil.isEmpty(fileTypeFacet)
&& StringUtil.isEmpty(fileAccessFacet)
&& StringUtil.isEmpty(fileTagsFacet)) {
// Since the search results should include the full set of fmds if all the
// terms/facets are empty, setting them to null should just be
// an optimization for the loop below
searchResultsIdSet = null;
}
} else {
} else if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) {
// No, this is not an indexed version.
// If the search term was specified, we'll run a search in the db;
// if not - return the full list of files in the version.
// (no facets without solr!)
if (!StringUtil.isEmpty(this.fileLabelSearchTerm)) {
searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm);
}
searchResultsIdSet = getFileIdsInVersionFromDb(workingVersion.getId(), this.fileLabelSearchTerm);
} else {
searchResultsIdSet = null;
}

List<FileMetadata> retList = new ArrayList<>();

for (FileMetadata fileMetadata : workingVersion.getFileMetadatas()) {
if (searchResultsIdSet == null || searchResultsIdSet.contains(fileMetadata.getDataFile().getId())) {
retList.add(fileMetadata);
}
final List<FileMetadata> md = workingVersion.getFileMetadatas();
final List<FileMetadata> retList;
if (searchResultsIdSet == null) {
retList = new ArrayList<>(md);
} else {
retList = md.stream().filter(x -> searchResultsIdSet.contains(x.getDataFile().getId())).collect(Collectors.toList());
}
sortFileMetadatas(retList);
return retList;
}

private void sortFileMetadatas(List<FileMetadata> fileList) {
private void sortFileMetadatas(final List<FileMetadata> fileList) {

DataFileComparator dfc = new DataFileComparator();
Comparator<FileMetadata> comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder));
final DataFileComparator dfc = new DataFileComparator();
final Comparator<FileMetadata> comp = dfc.compareBy(folderPresort, tagPresort, fileSortField, !"desc".equals(fileSortOrder));
Collections.sort(fileList, comp);
}

Expand Down Expand Up @@ -1843,6 +1843,17 @@ public boolean webloaderUploadSupported() {
return settingsWrapper.isWebloaderUpload() && StorageIO.isDirectUploadEnabled(dataset.getEffectiveStorageDriverId());
}

private void setIdByPersistentId() {
GlobalId gid = PidUtil.parseAsGlobalID(persistentId);
Long id = dvObjectService.findIdByGlobalId(gid, DvObject.DType.Dataset);
if (id == null) {
id = dvObjectService.findIdByAltGlobalId(gid, DvObject.DType.Dataset);
}
if (id != null) {
this.setId(id);
}
}

private String init(boolean initFull) {

//System.out.println("_YE_OLDE_QUERY_COUNTER_"); // for debug purposes
Expand All @@ -1866,21 +1877,9 @@ private String init(boolean initFull) {
// Set the workingVersion and Dataset
// ---------------------------------------
if (persistentId != null) {
logger.fine("initializing DatasetPage with persistent ID " + persistentId);
// Set Working Version and Dataset by PersistentID
dataset = datasetService.findByGlobalId(persistentId);
if (dataset == null) {
logger.warning("No such dataset: "+persistentId);
return permissionsWrapper.notFound();
}
logger.fine("retrieved dataset, id="+dataset.getId());

retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version);
//retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version);
this.workingVersion = retrieveDatasetVersionResponse.getDatasetVersion();
logger.fine("retrieved version: id: " + workingVersion.getId() + ", state: " + this.workingVersion.getVersionState());

} else if (this.getId() != null) {
setIdByPersistentId();
}
if (this.getId() != null) {
// Set Working Version and Dataset by Datasaet Id and Version
dataset = datasetService.find(this.getId());
if (dataset == null) {
Expand Down Expand Up @@ -2835,15 +2834,14 @@ public String refresh() {
DatasetVersionServiceBean.RetrieveDatasetVersionResponse retrieveDatasetVersionResponse = null;

if (persistentId != null) {
//retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByPersistentId(persistentId, version);
dataset = datasetService.findByGlobalId(persistentId);
retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version);
} else if (versionId != null) {
retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId);
} else if (dataset.getId() != null) {
setIdByPersistentId();
}
if (dataset.getId() != null) {

This comment has been minimized.

Copy link
@qqmyers

qqmyers Jun 23, 2023

Member

@ErykKul - it looks like this dataset.getId() can cause an NPE. @kcondon can hopefully provide details. Should this, and the similar line at 1882 above be else if instead of if?

This comment has been minimized.

Copy link
@ErykKul

ErykKul Jun 26, 2023

Author Collaborator

The idea here was to use this.getId() and not dataset.getId(), just as on the line 1882 (I overlooked it, and it caused the null pointer exception). On the line 1882 we do not need the "else if" logic, since when persistentId is not null, we set the id by querying the DB. We still need to initialize the retrieveDatasetVersionResponse using the newly found ID (the code needs to execute regardless if persistent id was not null).

This comment has been minimized.

Copy link
@ErykKul

ErykKul Jun 26, 2023

Author Collaborator

I have reworked this code for better readability and fixed the null pointer.

This comment has been minimized.

Copy link
@ErykKul

ErykKul Jun 26, 2023

Author Collaborator

Note that "else if (dataset.getId())" should not be here, as it would always cause nullpointer exceptions, since we set dataset to null before the "if ... else if ..." statements.

//retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionById(dataset.getId(), version);
dataset = datasetService.find(dataset.getId());
retrieveDatasetVersionResponse = datasetVersionService.selectRequestedVersion(dataset.getVersions(), version);
} else if (versionId != null) {
retrieveDatasetVersionResponse = datasetVersionService.retrieveDatasetVersionByVersionId(versionId);
}

if (retrieveDatasetVersionResponse == null) {
Expand Down
4 changes: 4 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DvObject.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@
query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"),
@NamedQuery(name = "DvObject.findByGlobalId",
query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"),
@NamedQuery(name = "DvObject.findIdByGlobalId",
query = "SELECT o.id FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"),

@NamedQuery(name = "DvObject.findByAlternativeGlobalId",
query = "SELECT o FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"),
@NamedQuery(name = "DvObject.findIdByAlternativeGlobalId",
query = "SELECT o.id FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"),

@NamedQuery(name = "DvObject.findByProtocolIdentifierAuthority",
query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol"),
Expand Down
31 changes: 31 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/DvObjectServiceBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,16 @@ public DvObject findByAltGlobalId(GlobalId globalId, DvObject.DType dtype) {
return runFindByGlobalId(query, globalId, dtype);
}

public Long findIdByGlobalId(GlobalId globalId, DvObject.DType dtype) {
Query query = em.createNamedQuery("DvObject.findIdByGlobalId");
return runFindIdByGlobalId(query, globalId, dtype);
}

public Long findIdByAltGlobalId(GlobalId globalId, DvObject.DType dtype) {
Query query = em.createNamedQuery("DvObject.findIdByAlternativeGlobalId");
return runFindIdByGlobalId(query, globalId, dtype);
}

private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) {
DvObject foundDvObject = null;
try {
Expand All @@ -136,6 +146,27 @@ private DvObject runFindByGlobalId(Query query, GlobalId gid, DvObject.DType dty
}
return foundDvObject;
}

private Long runFindIdByGlobalId(Query query, GlobalId gid, DvObject.DType dtype) {
Long foundDvObject = null;
try {
query.setParameter("identifier", gid.getIdentifier());
query.setParameter("protocol", gid.getProtocol());
query.setParameter("authority", gid.getAuthority());
query.setParameter("dtype", dtype.getDType());
foundDvObject = (Long) query.getSingleResult();
} catch (javax.persistence.NoResultException e) {
// (set to .info, this can fill the log file with thousands of
// these messages during a large harvest run)
logger.fine("no dvObject found: " + gid.asString());
// DO nothing, just return null.
return null;
} catch (Exception ex) {
logger.info("Exception caught in findByGlobalId: " + ex.getLocalizedMessage());
return null;
}
return foundDvObject;
}

public DvObject findByGlobalId(GlobalId globalId) {
return (DvObject) em.createNamedQuery("DvObject.findByProtocolIdentifierAuthority")
Expand Down

0 comments on commit 64743bf

Please sign in to comment.