Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster combined query for retrieving datasets via API #9684

Merged
merged 16 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,8 @@ public Dataset findDeep(Object pk) {
.setHint("eclipselink.left-join-fetch", "o.files.dataFileTags")
.setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas")
.setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.fileCategories")
//.setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses")
.setHint("eclipselink.left-join-fetch", "o.files.fileMetadatas.varGroups")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this part of the optimization here? I would guess this isn't often accessed, so I'm not sure how much of a performance help it would be.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had seen in in the query log even when it was not used. For large datasets it does make a difference (one query less for each file).

//.setHint("eclipselink.left-join-fetch", "o.files.guestbookResponses
.setHint("eclipselink.left-join-fetch", "o.files.embargo")
.setHint("eclipselink.left-join-fetch", "o.files.fileAccessRequests")
.setHint("eclipselink.left-join-fetch", "o.files.owner")
Expand Down
39 changes: 29 additions & 10 deletions src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.DvObjectServiceBean;
import edu.harvard.iq.dataverse.EjbDataverseEngine;
import edu.harvard.iq.dataverse.GlobalId;
import edu.harvard.iq.dataverse.GuestbookResponseServiceBean;
import edu.harvard.iq.dataverse.MetadataBlock;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
Expand All @@ -41,6 +42,7 @@
import edu.harvard.iq.dataverse.externaltools.ExternalToolServiceBean;
import edu.harvard.iq.dataverse.license.LicenseServiceBean;
import edu.harvard.iq.dataverse.metrics.MetricsServiceBean;
import edu.harvard.iq.dataverse.pidproviders.PidUtil;
import edu.harvard.iq.dataverse.locality.StorageSiteServiceBean;
import edu.harvard.iq.dataverse.search.savedsearch.SavedSearchServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
Expand Down Expand Up @@ -390,31 +392,48 @@ protected DataverseLinkingDataverse findDataverseLinkingDataverseOrDie(String da
}

protected Dataset findDatasetOrDie(String id) throws WrappedResponse {
return findDatasetOrDie(id, false);
}

protected Dataset findDatasetOrDie(String id, boolean deep) throws WrappedResponse {
Long datasetId;
Dataset dataset;
if (id.equals(PERSISTENT_ID_KEY)) {
String persistentId = getRequestParameter(PERSISTENT_ID_KEY.substring(1));
if (persistentId == null) {
throw new WrappedResponse(
badRequest(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1)))));
}
dataset = datasetSvc.findByGlobalId(persistentId);
if (dataset == null) {
throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.persistentId", Collections.singletonList(persistentId))));
GlobalId globalId;
try {
globalId = PidUtil.parseAsGlobalID(persistentId);
} catch (IllegalArgumentException e) {
throw new WrappedResponse(
badRequest(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.bad.id", Collections.singletonList(persistentId))));
}
datasetId = dvObjSvc.findIdByGlobalId(globalId, DvObject.DType.Dataset);
ErykKul marked this conversation as resolved.
Show resolved Hide resolved
if (datasetId == null) {
throw new WrappedResponse(
badRequest(BundleUtil.getStringFromBundle("find.dataset.error.dataset_id_is_null", Collections.singletonList(PERSISTENT_ID_KEY.substring(1)))));
ErykKul marked this conversation as resolved.
Show resolved Hide resolved
}
return dataset;

} else {
try {
dataset = datasetSvc.find(Long.parseLong(id));
if (dataset == null) {
throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.id", Collections.singletonList(id))));
}
return dataset;
datasetId = Long.parseLong(id);
} catch (NumberFormatException nfe) {
throw new WrappedResponse(
badRequest(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.bad.id", Collections.singletonList(id))));
}
}

if (deep) {
dataset = datasetSvc.findDeep(datasetId);
} else {
dataset = datasetSvc.find(datasetId);
}
if (dataset == null) {
throw new WrappedResponse(notFound(BundleUtil.getStringFromBundle("find.dataset.error.dataset.not.found.id", Collections.singletonList(id))));
}
return dataset;
}

protected DataFile findDataFileOrDie(String id) throws WrappedResponse {
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ public interface DsVersionHandler<T> {
@Path("{id}")
public Response getDataset(@Context ContainerRequestContext crc, @PathParam("id") String id, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
return response( req -> {
final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(id)));
final Dataset retrieved = execCommand(new GetDatasetCommand(req, findDatasetOrDie(id, true)));
final DatasetVersion latest = execCommand(new GetLatestAccessibleDatasetVersionCommand(req, retrieved));
final JsonObjectBuilder jsonbuilder = json(retrieved);
//Report MDC if this is a released version (could be draft if user has access, or user may not have access at all and is not getting metadata beyond the minimum)
Expand Down Expand Up @@ -490,7 +490,7 @@ public Response getVersion(@Context ContainerRequestContext crc, @PathParam("id"
@Path("{id}/versions/{versionId}/files")
public Response getVersionFiles(@Context ContainerRequestContext crc, @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) {
return response( req -> ok( jsonFileMetadatas(
getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers).getFileMetadatas())), getRequestUser(crc));
getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId, true), uriInfo, headers).getFileMetadatas())), getRequestUser(crc));
}

@GET
Expand Down
Loading