From 65ec69f97c74078e4db5d7194e1a5fd6a99f9b50 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 5 Aug 2024 17:17:31 -0400 Subject: [PATCH 01/31] a quick experimental AddReplaceFileHelper implementation of adding Globus-uploaded files #10623 --- .../iq/dataverse/EditDatafilesPage.java | 8 ++- .../dataverse/globus/GlobusServiceBean.java | 51 ++++++++++++++----- 2 files changed, 44 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java index 993cb02b66b..99a44058809 100644 --- a/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/EditDatafilesPage.java @@ -2121,8 +2121,12 @@ public void handleFileUpload(FileUploadEvent event) throws IOException { } /** - * Using information from the DropBox choose, ingest the chosen files - * https://www.dropbox.com/developers/dropins/chooser/js + * External, aka "Direct" Upload. + * The file(s) have been uploaded to physical storage (such as S3) directly, + * this call is to create and add the DataFiles to the Dataset on the Dataverse + * side. The method does NOT finalize saving the datafiles in the database - + * that will happen when the user clicks 'Save', similar to how the "normal" + * uploads are handled. * * @param event */ diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index fb50214c259..02bc4667ea6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -61,6 +61,9 @@ import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; import edu.harvard.iq.dataverse.settings.JvmSettings; @@ -70,6 +73,7 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.ws.rs.core.Response; @Stateless @Named("GlobusServiceBean") @@ -81,6 +85,8 @@ public class GlobusServiceBean implements java.io.Serializable { protected SettingsServiceBean settingsSvc; @Inject DataverseSession session; + @Inject + DataverseRequestServiceBean dataverseRequestSvc; @EJB protected AuthenticationServiceBean authSvc; @EJB @@ -92,7 +98,13 @@ public class GlobusServiceBean implements java.io.Serializable { @EJB FileDownloadServiceBean fileDownloadService; @EJB - DataFileServiceBean dataFileService; + DataFileServiceBean dataFileSvc; + @EJB + PermissionServiceBean permissionSvc; + @EJB + IngestServiceBean ingestSvc; + @EJB + SystemConfig systemConfig; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -764,7 +776,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); logger.fine("Size: " + newfilesJsonArray.size()); 
logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); - JsonArrayBuilder jsonDataSecondAPI = Json.createArrayBuilder(); + JsonArrayBuilder addFilesJsonData = Json.createArrayBuilder(); for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { @@ -790,7 +802,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S path = Json.createPatchBuilder() .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); fileJsonObject = path.apply(fileJsonObject); - jsonDataSecondAPI.add(fileJsonObject); + addFilesJsonData.add(fileJsonObject); countSuccess++; // } else { // globusLogger.info(fileName @@ -805,20 +817,32 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S } } - String newjsonData = jsonDataSecondAPI.build().toString(); + String newjsonData = addFilesJsonData.build().toString(); - globusLogger.info("Successfully generated new JsonData for Second API call"); + globusLogger.info("Successfully generated new JsonData for addFiles call"); - String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; - System.out.println("*******====command ==== " + command); + System.out.println("*******====command ==== " + command);*/ // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of // calling API - - String output = addFilesAsync(command, globusLogger); - if (output.equalsIgnoreCase("ok")) { + + // a quick experimental AddReplaceFileHelper implementation: + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dataverseRequestSvc.getDataverseRequest(), + this.ingestSvc, + this.datasetSvc, + this.dataFileSvc, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); + + if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) { // if(!taskSkippedFiles) if (countError == 0) { userNotificationService.sendNotification((AuthenticatedUser) authUser, @@ -830,10 +854,10 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), countSuccess + " files added out of " + countAll, true); } - globusLogger.info("Successfully completed api/datasets/:persistentId/addFiles call "); + globusLogger.info("Successfully completed addFiles call "); } else { globusLogger.log(Level.SEVERE, - "******* Error while executing api/datasets/:persistentId/add call ", command); + "******* Error while executing addFiles ", newjsonData); } } @@ -848,6 +872,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S e.printStackTrace(); globusLogger.info("Exception from globusUpload call " + e.getMessage()); datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); + // } } if (ruleId != null) { @@ -1261,7 +1286,7 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, Long fileId = Long.parseLong(idAsString); // If we need to create a GuestBookResponse record, we have to // look up the DataFile object for this file: - df = dataFileService.findCheapAndEasy(fileId); + df = dataFileSvc.findCheapAndEasy(fileId); selectedFiles.add(df); if (!doNotSaveGuestbookResponse) { 
guestbookResponse.setDataFile(df); From 04951609c20f954573eb1d50eebe8ef08d464ed8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 6 Aug 2024 16:40:59 -0400 Subject: [PATCH 02/31] no need to try to calculate checksums if this globus storage isn't dataverse-accessible. #10623 --- .../dataverse/globus/GlobusServiceBean.java | 41 +++++++++++-------- .../timer/DataverseTimerServiceBean.java | 4 +- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 02bc4667ea6..d6b56b51fa5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1160,25 +1160,30 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) // ToDo: what if the file does not exist in s3 // ToDo: what if checksum calculation failed - do { - try { - StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); - in = dataFileStorageIO.getInputStream(); - checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); - count = 3; - } catch (IOException ioex) { - count = 3; - logger.fine(ioex.getMessage()); - globusLogger.info( - "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); - } catch (Exception ex) { - count = count + 1; - ex.printStackTrace(); - logger.info(ex.getMessage()); - Thread.sleep(5000); - } + String storageDriverId = DataAccess.getDriverIdAndStorageLocation(fullPath)[0]; - } while (count < 3); + if (StorageIO.isDataverseAccessible(storageDriverId)) { + do { + try { + StorageIO dataFileStorageIO = DataAccess.getDirectStorageIO(fullPath); + in = dataFileStorageIO.getInputStream(); + checksumVal = FileUtil.calculateChecksum(in, DataFile.ChecksumType.MD5); + count = 3; + } catch (IOException ioex) { + count = 3; + logger.fine(ioex.getMessage()); + globusLogger.info( + "DataFile (fullPath " + fullPath + ") does not appear to be accessible within Dataverse: "); + } catch (Exception ex) { + count = count + 1; + ex.printStackTrace(); + logger.info(ex.getMessage()); + Thread.sleep(5000); + } + + + } while (count < 3); + } if (checksumVal.length() == 0) { checksumVal = "Not available in Dataverse"; diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java index 6eb3a8df0bc..a783b211b36 100644 --- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -120,13 +120,13 @@ public void handleTimeout(jakarta.ejb.Timer timer) { } try { - logger.log(Level.INFO,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName()); + logger.log(Level.FINE,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName()); } catch (UnknownHostException ex) { Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); } if (timer.getInfo() instanceof MotherTimerInfo) { - logger.info("Behold! I am the Master Timer, king of all timers! I'm here to create all the lesser timers!"); + logger.fine("Behold! I am the Master Timer, king of all timers! 
I'm here to create all the lesser timers!"); removeHarvestTimers(); for (HarvestingClient client : harvestingClientService.getAllHarvestingClients()) { createHarvestTimer(client); From ba661387bba24e770ae5c0f9cade3b64db7614f3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Aug 2024 10:43:45 -0400 Subject: [PATCH 03/31] more globus mods (work in progress). #10623 --- .../harvard/iq/dataverse/api/Datasets.java | 7 +- .../dataverse/globus/GlobusServiceBean.java | 485 +++++++++++------- .../{GlobusTask.java => GlobusTaskState.java} | 6 +- .../iq/dataverse/globus/GlobusUtil.java | 33 ++ .../dataverse/ingest/IngestServiceBean.java | 5 +- .../iq/dataverse/settings/FeatureFlags.java | 5 + .../iq/dataverse/util/SystemConfig.java | 9 + 7 files changed, 363 insertions(+), 187 deletions(-) rename src/main/java/edu/harvard/iq/dataverse/globus/{GlobusTask.java => GlobusTaskState.java} (93%) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4b919c5ed82..b93257bc0c3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4009,6 +4009,7 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); if (!systemConfig.isHTTPUpload()) { + // @todo why isHTTPUpload()? - shouldn't it be checking isGlobusUpload() here? return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); } @@ -4075,7 +4076,11 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + try { + globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + } catch (IllegalArgumentException ex) { + return badRequest("Invalid parameters: "+ex.getMessage()); + } return ok("Async call to Globus Upload started "); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index d6b56b51fa5..eb1eb47611a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -22,7 +22,6 @@ import jakarta.json.JsonString; import jakarta.json.JsonValue.ValueType; import jakarta.json.stream.JsonParsingException; -import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.HttpMethod; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; @@ -33,7 +32,6 @@ import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; -import java.net.URLEncoder; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.time.Duration; @@ -62,10 +60,10 @@ import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; +import edu.harvard.iq.dataverse.settings.FeatureFlags; import edu.harvard.iq.dataverse.settings.JvmSettings; import 
edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; @@ -73,6 +71,8 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.persistence.EntityManager; +import jakarta.persistence.PersistenceContext; import jakarta.ws.rs.core.Response; @Stateless @@ -105,6 +105,8 @@ public class GlobusServiceBean implements java.io.Serializable { IngestServiceBean ingestSvc; @EJB SystemConfig systemConfig; + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; private static final Logger logger = Logger.getLogger(GlobusServiceBean.class.getCanonicalName()); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -391,19 +393,33 @@ private void monitorTemporaryPermissions(String ruleId, long datasetId) { * @return * @throws MalformedURLException */ - public GlobusTask getTask(String accessToken, String taskId, Logger globusLogger) throws MalformedURLException { + public GlobusTaskState getTask(String accessToken, String taskId, Logger globusLogger) { - URL url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); + Logger myLogger = globusLogger != null ? globusLogger : logger; + + URL url; + try { + url = new URL("https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + taskId); + } catch (MalformedURLException mue) { + myLogger.warning("Malformed URL exception when trying to contact Globus. Globus API url: " + + "https://transfer.api.globusonline.org/v0.10/endpoint_manager/task/" + + taskId); + return null; + } MakeRequestResponse result = makeRequest(url, "Bearer", accessToken, "GET", null); - GlobusTask task = null; + GlobusTaskState task = null; if (result.status == 200) { - task = parseJson(result.jsonResponse, GlobusTask.class, false); + task = parseJson(result.jsonResponse, GlobusTaskState.class, false); } if (result.status != 200) { - globusLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + // @todo It should probably retry it 2-3 times before giving up; + // similarly, it should probably differentiate between a "no such task" + // response and something intermittent like a server/network error or + // an expired token... i.e. something that's recoverable (?) 
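+            // A possible shape for such a retry loop (a sketch only; treating
+            // an http 404 as "no such task" is an assumption about the Globus API):
+            //   for (int i = 0; i < 3 && task == null; i++) {
+            //       result = makeRequest(url, "Bearer", accessToken, "GET", null);
+            //       if (result.status == 200) {
+            //           task = parseJson(result.jsonResponse, GlobusTaskState.class, false);
+            //       } else if (result.status == 404) {
+            //           break; // no such task - nothing to retry
+            //       } else {
+            //           Thread.sleep(5000); // possibly transient - wait and retry
+            //       }
+            //   }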
+ myLogger.warning("Cannot find information for the task " + taskId + " : Reason : " + result.jsonResponse.toString()); } @@ -646,11 +662,17 @@ private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List< @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, - AuthenticatedUser authUser) throws ExecutionException, InterruptedException, MalformedURLException { + AuthenticatedUser authUser) throws IllegalArgumentException, ExecutionException, InterruptedException, MalformedURLException { - Integer countAll = 0; - Integer countSuccess = 0; - Integer countError = 0; + // Before we do anything else, let's do some basic validation of what + // we've been passed: + + JsonArray filesJsonArray = jsonData.getJsonArray("files"); + + if (filesJsonArray == null || filesJsonArray.size() < 1) { + throw new IllegalArgumentException("No valid json entries supplied for the files being uploaded"); + } + String logTimestamp = logFormatter.format(new Date()); Logger globusLogger = Logger.getLogger( "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); @@ -674,11 +696,13 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S } logger.fine("json: " + JsonUtil.prettyPrint(jsonData)); + + globusLogger.info("Globus upload initiated"); String taskIdentifier = jsonData.getString("taskIdentifier"); GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + GlobusTaskState task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); logger.fine("Found rule: " + ruleId); if (ruleId != null) { @@ -688,15 +712,43 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S rulesCache.invalidate(ruleId); } } - + // Wait before first check Thread.sleep(5000); + + if (FeatureFlags.GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK.enabled()) { + + // Save the task information in the database so that the Globus monitoring + // service can continue checking on its progress. + + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token, new Timestamp(new Date().getTime())); + em.persist(taskInProgress); + + // Save the metadata entries that define the files that are being uploaded + // in the database. These entries will be used once/if the uploads + // completes successfully to add the files to the dataset. + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + ExternalFileUploadInProgress fileUploadRecord = new ExternalFileUploadInProgress(taskIdentifier, fileJsonObject.toString()); + + em.persist(fileUploadRecord); + } + + return; + } + + + // the old implementation that relies on looping continuosly, + // sleeping-then-checking the task status repeatedly: + // globus task status check + // (the method below performs continuous looped checks of the remote + // Globus API, monitoring it for as long as it takes for the task to + // finish one way or another!) 
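+        // (the frequency of those checks is governed by the same
+        // :GlobusPollingInterval database setting that the new monitoring
+        // service uses; globusStatusCheck() falls back to checking every
+        // 50 seconds by default)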
task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); + // @todo null check, or make sure it's never null String taskStatus = getTaskStatus(task); - globusLogger.info("Starting a globusUpload "); - if (ruleId != null) { // Transfer is complete, so delete rule deletePermission(ruleId, dataset, globusLogger); @@ -739,138 +791,11 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S } else { try { - // - - List inputList = new ArrayList(); - JsonArray filesJsonArray = jsonData.getJsonArray("files"); - - if (filesJsonArray != null) { - String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" - + dataset.getIdentifierForFileStorage(); - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from - // externalTool - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - String storeId = parts[0]; - // If this is an S3 store, we need to split out the bucket name - String[] bits = parts[1].split(":"); - String bucketName = ""; - if (bits.length > 1) { - bucketName = bits[0]; - } - String fileId = bits[bits.length - 1]; - - // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 - String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; - String fileName = fileJsonObject.getString("fileName"); - - inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); - } - - // calculateMissingMetadataFields: checksum, mimetype - JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, globusLogger); - JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); - logger.fine("Size: " + newfilesJsonArray.size()); - logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); - JsonArrayBuilder addFilesJsonData = Json.createArrayBuilder(); - - for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { - - countAll++; - String storageIdentifier = fileJsonObject.getString("storageIdentifier"); - String fileName = fileJsonObject.getString("fileName"); - String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); - // If this is an S3 store, we need to split out the bucket name - String[] bits = parts[1].split(":"); - if (bits.length > 1) { - } - String fileId = bits[bits.length - 1]; - - List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) - .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) - .filter(Objects::nonNull).collect(Collectors.toList()); - if (newfileJsonObject != null) { - logger.fine("List Size: " + newfileJsonObject.size()); - // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { - JsonPatch path = Json.createPatchBuilder() - .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); - fileJsonObject = path.apply(fileJsonObject); - path = Json.createPatchBuilder() - .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); - fileJsonObject = path.apply(fileJsonObject); - addFilesJsonData.add(fileJsonObject); - countSuccess++; - // } else { - // globusLogger.info(fileName - // + " will be skipped from adding to dataset by second API due to missing - // values "); - // countError++; - // } - } else { - globusLogger.info(fileName - + " will be skipped 
from adding to dataset by second API due to missing values "); - countError++; - } - } - - String newjsonData = addFilesJsonData.build().toString(); - - globusLogger.info("Successfully generated new JsonData for addFiles call"); - - /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " - + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" - + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; - System.out.println("*******====command ==== " + command);*/ - - // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of - // calling API - - // a quick experimental AddReplaceFileHelper implementation: - AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( - dataverseRequestSvc.getDataverseRequest(), - this.ingestSvc, - this.datasetSvc, - this.dataFileSvc, - this.permissionSvc, - this.commandEngine, - this.systemConfig - ); - - Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); - - if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) { - // if(!taskSkippedFiles) - if (countError == 0) { - userNotificationService.sendNotification((AuthenticatedUser) authUser, - new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, - dataset.getId(), countSuccess + " files added out of " + countAll, true); - } else { - userNotificationService.sendNotification((AuthenticatedUser) authUser, - new Timestamp(new Date().getTime()), - UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), - countSuccess + " files added out of " + countAll, true); - } - globusLogger.info("Successfully completed addFiles call "); - } else { - globusLogger.log(Level.SEVERE, - "******* Error while executing addFiles ", newjsonData); - } - - } - - globusLogger.info("Files processed: " + countAll.toString()); - globusLogger.info("Files added successfully: " + countSuccess.toString()); - globusLogger.info("Files failures: " + countError.toString()); - globusLogger.info("Finished upload via Globus job."); - + processUploadedFiles(filesJsonArray, dataset, authUser, globusLogger); } catch (Exception e) { - logger.info("Exception from globusUpload call "); + logger.info("Exception from processUploadedFiles call "); e.printStackTrace(); - globusLogger.info("Exception from globusUpload call " + e.getMessage()); + globusLogger.info("Exception from processUploadedFiles call " + e.getMessage()); datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); // } @@ -883,7 +808,164 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S fileHandler.close(); } } + /** + * The code in this method is copy-and-pasted from the previous Borealis + * implemenation + * @todo see if it can be refactored and simplified a bit, the json manipulation + * specifically (?) + * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles + * @param dataset the dataset + * @param authUser the user that should be be performing the addFiles call + * finalizing adding the files to the Dataset. Note that this + * user will need to be obtained from the saved api token, when this + * method is called via the TaskMonitoringService + * @param myLogger the Logger; if null, the main logger of the service bean will be used + * @throws IOException, InterruptedException, ExecutionException @todo may need to throw more exceptions (?) 
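+     * For reference, an individual entry in filesJsonArray is expected to look
+     * roughly like the following (the storageIdentifier value is borrowed from
+     * the comments in the method body; the fileName is hypothetical):
+     * {"storageIdentifier": "s3://gcs5-bucket1:1781cfeb8a7-748c270a227c", "fileName": "file1.txt"}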
+ */ + private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, AuthenticatedUser authUser, Logger myLogger) throws IOException, InterruptedException, ExecutionException { + myLogger = myLogger != null ? myLogger : logger; + + Integer countAll = 0; + Integer countSuccess = 0; + Integer countError = 0; + + List inputList = new ArrayList(); + + String datasetIdentifier = dataset.getAuthorityForFileStorage() + "/" + + dataset.getIdentifierForFileStorage(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + // storageIdentifier s3://gcs5-bucket1:1781cfeb8a7-748c270a227c from + // externalTool + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + String storeId = parts[0]; + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + String bucketName = ""; + if (bits.length > 1) { + bucketName = bits[0]; + } + String fileId = bits[bits.length - 1]; + + // fullpath s3://gcs5-bucket1/10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + // or globus:///10.5072/FK2/3S6G2E/1781cfeb8a7-4ad9418a5873 + String fullPath = storeId + "://" + bucketName + "/" + datasetIdentifier + "/" + fileId; + String fileName = fileJsonObject.getString("fileName"); + + inputList.add(fileId + "IDsplit" + fullPath + "IDsplit" + fileName); + } + + // calculateMissingMetadataFields: checksum, mimetype + JsonObject newfilesJsonObject = calculateMissingMetadataFields(inputList, myLogger); + JsonArray newfilesJsonArray = newfilesJsonObject.getJsonArray("files"); + logger.fine("Size: " + newfilesJsonArray.size()); + logger.fine("Val: " + JsonUtil.prettyPrint(newfilesJsonArray.getJsonObject(0))); + JsonArrayBuilder addFilesJsonData = Json.createArrayBuilder(); + + for (JsonObject fileJsonObject : filesJsonArray.getValuesAs(JsonObject.class)) { + + countAll++; + String storageIdentifier = fileJsonObject.getString("storageIdentifier"); + String fileName = fileJsonObject.getString("fileName"); + String[] parts = DataAccess.getDriverIdAndStorageLocation(storageIdentifier); + // If this is an S3 store, we need to split out the bucket name + String[] bits = parts[1].split(":"); + if (bits.length > 1) { + } + String fileId = bits[bits.length - 1]; + + List newfileJsonObject = IntStream.range(0, newfilesJsonArray.size()) + .mapToObj(index -> ((JsonObject) newfilesJsonArray.get(index)).getJsonObject(fileId)) + .filter(Objects::nonNull).collect(Collectors.toList()); + if (newfileJsonObject != null) { + logger.fine("List Size: " + newfileJsonObject.size()); + // if (!newfileJsonObject.get(0).getString("hash").equalsIgnoreCase("null")) { + JsonPatch path = Json.createPatchBuilder() + .add("/md5Hash", newfileJsonObject.get(0).getString("hash")).build(); + fileJsonObject = path.apply(fileJsonObject); + path = Json.createPatchBuilder() + .add("/mimeType", newfileJsonObject.get(0).getString("mime")).build(); + fileJsonObject = path.apply(fileJsonObject); + addFilesJsonData.add(fileJsonObject); + countSuccess++; + // } else { + // globusLogger.info(fileName + // + " will be skipped from adding to dataset by second API due to missing + // values "); + // countError++; + // } + } else { + myLogger.info(fileName + + " will be skipped from adding to dataset in the final AddReplaceFileHelper.addFiles() call. 
"); + countError++; + } + } + + String newjsonData = addFilesJsonData.build().toString(); + + myLogger.info("Successfully generated new JsonData for addFiles call"); + + /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; + System.out.println("*******====command ==== " + command);*/ + // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of + // calling API + // a quick experimental AddReplaceFileHelper implementation: + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( + dataverseRequestSvc.getDataverseRequest(), + this.ingestSvc, + this.datasetSvc, + this.dataFileSvc, + this.permissionSvc, + this.commandEngine, + this.systemConfig + ); + + Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); + + if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) { + // if(!taskSkippedFiles) + if (countError == 0) { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED, + dataset.getId(), countSuccess + " files added out of " + countAll, true); + } else { + userNotificationService.sendNotification((AuthenticatedUser) authUser, + new Timestamp(new Date().getTime()), + UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), + countSuccess + " files added out of " + countAll, true); + } + myLogger.info("Successfully completed addFiles call "); + } else { + myLogger.log(Level.SEVERE, + "******* Error while executing addFiles ", newjsonData); + } + myLogger.info("Files processed: " + countAll); + myLogger.info("Files added successfully: " + countSuccess); + myLogger.info("Files failures: " + countError); + myLogger.info("Finished upload via Globus job."); + + } + + /** + * I don't think this method is needed at all. (I suspect that it's a remnant + * from the times when *multiple* individual /add calls needed to be performed + * for each file being added. So this was part of a framework that attempted + * to run this calls in parallel, potentially speeding things up (similarly to + * how the checksums are being calculated in parallel for multiple files). + * As of now, this method doesn't do anything "asynchronous" - there is one + * /addFiles call, and the method below will wait for it to complete, via the + * CompletableFuture.get(). (L.A.) + * @param curlCommand + * @param globusLogger + * @return + * @throws ExecutionException + * @throws InterruptedException + */ public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { @@ -983,7 +1065,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro // If the rules_cache times out, the permission will be deleted. 
Presumably that // doesn't affect a // globus task status check - GlobusTask task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + GlobusTaskState task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); String ruleId = getRuleId(endpoint, task.getOwner_id(), "r"); if (ruleId != null) { logger.fine("Found rule: " + ruleId); @@ -999,6 +1081,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro logger.warning("ruleId not found for taskId: " + taskIdentifier); } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); + // @todo null check String taskStatus = getTaskStatus(task); // Transfer is done (success or failure) so delete the rule @@ -1033,61 +1116,38 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro Executor executor = Executors.newFixedThreadPool(10); - private GlobusTask globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) + private GlobusTaskState globusStatusCheck(GlobusEndpoint endpoint, String taskId, Logger globusLogger) throws MalformedURLException { - boolean taskCompletion = false; - String status = ""; - GlobusTask task = null; + boolean taskCompleted = false; + GlobusTaskState task = null; int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 50); do { try { globusLogger.info("checking globus transfer task " + taskId); Thread.sleep(pollingInterval * 1000); + // Call the (centralized) Globus API to check on the task state/status: task = getTask(endpoint.getClientToken(), taskId, globusLogger); - if (task != null) { - status = task.getStatus(); - if (status != null) { - // The task is in progress. - if (status.equalsIgnoreCase("ACTIVE")) { - if (task.getNice_status().equalsIgnoreCase("ok") - || task.getNice_status().equalsIgnoreCase("queued")) { - taskCompletion = false; - } else { - taskCompletion = true; - // status = "FAILED" + "#" + task.getNice_status() + "#" + - // task.getNice_status_short_description(); - } - } else { - // The task is either succeeded, failed or inactive. - taskCompletion = true; - // status = status + "#" + task.getNice_status() + "#" + - // task.getNice_status_short_description(); - } - } else { - // status = "FAILED"; - taskCompletion = true; - } - } else { - // status = "FAILED"; - taskCompletion = true; - } + taskCompleted = GlobusUtil.isTaskCompleted(task); } catch (Exception ex) { ex.printStackTrace(); } - } while (!taskCompletion); + } while (!taskCompleted); globusLogger.info("globus transfer task completed successfully"); return task; } - - private String getTaskStatus(GlobusTask task) { + + private String getTaskStatus(GlobusTaskState task) { String status = null; if (task != null) { status = task.getStatus(); if (status != null) { // The task is in progress but is not ok or queued + // (L.A.) I think the assumption here is that this method is called + // exclusively on tasks that have already completed. So that's why + // it is safe to assume that "ACTIVE" means "FAILED". if (status.equalsIgnoreCase("ACTIVE")) { status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); } else { @@ -1158,7 +1218,16 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) String fileName = id.split("IDsplit")[2]; // ToDo: what if the file does not exist in s3 + // (L.A.) - good question. maybe it should call .open and .exists() here? 
+ // otherwise, there doesn't seem to be any diagnostics as to which + // files uploaded successfully and which failed (?) + // ... however, any partially successful upload cases should be + // properly handled later, during the .addFiles() call - only + // the files that actually exists in storage remotely will be + // added to the dataset permanently then. // ToDo: what if checksum calculation failed + // (L.A.) - this appears to have been addressed - by using "Not available in Dataverse" + // in place of a checksum. String storageDriverId = DataAccess.getDriverIdAndStorageLocation(fullPath)[0]; @@ -1180,8 +1249,6 @@ private FileDetailsHolder calculateDetails(String id, Logger globusLogger) logger.info(ex.getMessage()); Thread.sleep(5000); } - - } while (count < 3); } @@ -1311,5 +1378,57 @@ public void writeGuestbookAndStartTransfer(GuestbookResponse guestbookResponse, } } } + + public List findAllOngoingTasks() { + return em.createQuery("select object(o) from GlobusTaskInProgress as o order by o.startTime", GlobusTaskInProgress.class).getResultList(); + } + + public void deleteTask(GlobusTaskInProgress task) { + GlobusTaskInProgress mergedTask = em.merge(task); + em.remove(mergedTask); + } + + public List findExternalUploadsByTaskId(String taskId) { + return em.createNamedQuery("ExternalFileUploadInProgress.findByTaskId").setParameter("taskId", taskId).getResultList(); + } + + // @todo this may or may not need to be async (?) + public void addFilesOnSuccess(GlobusTaskInProgress globusTask) { + List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); + + if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { + // @todo log error message; do nothing + return; + } + Dataset dataset = globusTask.getDataset(); + AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); + if (authUser == null) { + // @todo log error message; do nothing + return; + } + + JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); + + for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { + String jsonInfoString = pendingFile.getFileInfo(); + JsonObject fileObject = JsonUtil.getJsonObject(jsonInfoString); + filesJsonArrayBuilder.add(fileObject); + } + + JsonArray filesJsonArray = filesJsonArrayBuilder.build(); + + if (filesJsonArray == null || filesJsonArray.size() < 1) { + // @todo log error message; do nothing + return; + } + + try { + processUploadedFiles(filesJsonArray, dataset, authUser, null); + } catch (Exception ex) { + // @todo log error message; make sure the error notification to the + // has been sent (may or may not have already been sent inside the + // method above). + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java similarity index 93% rename from src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java rename to src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java index c2b01779f4a..b5db20d46c1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTask.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskState.java @@ -1,6 +1,10 @@ package edu.harvard.iq.dataverse.globus; -public class GlobusTask { +/** + * This class is used to store the state of an ongoing Globus task (transfer) + * as reported by the Globus task API. 
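+ * Only a handful of the fields are actually consumed on the Dataverse side;
+ * an abridged task record might look like (values hypothetical):
+ * { "status": "ACTIVE", "nice_status": "queued", "owner_id": "..." }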
+ */ +public class GlobusTaskState { private String DATA_TYPE; private String type; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java index 92cf8ac7704..67594ad1a5e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -30,4 +30,37 @@ public static JsonObject getFilesMap(List dataFiles, Dataset d) { } return filesBuilder.build(); } + + public static boolean isTaskCompleted(GlobusTaskState task) { + if (task != null) { + String status = task.getStatus(); + if (status != null) { + if (status.equalsIgnoreCase("ACTIVE")) { + if (task.getNice_status().equalsIgnoreCase("ok") + || task.getNice_status().equalsIgnoreCase("queued")) { + return false; + } + } + } + } + return true; + } + + public static boolean isTaskSucceeded(GlobusTaskState task) { + String status = null; + if (task != null) { + status = task.getStatus(); + if (status != null) { + status = status.toUpperCase(); + if (status.equals("ACTIVE") || status.startsWith("FAILED") || status.startsWith("INACTIVE")) { + // There are cases where a failed task may still be showing + // as "ACTIVE". But it is definitely safe to assume that it + // has not completed *successfully*. + return false; + } + return true; + } + } + return false; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 9bacafd173f..3f76a319902 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -345,12 +345,13 @@ public List saveAndAddFilesToDataset(DatasetVersion version, StorageIO dataAccess = DataAccess.getStorageIO(dataFile); //Populate metadata dataAccess.open(DataAccessOption.READ_ACCESS); - + // (this will make a remote call to check if the file exists + // and obtain its size) confirmedFileSize = dataAccess.getSize(); // For directly-uploaded files, we will perform the file size // limit and quota checks here. Perform them *again*, in - // some cases: a directly uploaded files have already been + // some cases: files directly uploaded via the UI have already been // checked (for the sake of being able to reject the upload // before the user clicks "save"). But in case of direct // uploads via API, these checks haven't been performed yet, diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 021977ff8c6..746e6e3b75d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -91,6 +91,11 @@ public enum FeatureFlags { * @since Dataverse 6.3 */ DISABLE_RETURN_TO_AUTHOR_REASON("disable-return-to-author-reason"), + /** + * TEMPORARY feature flag for the new Globus upload framework (will only be + * used for testing). 
+ */ + GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"), ; final String flag; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index f9801419e47..7417a5db4d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -82,6 +82,7 @@ public class SystemConfig { private String buildNumber = null; private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer"; + private static final String JVM_GLOBUS_TASK_MONITORING_OPTION = "dataverse.globus.taskMonitoringServer"; private static final long DEFAULT_GUESTBOOK_RESPONSES_DISPLAY_LIMIT = 5000L; private static final long DEFAULT_THUMBNAIL_SIZE_LIMIT_IMAGE = 3000000L; // 3 MB @@ -545,6 +546,14 @@ public boolean isTimerServer() { } return false; } + + public boolean isGlobusTaskMonitoringServer() { + String optionValue = System.getProperty(JVM_GLOBUS_TASK_MONITORING_OPTION); + if ("true".equalsIgnoreCase(optionValue)) { + return true; + } + return false; + } public String getFooterCopyrightAndYear() { return BundleUtil.getStringFromBundle("footer.copyright", Arrays.asList(Year.now().getValue() + "")); From dac53023309a17b6761388203a6040ee7199f382 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Aug 2024 13:14:00 -0400 Subject: [PATCH 04/31] new class files that weren't included in the last commit #10623 --- .../ExternalFileUploadInProgress.java | 131 ++++++++++++ .../globus/GlobusTaskInProgress.java | 188 ++++++++++++++++++ .../globus/TaskMonitoringServiceBean.java | 78 ++++++++ 3 files changed, 397 insertions(+) create mode 100644 src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java new file mode 100644 index 00000000000..dadde64608c --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -0,0 +1,131 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse; + +import jakarta.persistence.Column; +import jakarta.persistence.ManyToOne; +import jakarta.persistence.NamedQueries; +import jakarta.persistence.NamedQuery; +import java.io.Serializable; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; + +/** + * + * @author landreev + * + * The name of the class is provisional. I'm open to better-sounding alternatives, + * if anyone can think of any. + * But I wanted to avoid having the word "Globus" in the entity name. I'm adding + * it specifically for the Globus use case. But I'm guessing there's a chance + * this setup may come in handy for other types of datafile uploads that happen + * externally. (?) 
+ */ +@NamedQueries({ + @NamedQuery( name="ExternalFileUploadInProgress.deleteByTaskId", + query="DELETE FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId"), + @NamedQuery(name = "ExternalFileUploadInProgress.findByTaskId", + query = "SELECT f FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId")}) +@Entity +public class ExternalFileUploadInProgress implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + /** + * Rather than saving various individual fields defining the datafile, + * which would essentially replicate the DataFile table, we are simply + * storing the full json record as passed to the API here. + */ + @Column(columnDefinition = "TEXT", nullable = false) + private String fileInfo; + + /** + * This is Globus-specific task id associated with the upload in progress + */ + @Column(nullable = false) + private String taskId; + + /** + * The Dataset to which the files are being added. + * (@todo may not be necessary? - since the corresponding task is tied to a specific + * dataset already?) + */ + /*@ManyToOne + private Dataset dataset;*/ + + /*public ExternalFileUploadInProgress(String taskId, Dataset dataset, String fileInfo) { + this.taskId = taskId; + this.fileInfo = fileInfo; + this.dataset = dataset; + }*/ + + public ExternalFileUploadInProgress(String taskId, String fileInfo) { + this.taskId = taskId; + this.fileInfo = fileInfo; + } + + public String getFileInfo() { + return fileInfo; + } + + public void setFileInfo(String fileInfo) { + this.fileInfo = fileInfo; + } + + public String getTaskId() { + return taskId; + } + + public void setTaskId(String taskId) { + this.taskId = taskId; + } + + /*public Dataset getDataset() { + return dataset; + } + + public void setDataset(Dataset dataset) { + this.dataset = dataset; + }*/ + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof ExternalFileUploadInProgress)) { + return false; + } + ExternalFileUploadInProgress other = (ExternalFileUploadInProgress) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.ExternalFileUploadInProgress[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java new file mode 100644 index 00000000000..7b12ec0a3ad --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -0,0 +1,188 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.globus; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.authorization.users.ApiToken; +import jakarta.persistence.Column; +import jakarta.persistence.EnumType; +import jakarta.persistence.Enumerated; +import jakarta.persistence.ManyToOne; +import java.io.Serializable; +import java.sql.Timestamp; +import java.util.Arrays; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; + +/** + * + * @author landreev + */ +@Entity +public class GlobusTaskInProgress implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + /** + * Globus-side identifier of the task in progress, upload or download + */ + @Column(nullable = false) + private String taskId; + + GlobusTaskInProgress(String taskIdentifier, TaskType taskType, Dataset dataset, String clientToken, ApiToken token, Timestamp timestamp) { + throw new UnsupportedOperationException("Not supported yet."); // Generated from nbfs://nbhost/SystemFileSystem/Templates/Classes/Code/GeneratedMethodBody + } + + /** + * I was considering giving this enum type a more specific name "TransferType" + * - but maybe there will be another use case where we need to keep track of + * Globus tasks that are not data transfers (?) 
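+     * The type round-trips through its string value, i.e. (usage sketch)
+     * TaskType.fromString("UPLOAD") returns TaskType.UPLOAD, and
+     * TaskType.UPLOAD.toString() returns "UPLOAD".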
+ */ + public enum TaskType { + + UPLOAD("UPLOAD"), + DOWNLOAD("DOWNLOAD"); + + private final String text; + + private TaskType(final String text) { + this.text = text; + } + + public static TaskType fromString(String text) { + if (text != null) { + for (TaskType taskType : TaskType.values()) { + if (text.equals(taskType.text)) { + return taskType; + } + } + } + throw new IllegalArgumentException("TaskType must be one of these values: " + Arrays.asList(TaskType.values()) + "."); + } + + @Override + public String toString() { + return text; + } + } + + @Column(nullable = false) + @Enumerated(EnumType.STRING) + private TaskType taskType; + + /** + * Globus API token that should be used to monitor the status of the task + */ + @Column(nullable = false) + private String globusToken; + + /** + * This is the Dataverse API token of the user who initiated the Globus task + */ + private String apiToken; + + @ManyToOne + private Dataset dataset; + + @Column( nullable = false ) + private Timestamp startTime; + + + public GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String clientToken, String apiToken, Timestamp startTime) { + this.taskId = taskId; + this.taskType = taskType; + this.globusToken = clientToken; + this.apiToken = apiToken; + this.dataset = dataset; + this.startTime = startTime; + } + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public String getTaskId() { + return taskId; + } + + public void setTaskId(String taskId) { + this.taskId = taskId; + } + + public TaskType getTaskType() { + return taskType; + } + + public void setTaskType(TaskType taskType) { + this.taskType = taskType; + } + + public String getGlobusToken() { + return globusToken; + } + + public void setGlobusToken(String clientToken) { + this.globusToken = clientToken; + } + + public String getApiToken() { + return apiToken; + } + + public void setApiToken(String apiToken) { + this.apiToken = apiToken; + } + + public Dataset getDataset() { + return dataset; + } + + public void setDataset(Dataset dataset) { + this.dataset = dataset; + } + + public Timestamp getStartTime() { + return startTime; + } + + public void setStartTime(Timestamp startTime) { + this.startTime = startTime; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof GlobusTaskInProgress)) { + return false; + } + GlobusTaskInProgress other = (GlobusTaskInProgress) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.globus.GlobusTaskInProgress[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java new file mode 100644 index 00000000000..da31ded90db --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -0,0 +1,78 @@ +/* + * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license + * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template + */ +package edu.harvard.iq.dataverse.globus; + +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import jakarta.annotation.PostConstruct; +import jakarta.annotation.Resource; +import jakarta.ejb.EJB; +import jakarta.ejb.Singleton; +import jakarta.ejb.Startup; +import jakarta.enterprise.concurrent.ManagedScheduledExecutorService; +import java.util.List; +import java.util.concurrent.TimeUnit; + +/** + * + * This Singleton monitors ongoing Globus tasks by checking with the centralized + * Globus API on the status of all the registered ongoing tasks. + * When a successful completion of a task is detected, the service triggers + * the execution of the associated tasks (for example, finalizing adding datafiles + * to the dataset on completion of a remote Globus upload). When a task fails or + * terminates abnormally, a message is logged and the task record is deleted + * from the database. + * + * @author landreev + */ +@Singleton +@Startup +public class TaskMonitoringServiceBean { + @Resource + ManagedScheduledExecutorService scheduler; + + @EJB + SystemConfig systemConfig; + @EJB + SettingsServiceBean settingsSvc; + @EJB + GlobusServiceBean globusService; + + @PostConstruct + public void init() { + if (systemConfig.isGlobusTaskMonitoringServer()) { + int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( + settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); + this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, + 0, pollingInterval, + TimeUnit.SECONDS); + } + } + + /** + * This method will be executed on a timer-like schedule, continuously + * monitoring all the ongoing external Globus tasks (transfers). + * @todo make sure the executions do not overlap/stack up + */ + public void checkOngoingTasks() { + List tasks = globusService.findAllOngoingTasks(); + + tasks.forEach(t -> { + GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), null); + if (GlobusUtil.isTaskCompleted(retrieved)) { + if (GlobusUtil.isTaskSucceeded(retrieved)) { + // Do our thing, finalize adding the files to the dataset + globusService.addFilesOnSuccess(t); + } + // Whether it finished successfully, or failed in the process, + // there's no need to keep monitoring this task, so we can + // delete it. 
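+                // (@todo per the class javadoc, a failed task should also have
+                // a message logged - that does not appear to happen here yet)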
+ globusService.deleteTask(t); + // @todo double-check that the locks have been properly handled + } + }); + } + +} From 408034125998e5cd40dda2ff27374cab4bdacd21 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 12 Aug 2024 15:21:45 -0400 Subject: [PATCH 05/31] fixing some bad changes that got committed earlier #10623 --- .../ExternalFileUploadInProgress.java | 41 ++++++------------ .../dataverse/globus/GlobusServiceBean.java | 2 +- .../globus/GlobusTaskInProgress.java | 42 ++++++++++--------- 3 files changed, 35 insertions(+), 50 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java index dadde64608c..ab6a1798307 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -5,14 +5,15 @@ package edu.harvard.iq.dataverse; import jakarta.persistence.Column; -import jakarta.persistence.ManyToOne; +import jakarta.persistence.Index; import jakarta.persistence.NamedQueries; import jakarta.persistence.NamedQuery; +import jakarta.persistence.Table; import java.io.Serializable; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; /** * @@ -31,6 +32,7 @@ @NamedQuery(name = "ExternalFileUploadInProgress.findByTaskId", query = "SELECT f FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId")}) @Entity +@Table(indexes = {@Index(columnList="taskid")}) public class ExternalFileUploadInProgress implements Serializable { private static final long serialVersionUID = 1L; @@ -51,29 +53,18 @@ public void setId(Long id) { * which would essentially replicate the DataFile table, we are simply * storing the full json record as passed to the API here. */ - @Column(columnDefinition = "TEXT", nullable = false) + @Column(columnDefinition = "TEXT", nullable=false) private String fileInfo; /** * This is Globus-specific task id associated with the upload in progress */ - @Column(nullable = false) + @Column(nullable=false) private String taskId; - /** - * The Dataset to which the files are being added. - * (@todo may not be necessary? - since the corresponding task is tied to a specific - * dataset already?) 
- */ - /*@ManyToOne - private Dataset dataset;*/ - - /*public ExternalFileUploadInProgress(String taskId, Dataset dataset, String fileInfo) { - this.taskId = taskId; - this.fileInfo = fileInfo; - this.dataset = dataset; - }*/ - + public ExternalFileUploadInProgress() { + } + public ExternalFileUploadInProgress(String taskId, String fileInfo) { this.taskId = taskId; this.fileInfo = fileInfo; @@ -95,14 +86,6 @@ public void setTaskId(String taskId) { this.taskId = taskId; } - /*public Dataset getDataset() { - return dataset; - } - - public void setDataset(Dataset dataset) { - this.dataset = dataset; - }*/ - @Override public int hashCode() { int hash = 0; diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index eb1eb47611a..6b78925beb0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -721,7 +721,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. - GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token, new Timestamp(new Date().getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), new Timestamp(new Date().getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index 7b12ec0a3ad..210f08710dc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -5,24 +5,26 @@ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.authorization.users.ApiToken; import jakarta.persistence.Column; import jakarta.persistence.EnumType; import jakarta.persistence.Enumerated; import jakarta.persistence.ManyToOne; +import jakarta.persistence.Table; +import jakarta.persistence.UniqueConstraint; import java.io.Serializable; import java.sql.Timestamp; import java.util.Arrays; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; +import jakarta.persistence.Entity; +import jakarta.persistence.GeneratedValue; +import jakarta.persistence.GenerationType; +import jakarta.persistence.Id; /** * * @author landreev */ @Entity +@Table(uniqueConstraints = {@UniqueConstraint(columnNames = "taskid")}) public class GlobusTaskInProgress implements Serializable { private static final long serialVersionUID = 1L; @@ -33,13 +35,9 @@ public class GlobusTaskInProgress implements Serializable { /** * Globus-side identifier of the task in progress, upload or download */ - @Column(nullable = false) + @Column(nullable=false, unique = true) private String taskId; - GlobusTaskInProgress(String taskIdentifier, TaskType taskType, Dataset dataset, String clientToken, ApiToken token, Timestamp timestamp) { - throw new UnsupportedOperationException("Not supported yet."); // 
Generated from nbfs://nbhost/SystemFileSystem/Templates/Classes/Code/GeneratedMethodBody - } - /** * I was considering giving this enum type a more specific name "TransferType" * - but maybe there will be another use case where we need to keep track of @@ -73,14 +71,14 @@ public String toString() { } } - @Column(nullable = false) + @Column @Enumerated(EnumType.STRING) private TaskType taskType; /** * Globus API token that should be used to monitor the status of the task */ - @Column(nullable = false) + @Column private String globusToken; /** @@ -91,19 +89,23 @@ public String toString() { @ManyToOne private Dataset dataset; - @Column( nullable = false ) + @Column private Timestamp startTime; - - public GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String clientToken, String apiToken, Timestamp startTime) { - this.taskId = taskId; - this.taskType = taskType; - this.globusToken = clientToken; - this.apiToken = apiToken; + public GlobusTaskInProgress() { + } + + GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, Timestamp startTime) { + this.taskId = taskId; + this.taskType = taskType; this.dataset = dataset; - this.startTime = startTime; + this.globusToken = globusToken; + this.apiToken = apiToken; + this.startTime = startTime; } + + public Long getId() { return id; } From e086a60aad4a758f3c6c1b1d2797985b7ade380a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 13 Aug 2024 09:25:35 -0400 Subject: [PATCH 06/31] cleanup #10623 --- .../harvard/iq/dataverse/globus/GlobusServiceBean.java | 9 ++++++++- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 8 ++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 6b78925beb0..9ab3a2df567 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -966,6 +966,7 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut * @throws ExecutionException * @throws InterruptedException */ + /* public String addFilesAsync(String curlCommand, Logger globusLogger) throws ExecutionException, InterruptedException { CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { @@ -1018,7 +1019,7 @@ private String addFiles(String curlCommand, Logger globusLogger) { } return status; - } + } */ @Asynchronous public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { @@ -1431,4 +1432,10 @@ public void addFilesOnSuccess(GlobusTaskInProgress globusTask) { // method above). 
} } + + public void deleteExternalUploadRecords(String taskId) { + em.createNamedQuery("ExternalFileUploadInProgress.deleteByTaskId") + .setParameter("taskId", taskId) + .executeUpdate(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index da31ded90db..bd274d44e38 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -14,6 +14,7 @@ import jakarta.enterprise.concurrent.ManagedScheduledExecutorService; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.logging.Logger; /** * @@ -30,6 +31,8 @@ @Singleton @Startup public class TaskMonitoringServiceBean { + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.globus.TaskMonitoringServiceBean"); + @Resource ManagedScheduledExecutorService scheduler; @@ -43,11 +46,14 @@ public class TaskMonitoringServiceBean { @PostConstruct public void init() { if (systemConfig.isGlobusTaskMonitoringServer()) { + logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, 0, pollingInterval, TimeUnit.SECONDS); + } else { + logger.info("Skipping Globus task monitor initialization"); } } @@ -57,6 +63,7 @@ public void init() { * @todo make sure the executions do not overlap/stack up */ public void checkOngoingTasks() { + logger.info("Performing a scheduled external Globus task check"); List tasks = globusService.findAllOngoingTasks(); tasks.forEach(t -> { @@ -69,6 +76,7 @@ public void checkOngoingTasks() { // Whether it finished successfully, or failed in the process, // there's no need to keep monitoring this task, so we can // delete it. 
+ globusService.deleteExternalUploadRecords(t.getTaskId()); globusService.deleteTask(t); // @todo double-check that the locks have been properly handled } From 35ce7ef1892baa92b36e2350193eec4a63db6237 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 14 Aug 2024 14:37:38 -0400 Subject: [PATCH 07/31] more testing/debugging #10623 --- .../dataverse/globus/GlobusServiceBean.java | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 9ab3a2df567..3f6d0bf7c68 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -60,6 +60,7 @@ import edu.harvard.iq.dataverse.dataaccess.GlobusAccessibleStore; import edu.harvard.iq.dataverse.dataaccess.StorageIO; import edu.harvard.iq.dataverse.datasetutility.AddReplaceFileHelper; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.ingest.IngestServiceBean; import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean; @@ -71,9 +72,12 @@ import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; import edu.harvard.iq.dataverse.util.json.JsonUtil; +import jakarta.json.JsonReader; import jakarta.persistence.EntityManager; import jakarta.persistence.PersistenceContext; +import jakarta.servlet.http.HttpServletRequest; import jakarta.ws.rs.core.Response; +import org.apache.http.util.EntityUtils; @Stateless @Named("GlobusServiceBean") @@ -810,7 +814,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S } /** * The code in this method is copy-and-pasted from the previous Borealis - * implemenation + * implemenation. * @todo see if it can be refactored and simplified a bit, the json manipulation * specifically (?) * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles @@ -907,6 +911,11 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut myLogger.info("Successfully generated new JsonData for addFiles call"); + myLogger.info("Files processed: " + countAll); + myLogger.info("Files added successfully: " + countSuccess); + myLogger.info("Files failures: " + countError); + myLogger.info("Finished upload via Globus job."); + /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" + datasetIdentifier + " -F jsonData='" + newjsonData + "'"; @@ -914,8 +923,15 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of // calling API // a quick experimental AddReplaceFileHelper implementation: + + // Passing null for the HttpServletRequest to make a new DataverseRequest. + // The parent method is executed asynchronously, so the real request + // that was associated with the original API call that triggered this upload + // cannot be obtained. 
+ DataverseRequest dataverseRequest = new DataverseRequest(authUser, (HttpServletRequest)null); + AddReplaceFileHelper addFileHelper = new AddReplaceFileHelper( - dataverseRequestSvc.getDataverseRequest(), + dataverseRequest, this.ingestSvc, this.datasetSvc, this.dataFileSvc, @@ -923,10 +939,18 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut this.commandEngine, this.systemConfig ); + + // The old code had 2 sec. of sleep, so ... + Thread.sleep(2000); Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); - if (Response.Status.OK.equals(addFilesResponse.getStatusInfo())) { + JsonReader jsonReader = Json.createReader(new StringReader((String) addFilesResponse.getEntity().toString())); + JsonObject jsonObject = jsonReader.readObject(); + String addFilesStatus = jsonObject.getString("status"); + String addFilesMessage = jsonObject.getJsonObject("data").getString("message"); + + if ("OK".equalsIgnoreCase(addFilesStatus)) { // if(!taskSkippedFiles) if (countError == 0) { userNotificationService.sendNotification((AuthenticatedUser) authUser, @@ -942,13 +966,14 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut } else { myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData); + // @todo send Failure notification + if (addFilesResponse != null) { + myLogger.info("addFilesResponse status: " + addFilesStatus); + myLogger.info("addFilesResponse message" + addFilesMessage); + } } - myLogger.info("Files processed: " + countAll); - myLogger.info("Files added successfully: " + countSuccess); - myLogger.info("Files failures: " + countError); - myLogger.info("Finished upload via Globus job."); - + } /** From d4b9bac8366a2b909c5c32cf5e3f4361d6abc7d5 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 16 Aug 2024 20:21:44 -0400 Subject: [PATCH 08/31] this is a working, but still work-in-progress state of things - needs some cleanup and refinements. #10623 --- .../iq/dataverse/DatasetServiceBean.java | 11 +- .../harvard/iq/dataverse/api/Datasets.java | 2 + .../datasetutility/AddReplaceFileHelper.java | 4 +- .../impl/UpdateDatasetVersionCommand.java | 26 +- .../dataverse/globus/GlobusServiceBean.java | 232 ++++++++++-------- .../globus/GlobusTaskInProgress.java | 13 +- .../globus/TaskMonitoringServiceBean.java | 14 +- 7 files changed, 182 insertions(+), 120 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index dab0ff43fcf..832d7192965 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -412,12 +412,20 @@ public boolean checkDatasetLock(Long datasetId) { List lock = lockCounter.getResultList(); return lock.size()>0; } - + + public List getLocksByDatasetId(Long datasetId) { + TypedQuery locksQuery = em.createNamedQuery("DatasetLock.getLocksByDatasetId", DatasetLock.class); + locksQuery.setParameter("datasetId", datasetId); + return locksQuery.getResultList(); + } + public List getDatasetLocksByUser( AuthenticatedUser user) { return listLocks(null, user); } + // @todo: we'll be better off getting rid of this method and using the other + // version of addDatasetLock() (that uses datasetId instead of Dataset). 
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public DatasetLock addDatasetLock(Dataset dataset, DatasetLock lock) { lock.setDataset(dataset); @@ -467,6 +475,7 @@ public DatasetLock addDatasetLock(Long datasetId, DatasetLock.Reason reason, Lon * is {@code aReason}. * @param dataset the dataset whose locks (for {@code aReason}) will be removed. * @param aReason The reason of the locks that will be removed. + * @todo this should probably take dataset_id, not a dataset */ @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public void removeDatasetLocks(Dataset dataset, DatasetLock.Reason aReason) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index b93257bc0c3..4c547f5295f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4035,6 +4035,8 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, return wr.getResponse(); } + // @todo check if the dataset is already locked! + JsonObject jsonObject = null; try { jsonObject = JsonUtil.getJsonObject(jsonData); diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 0143fced87c..2b35a4cc783 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2139,9 +2139,9 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { logger.log(Level.WARNING, "Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.log(Level.INFO, "Removed EditInProgress lock "); + logger.log(Level.INFO, "Removed EditInProgress lock "+eipLock.getId()); } - + try { Command cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, clone); ((UpdateDatasetVersionCommand) cmd).setValidateLenient(true); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index 994f4c7dfb6..faf8884b08d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -14,6 +14,7 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -102,7 +103,10 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } Dataset theDataset = getDataset(); - ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); + //ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); + // this is an experiment (probably temporary) + checkUpdateDatasetVersionLock(ctxt); + Dataset savedDataset = null; DatasetVersion persistedVersion = clone; @@ -297,5 +301,23 @@ public boolean onSuccess(CommandContext ctxt, Object r) { ctxt.index().asyncIndexDataset((Dataset) r, true); return true; } - + + 
private void checkUpdateDatasetVersionLock(CommandContext ctxt) throws IllegalCommandException { + List locks = ctxt.datasets().getLocksByDatasetId(getDataset().getId()); + //locks.forEach(lock -> { + for (DatasetLock lock : locks) { + // Ingest lock is ok: + if (DatasetLock.Reason.Ingest != lock.getReason()) { + // with Workflow lock *some* users can edit; + // any other kind of lock - nope + if (DatasetLock.Reason.Workflow != lock.getReason() + || !ctxt.permissions().isMatchingWorkflowLock(getDataset(), + getUser().getIdentifier(), + getRequest().getWFInvocationId())) { + throw new IllegalCommandException( + BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), this); + } + } + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 3f6d0bf7c68..03134d811a7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -51,6 +51,7 @@ import org.primefaces.PrimeFaces; import com.google.gson.Gson; +import edu.harvard.iq.dataverse.api.ApiConstants; import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; import edu.harvard.iq.dataverse.authorization.users.ApiToken; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; @@ -68,6 +69,7 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; +import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.URLTokenUtil; import edu.harvard.iq.dataverse.util.UrlSignerUtil; @@ -148,7 +150,7 @@ private String getRuleId(GlobusEndpoint endpoint, String principal, String permi * @param ruleId - Globus rule id - assumed to be associated with the * dataset's file path (should not be called with a user * specified rule id w/o further checking) - * @param datasetId - the id of the dataset associated with the rule + * @param dataset - the dataset associated with the rule * @param globusLogger - a separate logger instance, may be null */ public void deletePermission(String ruleId, Dataset dataset, Logger globusLogger) { @@ -690,7 +692,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; + return; // @todo ? } if (fileHandlerSuceeded) { @@ -706,8 +708,8 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S String taskIdentifier = jsonData.getString("taskIdentifier"); GlobusEndpoint endpoint = getGlobusEndpoint(dataset); - GlobusTaskState task = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); - String ruleId = getRuleId(endpoint, task.getOwner_id(), "rw"); + GlobusTaskState taskState = getTask(endpoint.getClientToken(), taskIdentifier, globusLogger); + String ruleId = getRuleId(endpoint, taskState.getOwner_id(), "rw"); logger.fine("Found rule: " + ruleId); if (ruleId != null) { Long datasetId = rulesCache.getIfPresent(ruleId); @@ -725,7 +727,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. 
- GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), new Timestamp(new Date().getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(new Date().getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded @@ -746,17 +748,16 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // sleeping-then-checking the task status repeatedly: // globus task status check - // (the method below performs continuous looped checks of the remote + // (the following method performs continuous looped checks of the remote // Globus API, monitoring it for as long as it takes for the task to // finish one way or another!) - task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); + taskState = globusStatusCheck(endpoint, taskIdentifier, globusLogger); // @todo null check, or make sure it's never null - String taskStatus = getTaskStatus(task); + String taskStatus = getTaskStatus(taskState); if (ruleId != null) { // Transfer is complete, so delete rule deletePermission(ruleId, dataset, globusLogger); - } // If success, switch to an EditInProgress lock - do this before removing the @@ -764,8 +765,17 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Keeping a lock through the add datafiles API call avoids a conflicting edit // and keeps any open dataset page refreshing until the datafile appears if (!(taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE"))) { - datasetSvc.addDatasetLock(dataset, - new DatasetLock(DatasetLock.Reason.EditInProgress, authUser, "Completing Globus Upload")); + globusLogger.info("Finished upload via Globus job."); + + DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), + DatasetLock.Reason.EditInProgress, + (authUser).getId(), + "Completing Globus Upload"); + if (editLock != null) { + dataset.addLock(editLock); + } else { + globusLogger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } } DatasetLock gLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); @@ -785,7 +795,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S */ datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); } - + if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { String comment = "Reason : " + taskStatus.split("#")[1] + "
Short Description : " + taskStatus.split("#")[2]; @@ -911,10 +921,9 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut myLogger.info("Successfully generated new JsonData for addFiles call"); - myLogger.info("Files processed: " + countAll); - myLogger.info("Files added successfully: " + countSuccess); - myLogger.info("Files failures: " + countError); - myLogger.info("Finished upload via Globus job."); + myLogger.info("Files passed to /addGlobusFiles: " + countAll); + myLogger.info("Files processed successfully: " + countSuccess); + myLogger.info("Files failures to process: " + countError); /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST " + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:" @@ -925,7 +934,7 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut // a quick experimental AddReplaceFileHelper implementation: // Passing null for the HttpServletRequest to make a new DataverseRequest. - // The parent method is executed asynchronously, so the real request + // The parent method is always executed asynchronously, so the real request // that was associated with the original API call that triggered this upload // cannot be obtained. DataverseRequest dataverseRequest = new DataverseRequest(authUser, (HttpServletRequest)null); @@ -945,12 +954,38 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut Response addFilesResponse = addFileHelper.addFiles(newjsonData, dataset, authUser); - JsonReader jsonReader = Json.createReader(new StringReader((String) addFilesResponse.getEntity().toString())); - JsonObject jsonObject = jsonReader.readObject(); - String addFilesStatus = jsonObject.getString("status"); - String addFilesMessage = jsonObject.getJsonObject("data").getString("message"); + if (addFilesResponse == null) { + logger.info("null response from addFiles call"); + //@todo add this case to the user notification in case of error + return; + } + + JsonObject addFilesJsonObject = JsonUtil.getJsonObject(addFilesResponse.getEntity().toString()); - if ("OK".equalsIgnoreCase(addFilesStatus)) { + // @todo null checks etc. + String addFilesStatus = addFilesJsonObject.getString("status", null); + myLogger.info("addFilesResponse status: " + addFilesStatus); + + + if (ApiConstants.STATUS_OK.equalsIgnoreCase(addFilesStatus)) { + if (addFilesJsonObject.containsKey("data")) { + JsonObject responseFilesData = addFilesJsonObject.getJsonObject("data"); + if (responseFilesData.containsKey("Result")) { + JsonObject addFilesResult = responseFilesData.getJsonObject("Result"); + + Integer addFilesTotal = addFilesResult.getInt("Total number of files", -1); + Integer addFilesSuccess = addFilesResult.getInt("Number of files successfully added", -1); + // @todo handle -1 (missing values) above + // @todo log all this stuff in a task-specific log (??) 
+ myLogger.info("Files processed by addFiles: " + addFilesTotal + ", successfully added: " + addFilesSuccess); + // @todo incorporate this into the user notification + } else { + logger.warning("Malformed addFiles data section: "+ responseFilesData.toString()); + } + } else { + logger.warning("Malformed addFiles response json: " + addFilesJsonObject.toString()); + } + // if(!taskSkippedFiles) if (countError == 0) { userNotificationService.sendNotification((AuthenticatedUser) authUser, @@ -963,89 +998,21 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut countSuccess + " files added out of " + countAll, true); } myLogger.info("Successfully completed addFiles call "); + } else if (ApiConstants.STATUS_ERROR.equalsIgnoreCase(addFilesStatus)) { + String addFilesMessage = addFilesJsonObject.getString("message", null); + + myLogger.log(Level.SEVERE, + "******* Error while executing addFiles ", newjsonData); + myLogger.log(Level.SEVERE, "****** Output from addFiles: ", addFilesMessage); + // @todo send Failure notification + } else { myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData); // @todo send Failure notification - if (addFilesResponse != null) { - myLogger.info("addFilesResponse status: " + addFilesStatus); - myLogger.info("addFilesResponse message" + addFilesMessage); - } } - - } - /** - * I don't think this method is needed at all. (I suspect that it's a remnant - * from the times when *multiple* individual /add calls needed to be performed - * for each file being added. So this was part of a framework that attempted - * to run this calls in parallel, potentially speeding things up (similarly to - * how the checksums are being calculated in parallel for multiple files). - * As of now, this method doesn't do anything "asynchronous" - there is one - * /addFiles call, and the method below will wait for it to complete, via the - * CompletableFuture.get(). (L.A.) 
- * @param curlCommand - * @param globusLogger - * @return - * @throws ExecutionException - * @throws InterruptedException - */ - /* - public String addFilesAsync(String curlCommand, Logger globusLogger) - throws ExecutionException, InterruptedException { - CompletableFuture addFilesFuture = CompletableFuture.supplyAsync(() -> { - try { - Thread.sleep(2000); - } catch (InterruptedException e) { - e.printStackTrace(); - } - return (addFiles(curlCommand, globusLogger)); - }, executor).exceptionally(ex -> { - globusLogger.fine("Something went wrong : " + ex.getLocalizedMessage()); - ex.printStackTrace(); - return null; - }); - - String result = addFilesFuture.get(); - - return result; - } - - private String addFiles(String curlCommand, Logger globusLogger) { - ProcessBuilder processBuilder = new ProcessBuilder(); - Process process = null; - String line; - String status = ""; - - try { - globusLogger.info("Call to : " + curlCommand); - processBuilder.command("bash", "-c", curlCommand); - process = processBuilder.start(); - process.waitFor(); - - BufferedReader br = new BufferedReader(new InputStreamReader(process.getInputStream())); - - StringBuilder sb = new StringBuilder(); - while ((line = br.readLine()) != null) - sb.append(line); - globusLogger.info(" API Output : " + sb.toString()); - JsonObject jsonObject = null; - jsonObject = JsonUtil.getJsonObject(sb.toString()); - - status = jsonObject.getString("status"); - } catch (Exception ex) { - if (ex instanceof JsonParsingException) { - globusLogger.log(Level.SEVERE, "Error parsing dataset json."); - } else { - globusLogger.log(Level.SEVERE, - "******* Unexpected Exception while executing api/datasets/:persistentId/add call ", ex); - } - } - - return status; - } */ - @Asynchronous public void globusDownload(String jsonData, Dataset dataset, User authUser) throws MalformedURLException { @@ -1418,22 +1385,75 @@ public List findExternalUploadsByTaskId(String tas return em.createNamedQuery("ExternalFileUploadInProgress.findByTaskId").setParameter("taskId", taskId).getResultList(); } - // @todo this may or may not need to be async (?) - public void addFilesOnSuccess(GlobusTaskInProgress globusTask) { - List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); - - if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { - // @todo log error message; do nothing - return; - } + // @todo duplicated code, merge with the code handling the "classic" upload workflow + public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess) { + String ruleId = globusTask.getRuleId(); Dataset dataset = globusTask.getDataset(); + if (ruleId != null) { + // Transfer is complete, so delete rule + deletePermission(ruleId, dataset, logger); + } + AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); if (authUser == null) { // @todo log error message; do nothing return; } + + // Switch the locks on the dataset: + // @todo is it necessary? what is wrong exactly with keeping the Globus + // lock on for the duration of the process? 
+ if (taskSuccess) { + DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), + DatasetLock.Reason.EditInProgress, + (authUser).getId(), + "Completing Globus Upload"); + if (editLock != null) { + dataset.addLock(editLock); + } else { + logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + } + } + + // Remove the Globus lock, regardless of whether this is a success or failure + DatasetLock globusUploadLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); + if (globusUploadLock == null) { + logger.log(Level.WARNING, "No GlobusUpload lock found for dataset"); + } else { + logger.log(Level.FINE, "Removing GlobusUpload lock " + globusUploadLock.getId()); + /* + * Note: This call to remove a lock only works immediately because it is in + * another service bean. Despite the removeDatasetLocks method having the + * REQUIRES_NEW transaction annotation, when the globusUpload method and that + * method were in the same bean (globusUpload was in the DatasetServiceBean to + * start), the globus lock was still seen in the API call initiated in the + * addFilesAsync method called within the globusUpload method. I.e. it appeared + * that the lock removal was not committed/visible outside this method until + * globusUpload itself ended. + */ + datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); + } + + if (taskSuccess && GlobusTaskInProgress.TaskType.UPLOAD.equals(globusTask.getTaskType())) { + List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); + + if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { + // @todo log error message; do nothing + return; + } + addFilesOnSuccess(dataset, authUser, fileUploadsInProgress); + } + // Handle locks/rules/etc. (?) 
+ if (ruleId != null) { + deletePermission(ruleId, dataset, logger); + logger.info("Removed upload permission: " + ruleId); + } + } + + public void addFilesOnSuccess(Dataset dataset, AuthenticatedUser authUser, List fileUploadsInProgress) { + JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index 210f08710dc..d3d06d38151 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -86,6 +86,9 @@ public String toString() { */ private String apiToken; + @Column + private String ruleId; + @ManyToOne private Dataset dataset; @@ -95,12 +98,13 @@ public String toString() { public GlobusTaskInProgress() { } - GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, Timestamp startTime) { + GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, String ruleId, Timestamp startTime) { this.taskId = taskId; this.taskType = taskType; this.dataset = dataset; this.globusToken = globusToken; this.apiToken = apiToken; + this.ruleId = ruleId; this.startTime = startTime; } @@ -146,6 +150,13 @@ public void setApiToken(String apiToken) { this.apiToken = apiToken; } + public String getRuleId() { + return ruleId; + } + + public void setRuleId(String ruleId) { + this.ruleId = ruleId; + } public Dataset getDataset() { return dataset; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index bd274d44e38..4db25072b6b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -49,6 +49,7 @@ public void init() { logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); + // @todo scheduleAtFixedDelay() this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, 0, pollingInterval, TimeUnit.SECONDS); @@ -65,20 +66,17 @@ public void init() { public void checkOngoingTasks() { logger.info("Performing a scheduled external Globus task check"); List tasks = globusService.findAllOngoingTasks(); - + tasks.forEach(t -> { GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), null); if (GlobusUtil.isTaskCompleted(retrieved)) { - if (GlobusUtil.isTaskSucceeded(retrieved)) { - // Do our thing, finalize adding the files to the dataset - globusService.addFilesOnSuccess(t); - } + // Do our thing, finalize adding the files to the dataset + globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved)); // Whether it finished successfully, or failed in the process, // there's no need to keep monitoring this task, so we can - // delete it. - globusService.deleteExternalUploadRecords(t.getTaskId()); + // delete it. 
+ //globusService.deleteExternalUploadRecords(t.getTaskId()); globusService.deleteTask(t); - // @todo double-check that the locks have been properly handled } }); } From 9c62b81c484111df3ed9162c8353aa50bd4b295b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 19 Aug 2024 10:47:29 -0400 Subject: [PATCH 09/31] refined logging #10623 --- .../dataverse/globus/GlobusServiceBean.java | 231 ++++++++---------- .../iq/dataverse/globus/GlobusUtil.java | 27 ++ .../globus/TaskMonitoringServiceBean.java | 61 ++++- 3 files changed, 191 insertions(+), 128 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 03134d811a7..5e68128c954 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -679,20 +679,24 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S throw new IllegalArgumentException("No valid json entries supplied for the files being uploaded"); } - String logTimestamp = logFormatter.format(new Date()); + Date startDate = new Date(); + + String logTimestamp = logFormatter.format(startDate); Logger globusLogger = Logger.getLogger( "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimestamp); - String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_id_" + dataset.getId() + "_" + logTimestamp + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; - boolean fileHandlerSuceeded; + boolean fileHandlerSuceeded = false; try { fileHandler = new FileHandler(logFileName); globusLogger.setUseParentHandlers(false); fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - return; // @todo ? + //return; // @todo I don't think we need to return here? + fileHandler = null; + } if (fileHandlerSuceeded) { @@ -727,7 +731,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. - GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(new Date().getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(startDate.getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded @@ -740,6 +744,11 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S em.persist(fileUploadRecord); } + if (fileHandler != null) { + fileHandler.close(); + } + + // return and forget return; } @@ -753,19 +762,62 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // finish one way or another!) 
taskState = globusStatusCheck(endpoint, taskIdentifier, globusLogger); // @todo null check, or make sure it's never null - String taskStatus = getTaskStatus(taskState); + String taskStatus = GlobusUtil.getTaskStatus(taskState); + boolean taskSuccess = GlobusUtil.isTaskCompleted(taskState); + + processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus); + + if (fileHandler != null) { + fileHandler.close(); + } + } + /** + * As the name suggests, the method completes and finalizes an upload task, + * whether it completed successfully or failed. (In the latter case, it + * simply sends a failure notification and does some cleanup). + * The method is called in both task monitoring scenarios: the old method, + * that relies on continuous looping, and the new, implemented on the basis + * of timer-like monitoring from a dedicated monitoring Singleton service. + * @param dataset the dataset + * @param filesJsonArray JsonArray containing files metadata entries as passed to /addGlobusFiles + * @param authUser the user that should be be performing the addFiles call + * finalizing adding the files to the Dataset. Note that this + * user will need to be obtained from the saved api token, when this + * method is called via the TaskMonitoringService + * @param ruleId Globus rule/permission id associated with the task + * @param myLogger the Logger; if null, the main logger of the service bean will be used + * @param fileHandler FileHandler associated with the Logger, when not null + * @param taskSuccess boolean task status of the completed task + * @param taskState human-readable task status label as reported by the Globus API + * the method should not throw any exceptions; all the exceptions thrown + * by the methods within are expected to be intercepted. + */ + private void processCompletedUploadTask(Dataset dataset, + JsonArray filesJsonArray, + AuthenticatedUser authUser, + String ruleId, + Logger globusLogger, + FileHandler fileHandler, + boolean taskSuccess, + String taskStatus) { + + Logger myLogger = globusLogger == null ? logger : globusLogger; + if (ruleId != null) { // Transfer is complete, so delete rule - deletePermission(ruleId, dataset, globusLogger); + deletePermission(ruleId, dataset, myLogger); } - + // If success, switch to an EditInProgress lock - do this before removing the // GlobusUpload lock // Keeping a lock through the add datafiles API call avoids a conflicting edit - // and keeps any open dataset page refreshing until the datafile appears - if (!(taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE"))) { - globusLogger.info("Finished upload via Globus job."); + // and keeps any open dataset page refreshing until the datafile appears. + + // @todo is it necessary? what is wrong exactly with keeping the Globus + // lock on for the duration of the process? 
+ if (taskSuccess) { + myLogger.info("Finished upload via Globus job."); DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), DatasetLock.Reason.EditInProgress, @@ -774,7 +826,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S if (editLock != null) { dataset.addLock(editLock); } else { - globusLogger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); + myLogger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); } } @@ -792,36 +844,50 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S * addFilesAsync method called within the globusUpload method. I.e. it appeared * that the lock removal was not committed/visible outside this method until * globusUpload itself ended. + * (from @landreev:) If I understand the comment above correctly - annotations + * like "@TransactionAttribute(REQUIRES_NEW) do NOT work when you call a method + * directly within the same service bean. Strictly speaking, it's not the + * "within the same bean" part that is the key, rather, these annotations + * only apply when calling a method via an @EJB-defined service. So it + * is generally possible to call another method within FooServiceBean + * with the REQUIRES_NEW transaction taking effect - but then it would need + * to define *itself* as an @EJB - + * @EJB FooServiceBean fooSvc; + * ... + * fooSvc.doSomethingInNewTransaction(...); + * etc. */ datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); } - if (taskStatus.startsWith("FAILED") || taskStatus.startsWith("INACTIVE")) { + if (!taskSuccess) { String comment = "Reason : " + taskStatus.split("#")[1] + "
Short Description : " + taskStatus.split("#")[2]; userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true); - globusLogger.info("Globus task failed "); + myLogger.info("Globus task failed "); } else { try { - processUploadedFiles(filesJsonArray, dataset, authUser, globusLogger); + processUploadedFiles(filesJsonArray, dataset, authUser, myLogger); } catch (Exception e) { logger.info("Exception from processUploadedFiles call "); e.printStackTrace(); - globusLogger.info("Exception from processUploadedFiles call " + e.getMessage()); + myLogger.info("Exception from processUploadedFiles call " + e.getMessage()); datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - // } } if (ruleId != null) { - deletePermission(ruleId, dataset, globusLogger); - globusLogger.info("Removed upload permission: " + ruleId); - } - if (fileHandlerSuceeded) { - fileHandler.close(); + deletePermission(ruleId, dataset, myLogger); + myLogger.info("Removed upload permission: " + ruleId); } + //if (fileHandler != null) { + // fileHandler.close(); + //} + } + + /** * The code in this method is copy-and-pasted from the previous Borealis * implemenation. @@ -1075,7 +1141,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro } task = globusStatusCheck(endpoint, taskIdentifier, globusLogger); // @todo null check - String taskStatus = getTaskStatus(task); + String taskStatus = GlobusUtil.getTaskStatus(task); // Transfer is done (success or failure) so delete the rule if (ruleId != null) { @@ -1132,30 +1198,6 @@ private GlobusTaskState globusStatusCheck(GlobusEndpoint endpoint, String taskId return task; } - private String getTaskStatus(GlobusTaskState task) { - String status = null; - if (task != null) { - status = task.getStatus(); - if (status != null) { - // The task is in progress but is not ok or queued - // (L.A.) I think the assumption here is that this method is called - // exclusively on tasks that have already completed. So that's why - // it is safe to assume that "ACTIVE" means "FAILED". - if (status.equalsIgnoreCase("ACTIVE")) { - status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } else { - // The task is either succeeded, failed or inactive. - status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); - } - } else { - status = "FAILED"; - } - } else { - status = "FAILED"; - } - return status; - } - public JsonObject calculateMissingMetadataFields(List inputList, Logger globusLogger) throws InterruptedException, ExecutionException, IOException { @@ -1386,98 +1428,41 @@ public List findExternalUploadsByTaskId(String tas } // @todo duplicated code, merge with the code handling the "classic" upload workflow - public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess) { + public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess, String taskStatus, Logger taskLogger) { String ruleId = globusTask.getRuleId(); Dataset dataset = globusTask.getDataset(); - - if (ruleId != null) { - // Transfer is complete, so delete rule - deletePermission(ruleId, dataset, logger); - } - AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); if (authUser == null) { // @todo log error message; do nothing return; } - // Switch the locks on the dataset: - // @todo is it necessary? 
what is wrong exactly with keeping the Globus - // lock on for the duration of the process? - if (taskSuccess) { - DatasetLock editLock = datasetSvc.addDatasetLock(dataset.getId(), - DatasetLock.Reason.EditInProgress, - (authUser).getId(), - "Completing Globus Upload"); - if (editLock != null) { - dataset.addLock(editLock); - } else { - logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); - } - } - - // Remove the Globus lock, regardless of whether this is a success or failure - DatasetLock globusUploadLock = dataset.getLockFor(DatasetLock.Reason.GlobusUpload); - if (globusUploadLock == null) { - logger.log(Level.WARNING, "No GlobusUpload lock found for dataset"); - } else { - logger.log(Level.FINE, "Removing GlobusUpload lock " + globusUploadLock.getId()); - /* - * Note: This call to remove a lock only works immediately because it is in - * another service bean. Despite the removeDatasetLocks method having the - * REQUIRES_NEW transaction annotation, when the globusUpload method and that - * method were in the same bean (globusUpload was in the DatasetServiceBean to - * start), the globus lock was still seen in the API call initiated in the - * addFilesAsync method called within the globusUpload method. I.e. it appeared - * that the lock removal was not committed/visible outside this method until - * globusUpload itself ended. - */ - datasetSvc.removeDatasetLocks(dataset, DatasetLock.Reason.GlobusUpload); - } - - if (taskSuccess && GlobusTaskInProgress.TaskType.UPLOAD.equals(globusTask.getTaskType())) { + if (GlobusTaskInProgress.TaskType.UPLOAD.equals(globusTask.getTaskType())) { List fileUploadsInProgress = findExternalUploadsByTaskId(globusTask.getTaskId()); if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) { // @todo log error message; do nothing return; } - addFilesOnSuccess(dataset, authUser, fileUploadsInProgress); - } - - // Handle locks/rules/etc. (?) - if (ruleId != null) { - deletePermission(ruleId, dataset, logger); - logger.info("Removed upload permission: " + ruleId); - } - } - - public void addFilesOnSuccess(Dataset dataset, AuthenticatedUser authUser, List fileUploadsInProgress) { - - JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); - - for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { - String jsonInfoString = pendingFile.getFileInfo(); - JsonObject fileObject = JsonUtil.getJsonObject(jsonInfoString); - filesJsonArrayBuilder.add(fileObject); - } - - JsonArray filesJsonArray = filesJsonArrayBuilder.build(); - - if (filesJsonArray == null || filesJsonArray.size() < 1) { - // @todo log error message; do nothing - return; - } - - try { - processUploadedFiles(filesJsonArray, dataset, authUser, null); - } catch (Exception ex) { - // @todo log error message; make sure the error notification to the - // has been sent (may or may not have already been sent inside the - // method above). 
+ + JsonArrayBuilder filesJsonArrayBuilder = Json.createArrayBuilder(); + + for (ExternalFileUploadInProgress pendingFile : fileUploadsInProgress) { + String jsonInfoString = pendingFile.getFileInfo(); + JsonObject fileObject = JsonUtil.getJsonObject(jsonInfoString); + filesJsonArrayBuilder.add(fileObject); + } + + JsonArray filesJsonArray = filesJsonArrayBuilder.build(); + + //processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus); + processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, taskLogger, null, taskSuccess, taskStatus); + } else { + // @todo extend this async. framework to handle Glonus downloads as well } + } - + public void deleteExternalUploadRecords(String taskId) { em.createNamedQuery("ExternalFileUploadInProgress.deleteByTaskId") .setParameter("taskId", taskId) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java index 67594ad1a5e..652898591ac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusUtil.java @@ -63,4 +63,31 @@ public static boolean isTaskSucceeded(GlobusTaskState task) { } return false; } + /** + * Produces a human-readable Status label of a completed task + * @param GlobusTaskState task - a looked-up state of a task as reported by Globus API + */ + public static String getTaskStatus(GlobusTaskState task) { + String status = null; + if (task != null) { + status = task.getStatus(); + if (status != null) { + // The task is in progress but is not ok or queued + // (L.A.) I think the assumption here is that this method is called + // exclusively on tasks that have already completed. So that's why + // it is safe to assume that "ACTIVE" means "FAILED". + if (status.equalsIgnoreCase("ACTIVE")) { + status = "FAILED" + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } else { + // The task is either succeeded, failed or inactive. 
+ status = status + "#" + task.getNice_status() + "#" + task.getNice_status_short_description(); + } + } else { + status = "FAILED"; + } + } else { + status = "FAILED"; + } + return status; + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index 4db25072b6b..a74d0c3f747 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -12,8 +12,13 @@ import jakarta.ejb.Singleton; import jakarta.ejb.Startup; import jakarta.enterprise.concurrent.ManagedScheduledExecutorService; +import java.io.File; +import java.io.IOException; +import java.text.SimpleDateFormat; +import java.util.Date; import java.util.List; import java.util.concurrent.TimeUnit; +import java.util.logging.FileHandler; import java.util.logging.Logger; /** @@ -42,15 +47,16 @@ public class TaskMonitoringServiceBean { SettingsServiceBean settingsSvc; @EJB GlobusServiceBean globusService; - + + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + @PostConstruct public void init() { if (systemConfig.isGlobusTaskMonitoringServer()) { logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 60); - // @todo scheduleAtFixedDelay() - this.scheduler.scheduleAtFixedRate(this::checkOngoingTasks, + this.scheduler.scheduleWithFixedDelay(this::checkOngoingTasks, 0, pollingInterval, TimeUnit.SECONDS); } else { @@ -68,17 +74,62 @@ public void checkOngoingTasks() { List tasks = globusService.findAllOngoingTasks(); tasks.forEach(t -> { - GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), null); + FileHandler taskLogHandler = getTaskLogHandler(t); + Logger taskLogger = getTaskLogger(t, taskLogHandler); + + GlobusTaskState retrieved = globusService.getTask(t.getGlobusToken(), t.getTaskId(), taskLogger); if (GlobusUtil.isTaskCompleted(retrieved)) { // Do our thing, finalize adding the files to the dataset - globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved)); + globusService.processCompletedTask(t, GlobusUtil.isTaskSucceeded(retrieved), GlobusUtil.getTaskStatus(retrieved), taskLogger); // Whether it finished successfully, or failed in the process, // there's no need to keep monitoring this task, so we can // delete it. //globusService.deleteExternalUploadRecords(t.getTaskId()); globusService.deleteTask(t); } + + if (taskLogHandler != null) { + // @todo it should be prudent to cache these loggers and handlers + // between monitoring runs + taskLogHandler.close(); + } }); } + private FileHandler getTaskLogHandler(GlobusTaskInProgress task) { + if (task == null) { + return null; + } + + Date startDate = new Date(task.getStartTime().getTime()); + String logTimeStamp = logFormatter.format(startDate); + + String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + task.getDataset().getId() + "_" + logTimeStamp + + ".log"; + FileHandler fileHandler; + try { + fileHandler = new FileHandler(logFileName); + } catch (IOException | SecurityException ex) { + // @todo log this error somehow? 
+ fileHandler = null; + } + return fileHandler; + } + + private Logger getTaskLogger(GlobusTaskInProgress task, FileHandler logFileHandler) { + if (logFileHandler == null) { + return null; + } + Date startDate = new Date(task.getStartTime().getTime()); + String logTimeStamp = logFormatter.format(startDate); + + Logger taskLogger = Logger.getLogger( + "edu.harvard.iq.dataverse.upload.client.DatasetServiceBean." + "GlobusUpload" + logTimeStamp); + taskLogger.setUseParentHandlers(false); + + taskLogger.addHandler(logFileHandler); + + return taskLogger; + } + } From 8cdff8d66eb655e8def3e35b21f1a3d438f5608a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 19 Aug 2024 19:51:43 -0400 Subject: [PATCH 10/31] Added notifications for various failure cases. #10623 --- .../harvard/iq/dataverse/MailServiceBean.java | 27 +++- .../iq/dataverse/UserNotification.java | 3 +- .../iq/dataverse/api/ApiConstants.java | 4 + .../providers/builtin/DataverseUserPage.java | 2 + .../datasetutility/AddReplaceFileHelper.java | 4 +- .../dataverse/globus/GlobusServiceBean.java | 120 ++++++++++-------- .../globus/TaskMonitoringServiceBean.java | 3 +- .../harvard/iq/dataverse/util/MailUtil.java | 17 +++ src/main/java/propertyFiles/Bundle.properties | 9 +- src/main/webapp/dataverseuser.xhtml | 14 ++ 10 files changed, 144 insertions(+), 59 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java index 7359ef8eb33..d29649ad3a6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/MailServiceBean.java @@ -623,6 +623,7 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio comment )) ; return downloadCompletedMessage; + case GLOBUSUPLOADCOMPLETEDWITHERRORS: dataset = (Dataset) targetObject; messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); @@ -633,8 +634,30 @@ public String getMessageTextBasedOnNotification(UserNotification userNotificatio comment )) ; return uploadCompletedWithErrorsMessage; + + case GLOBUSUPLOADREMOTEFAILURE: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadFailedRemotelyMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.failedRemotely", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalId().asString(), + dataset.getDisplayName(), + comment + )) ; + return uploadFailedRemotelyMessage; - case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADLOCALFAILURE: + dataset = (Dataset) targetObject; + messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); + String uploadFailedLocallyMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.upload.failedLocally", Arrays.asList( + systemConfig.getDataverseSiteUrl(), + dataset.getGlobalId().asString(), + dataset.getDisplayName(), + comment + )) ; + return uploadFailedLocallyMessage; + + case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: dataset = (Dataset) targetObject; messageText = BundleUtil.getStringFromBundle("notification.email.greeting.html"); String downloadCompletedWithErrorsMessage = messageText + BundleUtil.getStringFromBundle("notification.mail.globus.download.completedWithErrors", Arrays.asList( @@ -763,6 +786,8 @@ public Object getObjectOfNotification (UserNotification userNotification){ return versionService.find(userNotification.getObjectId()); case 
GLOBUSUPLOADCOMPLETED: case GLOBUSUPLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADREMOTEFAILURE: + case GLOBUSUPLOADLOCALFAILURE: case GLOBUSDOWNLOADCOMPLETED: case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: return datasetService.find(userNotification.getObjectId()); diff --git a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java index 280c2075494..2d37540fab3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/UserNotification.java +++ b/src/main/java/edu/harvard/iq/dataverse/UserNotification.java @@ -39,7 +39,8 @@ public enum Type { CHECKSUMIMPORT, CHECKSUMFAIL, CONFIRMEMAIL, APIGENERATED, INGESTCOMPLETED, INGESTCOMPLETEDWITHERRORS, PUBLISHFAILED_PIDREG, WORKFLOW_SUCCESS, WORKFLOW_FAILURE, STATUSUPDATED, DATASETCREATED, DATASETMENTIONED, GLOBUSUPLOADCOMPLETED, GLOBUSUPLOADCOMPLETEDWITHERRORS, - GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS; + GLOBUSDOWNLOADCOMPLETED, GLOBUSDOWNLOADCOMPLETEDWITHERRORS, REQUESTEDFILEACCESS, + GLOBUSUPLOADREMOTEFAILURE, GLOBUSUPLOADLOCALFAILURE; public String getDescription() { return BundleUtil.getStringFromBundle("notification.typeDescription." + this.name()); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java index 347a8946a46..a2faaf3637c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java @@ -17,4 +17,8 @@ private ApiConstants() { public static final String DS_VERSION_LATEST = ":latest"; public static final String DS_VERSION_DRAFT = ":draft"; public static final String DS_VERSION_LATEST_PUBLISHED = ":latest-published"; + + // addFiles call + public static final String API_ADD_FILES_COUNT_PROCESSED = "Total number of files"; + public static final String API_ADD_FILES_COUNT_SUCCESSFULL = "Number of files successfully added"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java index a0e3f899443..48afb2b830a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/builtin/DataverseUserPage.java @@ -528,6 +528,8 @@ public void displayNotification() { case GLOBUSUPLOADCOMPLETEDWITHERRORS: case GLOBUSDOWNLOADCOMPLETED: case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: + case GLOBUSUPLOADREMOTEFAILURE: + case GLOBUSUPLOADLOCALFAILURE: userNotification.setTheObject(datasetService.find(userNotification.getObjectId())); break; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 2b35a4cc783..336fa9b5b7a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2167,8 +2167,8 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { } JsonObjectBuilder result = Json.createObjectBuilder() - .add("Total number of files", totalNumberofFiles) - .add("Number of files successfully added", successNumberofFiles); + .add(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, totalNumberofFiles) + .add(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, successNumberofFiles); return 
Response.ok().entity(Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 5e68128c954..4bb478d26ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -766,7 +766,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S boolean taskSuccess = GlobusUtil.isTaskCompleted(taskState); - processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus); + processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, taskSuccess, taskStatus); if (fileHandler != null) { fileHandler.close(); @@ -798,7 +798,6 @@ private void processCompletedUploadTask(Dataset dataset, AuthenticatedUser authUser, String ruleId, Logger globusLogger, - FileHandler fileHandler, boolean taskSuccess, String taskStatus) { @@ -861,11 +860,17 @@ private void processCompletedUploadTask(Dataset dataset, } if (!taskSuccess) { - String comment = "Reason : " + taskStatus.split("#")[1] + "
<br> Short Description : "
-                    + taskStatus.split("#")[2];
+            String comment;
+            if (taskStatus != null) {
+                comment = "Reason : " + taskStatus.split("#")[1] + "<br> Short Description : "
+                        + taskStatus.split("#")[2];
+            } else {
+                comment = "No further information available";
+            }
+
+            myLogger.info("Globus Upload task failed ");
             userNotificationService.sendNotification((AuthenticatedUser) authUser, new Timestamp(new Date().getTime()),
-                    UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(), comment, true);
-            myLogger.info("Globus task failed ");
+                    UserNotification.Type.GLOBUSUPLOADREMOTEFAILURE, dataset.getId(), comment, true);
         } else {
             try {
@@ -908,6 +913,8 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut
         Integer countAll = 0;
         Integer countSuccess = 0;
         Integer countError = 0;
+        Integer countAddFilesSuccess = 0;
+        String notificationErrorMessage = "";
 
         List<String> inputList = new ArrayList<String>();
 
@@ -991,13 +998,22 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut
         myLogger.info("Files processed successfully: " + countSuccess);
         myLogger.info("Files failures to process: " + countError);
 
-        /*String command = "curl -H \"X-Dataverse-key:" + token.getTokenString() + "\" -X POST "
-                + httpRequestUrl + "/api/datasets/:persistentId/addFiles?persistentId=doi:"
-                + datasetIdentifier + " -F jsonData='" + newjsonData + "'";
-        System.out.println("*******====command ==== " + command);*/
-        // ToDo - refactor to call AddReplaceFileHelper.addFiles directly instead of
-        // calling API
-        // a quick experimental AddReplaceFileHelper implementation:
+        if (countSuccess < 1) {
+            // We don't have any valid entries to call addFiles() for; so, no
+            // need to proceed.
+            notificationErrorMessage = "Failed to successfully process any of the file entries, "
+                    + "out of the " + countAll + " total as submitted to Dataverse";
+            userNotificationService.sendNotification((AuthenticatedUser) authUser,
+                    new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADREMOTEFAILURE,
+                    dataset.getId(), notificationErrorMessage, true);
+            return;
+        } else if (countSuccess < countAll) {
+            notificationErrorMessage = "Out of the " + countAll + " file entries submitted to /addGlobusFiles "
+                    + "only " + countSuccess + " could be successfully parsed and processed. ";
+        }
+
+        // A new AddReplaceFileHelper implementation, replacing the old one that
+        // was relying on calling /addFiles api via curl:
 
         // Passing null for the HttpServletRequest to make a new DataverseRequest.
         // The parent method is always executed asynchronously, so the real request
@@ -1028,55 +1044,57 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut
 
         JsonObject addFilesJsonObject = JsonUtil.getJsonObject(addFilesResponse.getEntity().toString());
 
-        // @todo null checks etc.
+        // @todo null check?
         String addFilesStatus = addFilesJsonObject.getString("status", null);
         myLogger.info("addFilesResponse status: " + addFilesStatus);
 
         if (ApiConstants.STATUS_OK.equalsIgnoreCase(addFilesStatus)) {
-            if (addFilesJsonObject.containsKey("data")) {
-                JsonObject responseFilesData = addFilesJsonObject.getJsonObject("data");
-                if (responseFilesData.containsKey("Result")) {
-                    JsonObject addFilesResult = responseFilesData.getJsonObject("Result");
-
-                    Integer addFilesTotal = addFilesResult.getInt("Total number of files", -1);
-                    Integer addFilesSuccess = addFilesResult.getInt("Number of files successfully added", -1);
-                    // @todo handle -1 (missing values) above
-                    // @todo log all this stuff in a task-specific log (??)
-                    myLogger.info("Files processed by addFiles: " + addFilesTotal + ", successfully added: " + addFilesSuccess);
-                    // @todo incorporate this into the user notification
-                } else {
-                    logger.warning("Malformed addFiles data section: "+ responseFilesData.toString());
-                }
-            } else {
-                logger.warning("Malformed addFiles response json: " + addFilesJsonObject.toString());
-            }
-
-            // if(!taskSkippedFiles)
-            if (countError == 0) {
-                userNotificationService.sendNotification((AuthenticatedUser) authUser,
-                        new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED,
-                        dataset.getId(), countSuccess + " files added out of " + countAll, true);
+            if (addFilesJsonObject.containsKey("data") && addFilesJsonObject.getJsonObject("data").containsKey("Result")) {
+
+                //Integer countAddFilesTotal = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, -1);
+                countAddFilesSuccess = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, -1);
+                myLogger.info("Files successfully added by addFiles(): " + countAddFilesSuccess);
+
+            } else {
-                userNotificationService.sendNotification((AuthenticatedUser) authUser,
-                        new Timestamp(new Date().getTime()),
-                        UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),
-                        countSuccess + " files added out of " + countAll, true);
+                myLogger.warning("Malformed addFiles response json: " + addFilesJsonObject.toString());
+                notificationErrorMessage = "Malformed response received when attempting to add the files to the dataset. ";
             }
-            myLogger.info("Successfully completed addFiles call ");
+
+            myLogger.info("Completed addFiles call ");
 
         } else if (ApiConstants.STATUS_ERROR.equalsIgnoreCase(addFilesStatus)) {
             String addFilesMessage = addFilesJsonObject.getString("message", null);
-
+            
             myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData);
             myLogger.log(Level.SEVERE, "****** Output from addFiles: ", addFilesMessage);
-            // @todo send Failure notification
+            notificationErrorMessage += "Error response received when attempting to add the files to the dataset: " + addFilesMessage + " ";
 
         } else {
             myLogger.log(Level.SEVERE, "******* Error while executing addFiles ", newjsonData);
-            // @todo send Failure notification
+            notificationErrorMessage += "Unexpected error encountered when attempting to add the files to the dataset.";
+        }
+
+        // if(!taskSkippedFiles)
+        if (countAddFilesSuccess == countAll) {
+            userNotificationService.sendNotification((AuthenticatedUser) authUser,
+                    new Timestamp(new Date().getTime()), UserNotification.Type.GLOBUSUPLOADCOMPLETED,
+                    dataset.getId(), countSuccess + " files added out of " + countAll, true);
+        } else if (countAddFilesSuccess > 0) {
+            // success, but partial:
+            userNotificationService.sendNotification((AuthenticatedUser) authUser,
+                    new Timestamp(new Date().getTime()),
+                    UserNotification.Type.GLOBUSUPLOADCOMPLETEDWITHERRORS, dataset.getId(),
+                    countSuccess + " files added out of " + countAll + notificationErrorMessage, true);
+        } else {
+            notificationErrorMessage = "".equals(notificationErrorMessage)
+                    ? " No additional information is available."
+                    : notificationErrorMessage;
+            userNotificationService.sendNotification((AuthenticatedUser) authUser,
+                    new Timestamp(new Date().getTime()),
+                    UserNotification.Type.GLOBUSUPLOADLOCALFAILURE, dataset.getId(),
+                    notificationErrorMessage, true);
+        }
 
     }
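As a side note, the notification fan-out above reduces to a three-way decision on two counters. The following is a minimal, self-contained sketch of just that decision logic, for illustration only: the enum and method names are invented here, and the real code sends UserNotifications rather than returning a value.

    public class UploadOutcomeSketch {
        enum Outcome { COMPLETED, COMPLETED_WITH_ERRORS, LOCAL_FAILURE }

        // countAll: file entries submitted to /addGlobusFiles;
        // countAddFilesSuccess: entries the addFiles() call reported as added.
        static Outcome classify(int countAll, int countAddFilesSuccess) {
            if (countAddFilesSuccess == countAll) {
                return Outcome.COMPLETED;             // cf. GLOBUSUPLOADCOMPLETED
            } else if (countAddFilesSuccess > 0) {
                return Outcome.COMPLETED_WITH_ERRORS; // cf. GLOBUSUPLOADCOMPLETEDWITHERRORS
            }
            return Outcome.LOCAL_FAILURE;             // cf. GLOBUSUPLOADLOCALFAILURE
        }

        public static void main(String[] args) {
            System.out.println(classify(5, 5)); // COMPLETED
            System.out.println(classify(5, 3)); // COMPLETED_WITH_ERRORS
            System.out.println(classify(5, 0)); // LOCAL_FAILURE
        }
    }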
 
     @Asynchronous
@@ -1140,7 +1158,7 @@ public void globusDownload(String jsonData, Dataset dataset, User authUser) thro
             logger.warning("ruleId not found for taskId: " + taskIdentifier);
         }
         task = globusStatusCheck(endpoint, taskIdentifier, globusLogger);
-        // @todo null check
+        // @todo null check?
         String taskStatus = GlobusUtil.getTaskStatus(task);
 
         // Transfer is done (success or failure) so delete the rule
@@ -1427,13 +1445,13 @@ public List<ExternalFileUploadInProgress> findExternalUploadsByTaskId(String tas
         return em.createNamedQuery("ExternalFileUploadInProgress.findByTaskId").setParameter("taskId", taskId).getResultList();
     }
 
-    // @todo duplicated code, merge with the code handling the "classic" upload workflow
     public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess, String taskStatus, Logger taskLogger) {
         String ruleId = globusTask.getRuleId();
         Dataset dataset = globusTask.getDataset();
         AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken());
         if (authUser == null) {
             // @todo log error message; do nothing
+            // (the fields in GlobusTaskInProgress are not nullable though - ?)
             return;
         }
 
@@ -1442,6 +1460,7 @@ public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSu
         if (fileUploadsInProgress == null || fileUploadsInProgress.size() < 1) {
             // @todo log error message; do nothing
+            // (will this ever happen though?)
             return;
         }
 
@@ -1455,10 +1474,9 @@ public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSu
 
         JsonArray filesJsonArray = filesJsonArrayBuilder.build();
 
-            //processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, globusLogger, fileHandler, taskSuccess, taskStatus);
-            processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, taskLogger, null, taskSuccess, taskStatus);
+            processCompletedUploadTask(dataset, filesJsonArray, authUser, ruleId, taskLogger, taskSuccess, taskStatus);
         } else {
-            // @todo extend this async framework to handle Globus downloads as well
+            // @todo eventually, extend this async framework to handle Globus downloads as well
         }
     }
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
index a74d0c3f747..c956831317c 100644
--- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
@@ -67,7 +67,6 @@ public void init() {
     /**
      * This method will be executed on a timer-like schedule, continuously
      * monitoring all the ongoing external Globus tasks (transfers).
- * @todo make sure the executions do not overlap/stack up */ public void checkOngoingTasks() { logger.info("Performing a scheduled external Globus task check"); @@ -90,7 +89,7 @@ public void checkOngoingTasks() { if (taskLogHandler != null) { // @todo it should be prudent to cache these loggers and handlers - // between monitoring runs + // between monitoring runs (should be fairly easy to do) taskLogHandler.close(); } }); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java index 36c249de834..f81ce093815 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/MailUtil.java @@ -99,6 +99,23 @@ public static String getSubjectTextBasedOnNotification(UserNotification userNoti } catch (Exception e) { return BundleUtil.getStringFromBundle("notification.email.globus.uploadCompletedWithErrors.subject", rootDvNameAsList); } + case GLOBUSUPLOADREMOTEFAILURE: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedRemotely.subject", dsNameAsList); + + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedRemotely.subject", rootDvNameAsList); + } + case GLOBUSUPLOADLOCALFAILURE: + try { + DatasetVersion version = (DatasetVersion)objectOfNotification; + List dsNameAsList = Arrays.asList(version.getDataset().getDisplayName()); + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedLocally.subject", dsNameAsList); + } catch (Exception e) { + return BundleUtil.getStringFromBundle("notification.email.globus.uploadFailedLocally.subject", rootDvNameAsList); + } case GLOBUSDOWNLOADCOMPLETEDWITHERRORS: try { DatasetVersion version = (DatasetVersion)objectOfNotification; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 4b366522966..e0488f95e2c 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -253,11 +253,16 @@ notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId= notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.

{3}<br/>
 notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.<br/><br/>{3}<br/>
 notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.<br/><br/>{3}<br/>
+notification.mail.globus.upload.failedRemotely=Remote data transfer between Globus collections for Dataset {2} failed, reported via Globus API.<br/><br/>{3}<br/>
+notification.mail.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally.<br/><br/>{3}<br/>
 notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.<br/><br/>{3}<br/>
 notification.import.filesystem=Dataset {1} has been successfully uploaded and verified.
 notification.globus.upload.completed=Globus transfer to Dataset {1} was successful. File(s) have been uploaded and verified.
 notification.globus.download.completed=Globus transfer from the dataset {1} was successful.
 notification.globus.upload.completedWithErrors=Globus transfer to Dataset {1} is complete with errors.
+notification.globus.upload.failedRemotely=Remote data transfer between Globus collections for Dataset {2} failed, reported via Globus API.<br/><br/>{3}<br/>
+notification.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally.<br/><br/>{3}<br/>
+ notification.globus.download.completedWithErrors=Globus transfer from the dataset {1} is complete with errors. notification.import.checksum={1}, dataset had file checksums added via a batch job. removeNotification=Remove Notification @@ -823,8 +828,8 @@ notification.email.datasetWasMentioned.subject={0}: A Dataset Relationship has b notification.email.globus.uploadCompleted.subject={0}: Files uploaded successfully via Globus and verified notification.email.globus.downloadCompleted.subject={0}: Files downloaded successfully via Globus notification.email.globus.uploadCompletedWithErrors.subject={0}: Uploaded files via Globus with errors -notification.email.globus.downloadCompletedWithErrors.subject={0}: Downloaded files via Globus with errors - +notification.email.globus.uploadFailedRemotely.subject={0}: Failed to upload files via Globus +notification.email.globus.uploadFailedLocally.subject={0}: Failed to add files uploaded via Globus to dataset # dataverse.xhtml dataverse.name=Dataverse Name dataverse.name.title=The project, department, university, professor, or journal this dataverse will contain data for. diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index 9ed8b5209b6..d061348ad87 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -367,6 +367,20 @@ + + + + + + + + + + + + + + From 531e25c05df0ec507b5e3c0650033b0691c96df2 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 09:23:36 -0400 Subject: [PATCH 11/31] Config guide entry. #10623 --- doc/sphinx-guides/source/installation/config.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 0038c188ea5..01bf1419a52 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3259,6 +3259,13 @@ The email for your institution that you'd like to appear in bag-info.txt. See :r Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. +.. _dataverse.bagit.sourceorg.email: + +dataverse.globus.taskMonitoringServer ++++++++++++++++++++++++++++++++++++++ + +This setting is required in conjunction with the ``globus-use-experimental-async-framework`` feature flag. Setting it to true designates the Dataverse instance to serve as the dedicated polling server. It is needed so that the new framework can be used in a multi-node installation. + .. _feature-flags: Feature Flags @@ -3294,6 +3301,9 @@ please find all known feature flags below. Any of these flags can be activated u * - disable-return-to-author-reason - Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call. - ``Off`` + * - globus-use-experimental-async-framework + - Activates a new experimental implementation of Globus polling of ongoing remote data transfers that does not rely on the instance staying up continuously for the duration of the transfers and saves the state information about Globus upload requests in the database. Added in v6.4. Note that the JVM option ``dataverse.globus.taskMonitoringServer`` described above must also be enabled on one (and only one, in a multi-node installation) Dataverse instance. + - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. 
the environment variable From 007d7150763b55db95c35d3007900cfe46d0f50d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 09:47:41 -0400 Subject: [PATCH 12/31] Added a few more doc notes. #10623 --- doc/release-notes/10623-globus-improvements.md | 1 + doc/sphinx-guides/source/developers/big-data-support.rst | 2 ++ doc/sphinx-guides/source/developers/globus-api.rst | 2 ++ 3 files changed, 5 insertions(+) create mode 100644 doc/release-notes/10623-globus-improvements.md diff --git a/doc/release-notes/10623-globus-improvements.md b/doc/release-notes/10623-globus-improvements.md new file mode 100644 index 00000000000..03579b59631 --- /dev/null +++ b/doc/release-notes/10623-globus-improvements.md @@ -0,0 +1 @@ +A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See the `globus-use-experimental-async-framework` feature flag in the Configuration guide. \ No newline at end of file diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 4aaed10512e..99ea4d2e0ba 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -187,3 +187,5 @@ As described in that document, Globus transfers can be initiated by choosing the An overview of the control and data transfer interactions between components was presented at the 2022 Dataverse Community Meeting and can be viewed in the `Integrations and Tools Session Video `_ around the 1 hr 28 min mark. See also :ref:`Globus settings <:GlobusSettings>`. + +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 902fc9db2ee..0f16dc704ef 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -185,6 +185,8 @@ As the transfer can take significant time and the API call is asynchronous, the Once the transfer completes, Dataverse will remove the write permission for the principal. +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This new framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. + Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. 
For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. In the remote/reference case, where there is no transfer to monitor, the standard /addFiles API call (see :ref:`direct-add-to-dataset-api`) is used instead. There are no changes for the Globus case. From 6fcb2856f15ff88b9b0ae4f82dd3caf40b5cb2c3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 10:47:55 -0400 Subject: [PATCH 13/31] typo #10623 --- src/main/webapp/dataverseuser.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index d061348ad87..bcb71733716 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -374,7 +374,7 @@ - + From f6882df1ea49acb322cd5091a6bf1f6d8e075cfd Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 11:17:14 -0400 Subject: [PATCH 14/31] cut-and-paste error #10623 --- doc/sphinx-guides/source/installation/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 01bf1419a52..731eef71c57 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3259,7 +3259,7 @@ The email for your institution that you'd like to appear in bag-info.txt. See :r Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. -.. _dataverse.bagit.sourceorg.email: +.. _dataverse.globus.taskMonitoringServer: dataverse.globus.taskMonitoringServer +++++++++++++++++++++++++++++++++++++ From 4ae3ee6e4c268fa5911bff8e05377d85f45e5a94 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 20 Aug 2024 11:31:10 -0400 Subject: [PATCH 15/31] (#10623) --- .../java/edu/harvard/iq/dataverse/settings/FeatureFlags.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 746e6e3b75d..c8983b80df1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -92,8 +92,7 @@ public enum FeatureFlags { */ DISABLE_RETURN_TO_AUTHOR_REASON("disable-return-to-author-reason"), /** - * TEMPORARY feature flag for the new Globus upload framework (will only be - * used for testing). + * Feature flag for the new Globus upload framework. 
*/ GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"), ; From 9cf4e1b90888cf0d78f3659c2098bf142b5192a0 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:26:24 -0400 Subject: [PATCH 16/31] some minor cleanup changes #10623 --- .../ExternalFileUploadInProgress.java | 4 ---- .../datasetutility/AddReplaceFileHelper.java | 4 ++-- .../impl/UpdateDatasetVersionCommand.java | 22 +------------------ .../globus/GlobusTaskInProgress.java | 4 ---- 4 files changed, 3 insertions(+), 31 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java index ab6a1798307..96bfd3f63f5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse; import jakarta.persistence.Column; diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 336fa9b5b7a..8e7c07b80bb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2139,7 +2139,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { logger.log(Level.WARNING, "Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.log(Level.INFO, "Removed EditInProgress lock "+eipLock.getId()); + logger.log(Level.FINE, "Removed EditInProgress lock"); } try { @@ -2306,7 +2306,7 @@ public Response replaceFiles(String jsonData, Dataset ds, User authUser) { logger.warning("Dataset not locked for EditInProgress "); } else { datasetService.removeDatasetLocks(dataset, DatasetLock.Reason.EditInProgress); - logger.info("Removed EditInProgress lock "); + logger.fine("Removed EditInProgress lock "); } try { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index faf8884b08d..cad61000e69 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -103,9 +103,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } Dataset theDataset = getDataset(); - //ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); - // this is an experiment (probably temporary) - checkUpdateDatasetVersionLock(ctxt); + ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); Dataset savedDataset = null; @@ -302,22 +300,4 @@ public boolean onSuccess(CommandContext ctxt, Object r) { return true; } - private void checkUpdateDatasetVersionLock(CommandContext ctxt) throws IllegalCommandException { - List locks = ctxt.datasets().getLocksByDatasetId(getDataset().getId()); - //locks.forEach(lock -> { - for (DatasetLock lock : locks) { - // Ingest lock is ok: - if (DatasetLock.Reason.Ingest != 
lock.getReason()) { - // with Workflow lock *some* users can edit; - // any other kind of lock - nope - if (DatasetLock.Reason.Workflow != lock.getReason() - || !ctxt.permissions().isMatchingWorkflowLock(getDataset(), - getUser().getIdentifier(), - getRequest().getWFInvocationId())) { - throw new IllegalCommandException( - BundleUtil.getStringFromBundle("dataset.message.locked.editNotAllowed"), this); - } - } - } - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index d3d06d38151..0a56b4933a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.Dataset; From 45fb938acb3fcd027920d8f5308a026713dc942c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:30:49 -0400 Subject: [PATCH 17/31] cosmetic #10623 --- .../engine/command/impl/UpdateDatasetVersionCommand.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index cad61000e69..e04cae13b35 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -14,7 +14,6 @@ import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import edu.harvard.iq.dataverse.util.FileMetadataUtil; @@ -104,7 +103,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { Dataset theDataset = getDataset(); ctxt.permissions().checkUpdateDatasetVersionLock(theDataset, getRequest(), this); - Dataset savedDataset = null; DatasetVersion persistedVersion = clone; From b3f79fe3102a308df1c8354465adb1f3b9bd2244 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:40:39 -0400 Subject: [PATCH 18/31] extra L in SUCCESSFUL (#10623) --- src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java | 2 +- .../iq/dataverse/datasetutility/AddReplaceFileHelper.java | 2 +- .../java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java index a2faaf3637c..15114085c21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/ApiConstants.java @@ -20,5 +20,5 @@ private ApiConstants() { // addFiles call public static final String API_ADD_FILES_COUNT_PROCESSED = "Total number of files"; - public static final String API_ADD_FILES_COUNT_SUCCESSFULL = "Number of files successfully added"; + public static final String API_ADD_FILES_COUNT_SUCCESSFUL = "Number of files successfully 
added"; } diff --git a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java index 8e7c07b80bb..a470f08f736 100644 --- a/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/datasetutility/AddReplaceFileHelper.java @@ -2168,7 +2168,7 @@ public Response addFiles(String jsonData, Dataset dataset, User authUser) { JsonObjectBuilder result = Json.createObjectBuilder() .add(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, totalNumberofFiles) - .add(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, successNumberofFiles); + .add(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFUL, successNumberofFiles); return Response.ok().entity(Json.createObjectBuilder() diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 4bb478d26ed..ae84cad545c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -1052,7 +1052,7 @@ private void processUploadedFiles(JsonArray filesJsonArray, Dataset dataset, Aut if (addFilesJsonObject.containsKey("data") && addFilesJsonObject.getJsonObject("data").containsKey("Result")) { //Integer countAddFilesTotal = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_PROCESSED, -1); - countAddFilesSuccess = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFULL, -1); + countAddFilesSuccess = addFilesJsonObject.getJsonObject("data").getJsonObject("Result").getInt(ApiConstants.API_ADD_FILES_COUNT_SUCCESSFUL, -1); myLogger.info("Files successfully added by addFiles(): " + countAddFilesSuccess); } else { From 1acae684c31542591ba9e407e8c50cb80de1993a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 18:58:08 -0400 Subject: [PATCH 19/31] better Globus service availability checks #10623 --- .../harvard/iq/dataverse/api/Datasets.java | 22 ++++++++++++++----- src/main/java/propertyFiles/Bundle.properties | 1 + 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4c547f5295f..aa6989d365a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3908,7 +3908,7 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP if (!systemConfig.isGlobusUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, - BundleUtil.getStringFromBundle("datasets.api.globusdownloaddisabled")); + BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); } // ------------------------------------- @@ -4008,11 +4008,6 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.info(" ==== (api addGlobusFilesToDataset) jsonData ====== " + jsonData); - if (!systemConfig.isHTTPUpload()) { - // @todo why isHTTPUpload()? - shouldn't it be checking isGlobusUpload() here? 
- return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.httpDisabled")); - } - // ------------------------------------- // (1) Get the user from the API key // ------------------------------------- @@ -4035,6 +4030,21 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, return wr.getResponse(); } + // Is Globus upload service available? + + // ... on this Dataverse instance? + if (!systemConfig.isGlobusUpload()) { + return error(Response.Status.SERVICE_UNAVAILABLE, BundleUtil.getStringFromBundle("file.api.globusUploadDisabled")); + } + + // ... and on this specific Dataset? + String storeId = dataset.getEffectiveStorageDriverId(); + // acceptsGlobusTransfers should only be true for an S3 or globus store + if (!GlobusAccessibleStore.acceptsGlobusTransfers(storeId) + && !GlobusAccessibleStore.allowsGlobusReferences(storeId)) { + return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + } + // @todo check if the dataset is already locked! JsonObject jsonObject = null; diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index e0488f95e2c..25ae8ad00c0 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1765,6 +1765,7 @@ file.fromWebloaderAfterCreate.tip=An option to upload a folder of files will be file.fromWebloader=Upload a Folder file.api.httpDisabled=File upload via HTTP is not available for this installation of Dataverse. +file.api.globusUploadDisabled=File upload via Globus is not available for this installation of Dataverse. file.api.alreadyHasPackageFile=File upload via HTTP disabled since this dataset already contains a package file. 
file.replace.original=Original File file.editFiles=Edit Files From 5ba28883e2cdce99da768ebaa07ddd055f1b1229 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 19:03:18 -0400 Subject: [PATCH 20/31] better Globus service availability checks #10623 --- src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index aa6989d365a..ac5b9147dff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -3908,7 +3908,7 @@ public Response requestGlobusUpload(@Context ContainerRequestContext crc, @PathP if (!systemConfig.isGlobusUpload()) { return error(Response.Status.SERVICE_UNAVAILABLE, - BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled")); + BundleUtil.getStringFromBundle("file.api.globusUploadDisabled")); } // ------------------------------------- From 2512eab74e82284a4eae6fe29c172a869dbb64a8 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 19:12:43 -0400 Subject: [PATCH 21/31] removed an unnecessary @todo (#10623) --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index ae84cad545c..6d3138856f2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -687,19 +687,16 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S String logFileName = System.getProperty("com.sun.aas.instanceRoot") + File.separator + "logs" + File.separator + "globusUpload_" + dataset.getId() + "_" + logTimestamp + ".log"; FileHandler fileHandler; - boolean fileHandlerSuceeded = false; + try { fileHandler = new FileHandler(logFileName); globusLogger.setUseParentHandlers(false); - fileHandlerSuceeded = true; } catch (IOException | SecurityException ex) { Logger.getLogger(DatasetServiceBean.class.getName()).log(Level.SEVERE, null, ex); - //return; // @todo I don't think we need to return here? fileHandler = null; - } - if (fileHandlerSuceeded) { + if (fileHandler != null) { globusLogger.addHandler(fileHandler); } else { globusLogger = logger; From 6b06d9472326b28c3d47e549e14e5ee2af7e6bd3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 21 Aug 2024 19:20:22 -0400 Subject: [PATCH 22/31] cosmetic #10623 --- .../edu/harvard/iq/dataverse/globus/GlobusServiceBean.java | 2 -- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 4 ---- 2 files changed, 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 6d3138856f2..133da0a1cab 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -810,8 +810,6 @@ private void processCompletedUploadTask(Dataset dataset, // Keeping a lock through the add datafiles API call avoids a conflicting edit // and keeps any open dataset page refreshing until the datafile appears. - // @todo is it necessary? what is wrong exactly with keeping the Globus - // lock on for the duration of the process? 
if (taskSuccess) { myLogger.info("Finished upload via Globus job."); diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index c956831317c..5dd4ce312b6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -1,7 +1,3 @@ -/* - * Click nbfs://nbhost/SystemFileSystem/Templates/Licenses/license-default.txt to change this license - * Click nbfs://nbhost/SystemFileSystem/Templates/Classes/Class.java to edit this template - */ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; From 6d06927169f0c7780ba02791ce3b77d90b9121b3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 22 Aug 2024 10:54:06 -0400 Subject: [PATCH 23/31] more changes per feedback. (saving the api token in the GlobusTask entity was a BAD idea!) #10623 --- .../harvard/iq/dataverse/api/Datasets.java | 6 +--- .../dataverse/globus/GlobusServiceBean.java | 9 +++--- .../globus/GlobusTaskInProgress.java | 29 +++++++++++-------- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index ac5b9147dff..960cc408ee5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -4077,19 +4077,15 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc, logger.log(Level.WARNING, "Failed to lock the dataset (dataset id={0})", dataset.getId()); } - - ApiToken token = authSvc.findApiTokenByUser(authUser); - if(uriInfo != null) { logger.info(" ==== (api uriInfo.getRequestUri()) jsonData ====== " + uriInfo.getRequestUri().toString()); } - String requestUrl = SystemConfig.getDataverseSiteUrlStatic(); // Async Call try { - globusService.globusUpload(jsonObject, token, dataset, requestUrl, authUser); + globusService.globusUpload(jsonObject, dataset, requestUrl, authUser); } catch (IllegalArgumentException ex) { return badRequest("Invalid parameters: "+ex.getMessage()); } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java index 133da0a1cab..ac3c81622fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusServiceBean.java @@ -667,7 +667,7 @@ private String getGlobusDownloadScript(Dataset dataset, ApiToken apiToken, List< @Asynchronous @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, String httpRequestUrl, + public void globusUpload(JsonObject jsonData, Dataset dataset, String httpRequestUrl, AuthenticatedUser authUser) throws IllegalArgumentException, ExecutionException, InterruptedException, MalformedURLException { // Before we do anything else, let's do some basic validation of what @@ -728,7 +728,7 @@ public void globusUpload(JsonObject jsonData, ApiToken token, Dataset dataset, S // Save the task information in the database so that the Globus monitoring // service can continue checking on its progress. 
- GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), token.getTokenString(), ruleId, new Timestamp(startDate.getTime())); + GlobusTaskInProgress taskInProgress = new GlobusTaskInProgress(taskIdentifier, GlobusTaskInProgress.TaskType.UPLOAD, dataset, endpoint.getClientToken(), authUser, ruleId, new Timestamp(startDate.getTime())); em.persist(taskInProgress); // Save the metadata entries that define the files that are being uploaded @@ -1443,10 +1443,9 @@ public List findExternalUploadsByTaskId(String tas public void processCompletedTask(GlobusTaskInProgress globusTask, boolean taskSuccess, String taskStatus, Logger taskLogger) { String ruleId = globusTask.getRuleId(); Dataset dataset = globusTask.getDataset(); - AuthenticatedUser authUser = authSvc.lookupUser(globusTask.getApiToken()); + AuthenticatedUser authUser = globusTask.getLocalUser(); if (authUser == null) { - // @todo log error message; do nothing - // (the fields in GlobusTaskInProgress are not nullable though - ?) + // @todo log error message; do nothing return; } diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java index 0a56b4933a1..8644bca6143 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/GlobusTaskInProgress.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.globus; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import jakarta.persistence.Column; import jakarta.persistence.EnumType; import jakarta.persistence.Enumerated; @@ -14,6 +15,7 @@ import jakarta.persistence.GeneratedValue; import jakarta.persistence.GenerationType; import jakarta.persistence.Id; +import jakarta.persistence.JoinColumn; /** * @@ -67,24 +69,27 @@ public String toString() { } } - @Column + @Column(nullable=false) @Enumerated(EnumType.STRING) private TaskType taskType; /** * Globus API token that should be used to monitor the status of the task */ - @Column + @Column(nullable=false) private String globusToken; /** - * This is the Dataverse API token of the user who initiated the Globus task - */ - private String apiToken; + * This is the the user who initiated the Globus task + */ + @ManyToOne + @JoinColumn + private AuthenticatedUser user; - @Column + @Column(nullable=false) private String ruleId; + @JoinColumn(nullable = false) @ManyToOne private Dataset dataset; @@ -94,12 +99,12 @@ public String toString() { public GlobusTaskInProgress() { } - GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, String apiToken, String ruleId, Timestamp startTime) { + GlobusTaskInProgress(String taskId, TaskType taskType, Dataset dataset, String globusToken, AuthenticatedUser authUser, String ruleId, Timestamp startTime) { this.taskId = taskId; this.taskType = taskType; this.dataset = dataset; this.globusToken = globusToken; - this.apiToken = apiToken; + this.user = authUser; this.ruleId = ruleId; this.startTime = startTime; } @@ -138,12 +143,12 @@ public void setGlobusToken(String clientToken) { this.globusToken = clientToken; } - public String getApiToken() { - return apiToken; + public AuthenticatedUser getLocalUser() { + return user; } - public void setApiToken(String apiToken) { - this.apiToken = apiToken; + public void setLocalUser(AuthenticatedUser authUser) { + 
this.user = authUser; } public String getRuleId() { From 69cfe2909deb099a201442d36101d6686204ad70 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 23 Aug 2024 12:40:58 -0400 Subject: [PATCH 24/31] changed the polling interval default in the new TaskMonitoringServiceBean #10623 --- doc/sphinx-guides/source/installation/config.rst | 2 +- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 731eef71c57..fe438504f06 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -4784,7 +4784,7 @@ The URL where the `dataverse-globus tasks = globusService.findAllOngoingTasks(); tasks.forEach(t -> { From d223a8f69166778b5015d7d9ad4f8e733ce22394 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 23 Aug 2024 13:31:02 -0400 Subject: [PATCH 25/31] more changes/refinements per review feedback (#10623) --- .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 3 ++- .../harvard/iq/dataverse/ingest/IngestServiceBean.java | 4 ++-- .../edu/harvard/iq/dataverse/settings/JvmSettings.java | 1 + .../java/edu/harvard/iq/dataverse/util/SystemConfig.java | 9 --------- src/main/java/propertyFiles/Bundle.properties | 2 +- 5 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java index 341714539cf..fdb2b222804 100644 --- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.globus; +import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; import jakarta.annotation.PostConstruct; @@ -48,7 +49,7 @@ public class TaskMonitoringServiceBean { @PostConstruct public void init() { - if (systemConfig.isGlobusTaskMonitoringServer()) { + if (JvmSettings.GLOBUS_TASK_MONITORING_SERVER.lookupOptional(Boolean.class).orElse(false)) { logger.info("Starting Globus task monitoring service"); int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault( settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 600); diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index 3f76a319902..b42fd950528 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -345,8 +345,8 @@ public List saveAndAddFilesToDataset(DatasetVersion version, StorageIO dataAccess = DataAccess.getStorageIO(dataFile); //Populate metadata dataAccess.open(DataAccessOption.READ_ACCESS); - // (this will make a remote call to check if the file exists - // and obtain its size) + // (the .open() above makes a remote call to check if + // the file exists and obtains its size) confirmedFileSize = dataAccess.getSize(); // For directly-uploaded files, we will perform the file size diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java index 9d13be005c9..262d488acab 100644 --- 
From d223a8f69166778b5015d7d9ad4f8e733ce22394 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Fri, 23 Aug 2024 13:31:02 -0400
Subject: [PATCH 25/31] more changes/refinements per review feedback (#10623)

---
 .../iq/dataverse/globus/TaskMonitoringServiceBean.java | 3 ++-
 .../harvard/iq/dataverse/ingest/IngestServiceBean.java | 4 ++--
 .../edu/harvard/iq/dataverse/settings/JvmSettings.java | 1 +
 .../java/edu/harvard/iq/dataverse/util/SystemConfig.java | 9 ---------
 src/main/java/propertyFiles/Bundle.properties | 2 +-
 5 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
index 341714539cf..fdb2b222804 100644
--- a/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/globus/TaskMonitoringServiceBean.java
@@ -1,5 +1,6 @@
 package edu.harvard.iq.dataverse.globus;

+import edu.harvard.iq.dataverse.settings.JvmSettings;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import edu.harvard.iq.dataverse.util.SystemConfig;
 import jakarta.annotation.PostConstruct;
@@ -48,7 +49,7 @@ public class TaskMonitoringServiceBean {
     @PostConstruct
     public void init() {
-        if (systemConfig.isGlobusTaskMonitoringServer()) {
+        if (JvmSettings.GLOBUS_TASK_MONITORING_SERVER.lookupOptional(Boolean.class).orElse(false)) {
             logger.info("Starting Globus task monitoring service");
             int pollingInterval = SystemConfig.getIntLimitFromStringOrDefault(
                     settingsSvc.getValueForKey(SettingsServiceBean.Key.GlobusPollingInterval), 600);

diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
index 3f76a319902..b42fd950528 100644
--- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
@@ -345,8 +345,8 @@ public List<DataFile> saveAndAddFilesToDataset(DatasetVersion version,
 StorageIO<DataFile> dataAccess = DataAccess.getStorageIO(dataFile);
 //Populate metadata
 dataAccess.open(DataAccessOption.READ_ACCESS);
-                        // (this will make a remote call to check if the file exists
-                        // and obtain its size)
+                        // (the .open() above makes a remote call to check if
+                        // the file exists and obtains its size)
                         confirmedFileSize = dataAccess.getSize();

                         // For directly-uploaded files, we will perform the file size

diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java
index 9d13be005c9..262d488acab 100644
--- a/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java
+++ b/src/main/java/edu/harvard/iq/dataverse/settings/JvmSettings.java
@@ -51,6 +51,7 @@ public enum JvmSettings {
     DOCROOT_DIRECTORY(SCOPE_FILES, "docroot"),
     GUESTBOOK_AT_REQUEST(SCOPE_FILES, "guestbook-at-request"),
     GLOBUS_CACHE_MAXAGE(SCOPE_FILES, "globus-cache-maxage"),
+    GLOBUS_TASK_MONITORING_SERVER(SCOPE_FILES, "globus-monitoring-server"),

     //STORAGE DRIVER SETTINGS
     SCOPE_DRIVER(SCOPE_FILES),

diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
index 7417a5db4d4..c664d7d6730 100644
--- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
+++ b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
@@ -82,7 +82,6 @@ public class SystemConfig {
     private String buildNumber = null;

     private static final String JVM_TIMER_SERVER_OPTION = "dataverse.timerServer";
-    private static final String JVM_GLOBUS_TASK_MONITORING_OPTION = "dataverse.globus.taskMonitoringServer";

     private static final long DEFAULT_GUESTBOOK_RESPONSES_DISPLAY_LIMIT = 5000L;
     private static final long DEFAULT_THUMBNAIL_SIZE_LIMIT_IMAGE = 3000000L; // 3 MB
@@ -547,14 +546,6 @@ public boolean isTimerServer() {
         return false;
     }

-    public boolean isGlobusTaskMonitoringServer() {
-        String optionValue = System.getProperty(JVM_GLOBUS_TASK_MONITORING_OPTION);
-        if ("true".equalsIgnoreCase(optionValue)) {
-            return true;
-        }
-        return false;
-    }
-
     public String getFooterCopyrightAndYear() {
         return BundleUtil.getStringFromBundle("footer.copyright", Arrays.asList(Year.now().getValue() + ""));
     }

diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties
index 25ae8ad00c0..d7e30c5e0e3 100644
--- a/src/main/java/propertyFiles/Bundle.properties
+++ b/src/main/java/propertyFiles/Bundle.properties
@@ -253,7 +253,7 @@ notification.mail.import.filesystem=Dataset {2} ({0}/dataset.xhtml?persistentId=
 notification.mail.globus.upload.completed=Globus transfer to Dataset {2} was successful. File(s) have been uploaded and verified.<br><br>{3}
 notification.mail.globus.download.completed=Globus transfer of file(s) from the dataset {2} was successful.<br><br>{3}
 notification.mail.globus.upload.completedWithErrors=Globus transfer to Dataset {2} is complete with errors.<br><br>{3}
-notification.mail.globus.upload.failedRemotely=Remote data transfer between Globus collections for Dataset {2} failed, reported via Globus API.<br><br>{3}
+notification.mail.globus.upload.failedRemotely=Remote data transfer between Globus endpoints for Dataset {2} failed, as reported via Globus API.<br><br>{3}
 notification.mail.globus.upload.failedLocally=Dataverse received a confirmation of a successful Globus data transfer for Dataset {2}, but failed to add the files to the dataset locally.<br><br>{3}
 notification.mail.globus.download.completedWithErrors=Globus transfer from the dataset {2} is complete with errors.<br><br>{3}
 notification.import.filesystem=Dataset {1} has been successfully uploaded and verified.
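Patch 25 above replaces the ad hoc System.getProperty() lookup in SystemConfig with a regular JvmSettings entry, so the option is now an ordinary MicroProfile Config property. A sketch of enabling it on a Payara-based installation, where the property name dataverse.files.globus-monitoring-server follows from the new GLOBUS_TASK_MONITORING_SERVER(SCOPE_FILES, "globus-monitoring-server") entry and the asadmin path is deployment-specific:

    # Designate this instance as the dedicated Globus task monitoring server:
    ./asadmin create-jvm-options '-Ddataverse.files.globus-monitoring-server=true'

    # Or, being a MicroProfile Config property, it can come from the
    # environment instead (e.g. in a container deployment):
    export DATAVERSE_FILES_GLOBUS_MONITORING_SERVER=true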
From 0ca5e621002711befaba23ba21208e2c19f08a92 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Fri, 23 Aug 2024 14:32:19 -0400
Subject: [PATCH 26/31] added an upfront locks check to the /addGlobusFiles api
 #10623

---
 .../java/edu/harvard/iq/dataverse/api/Datasets.java | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 960cc408ee5..47505ef3879 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -99,6 +99,7 @@
 import java.util.stream.Collectors;

 import static edu.harvard.iq.dataverse.api.ApiConstants.*;
+import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException;
 import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*;
 import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
 import static jakarta.ws.rs.core.Response.Status.BAD_REQUEST;
@@ -4045,7 +4046,16 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc,
             return badRequest(BundleUtil.getStringFromBundle("datasets.api.globusuploaddisabled"));
         }

-        // @todo check if the dataset is already locked!
+        // Check if the dataset is already locked
+        // We are reusing the code and logic used by various commands to determine
+        // if there are any locks on the dataset that would prevent the current
+        // user from modifying it:
+        try {
+            DataverseRequest dataverseRequest = createDataverseRequest(authUser);
+            permissionService.checkEditDatasetLock(dataset, dataverseRequest, new UpdateDatasetVersionCommand(dataset, dataverseRequest));
+        } catch (IllegalCommandException icex) {
+            return error(Response.Status.FORBIDDEN, "Dataset " + datasetId + " is locked: " + icex.getLocalizedMessage());
+        }

         JsonObject jsonObject = null;
         try {

From 23d0f6c8db678c2f87e7fc2ff4bffabbc3b44d94 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Fri, 23 Aug 2024 14:44:26 -0400
Subject: [PATCH 27/31] added an upfront locks check to the /addGlobusFiles api
 #10623

---
 src/main/java/edu/harvard/iq/dataverse/api/Datasets.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
index 47505ef3879..d8e010940cf 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java
@@ -4052,7 +4052,7 @@ public Response addGlobusFilesToDataset(@Context ContainerRequestContext crc,
         // user from modifying it:
         try {
             DataverseRequest dataverseRequest = createDataverseRequest(authUser);
-            permissionService.checkEditDatasetLock(dataset, dataverseRequest, new UpdateDatasetVersionCommand(dataset, dataverseRequest));
+            permissionService.checkEditDatasetLock(dataset, dataverseRequest, null);
         } catch (IllegalCommandException icex) {
             return error(Response.Status.FORBIDDEN, "Dataset " + datasetId + " is locked: " + icex.getLocalizedMessage());
         }

From 7b6f81eb4992c075be22e2debea3ec1b24c9551f Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Mon, 23 Sep 2024 11:21:44 -0400
Subject: [PATCH 28/31] remove tabs to make reviewdog happy. woof!
#10623 --- .../iq/dataverse/ExternalFileUploadInProgress.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java index 96bfd3f63f5..c90fdc6edc2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalFileUploadInProgress.java @@ -23,10 +23,10 @@ * externally. (?) */ @NamedQueries({ - @NamedQuery( name="ExternalFileUploadInProgress.deleteByTaskId", - query="DELETE FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId"), - @NamedQuery(name = "ExternalFileUploadInProgress.findByTaskId", - query = "SELECT f FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId")}) + @NamedQuery(name = "ExternalFileUploadInProgress.deleteByTaskId", + query = "DELETE FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId"), + @NamedQuery(name = "ExternalFileUploadInProgress.findByTaskId", + query = "SELECT f FROM ExternalFileUploadInProgress f WHERE f.taskId=:taskId")}) @Entity @Table(indexes = {@Index(columnList="taskid")}) public class ExternalFileUploadInProgress implements Serializable { From 2baf62e7cac957979f5d16637acd1b4c39e3713f Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 23 Sep 2024 11:39:54 -0400 Subject: [PATCH 29/31] globus doc tweaks #10623 --- doc/release-notes/10623-globus-improvements.md | 2 +- doc/sphinx-guides/source/developers/big-data-support.rst | 2 +- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- doc/sphinx-guides/source/installation/config.rst | 6 ++++-- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/release-notes/10623-globus-improvements.md b/doc/release-notes/10623-globus-improvements.md index 03579b59631..c6b9febbb02 100644 --- a/doc/release-notes/10623-globus-improvements.md +++ b/doc/release-notes/10623-globus-improvements.md @@ -1 +1 @@ -A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See the `globus-use-experimental-async-framework` feature flag in the Configuration guide. \ No newline at end of file +A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See the `globus-use-experimental-async-framework` under [Feature Flags](https://dataverse-guide--10781.org.readthedocs.build/en/10781/installation/config.html#feature-flags) in the Installation Guide. diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 99ea4d2e0ba..02dd31b8472 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -188,4 +188,4 @@ An overview of the control and data transfer interactions between components was See also :ref:`Globus settings <:GlobusSettings>`. -An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. 
This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag (see :ref:`feature-flags`) and the JVM option :ref:`dataverse.globus.taskMonitoringServer`. diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 0f16dc704ef..26620fd6a92 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -185,7 +185,7 @@ As the transfer can take significant time and the API call is asynchronous, the Once the transfer completes, Dataverse will remove the write permission for the principal. -An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This new framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This new framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag (see :ref:`feature-flags`) and the JVM option :ref:`dataverse.globus.taskMonitoringServer`. Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 23168e12f30..bfbf9190357 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3271,7 +3271,7 @@ Can also be set via *MicroProfile Config API* sources, e.g. the environment vari dataverse.globus.taskMonitoringServer +++++++++++++++++++++++++++++++++++++ -This setting is required in conjunction with the ``globus-use-experimental-async-framework`` feature flag. Setting it to true designates the Dataverse instance to serve as the dedicated polling server. It is needed so that the new framework can be used in a multi-node installation. +This setting is required in conjunction with the ``globus-use-experimental-async-framework`` feature flag (see :ref:`feature-flags`). 
Setting it to true designates the Dataverse instance to serve as the dedicated polling server. It is needed so that the new framework can be used in a multi-node installation. .. _feature-flags: @@ -3309,7 +3309,7 @@ please find all known feature flags below. Any of these flags can be activated u - Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call. - ``Off`` * - globus-use-experimental-async-framework - - Activates a new experimental implementation of Globus polling of ongoing remote data transfers that does not rely on the instance staying up continuously for the duration of the transfers and saves the state information about Globus upload requests in the database. Added in v6.4. Note that the JVM option ``dataverse.globus.taskMonitoringServer`` described above must also be enabled on one (and only one, in a multi-node installation) Dataverse instance. + - Activates a new experimental implementation of Globus polling of ongoing remote data transfers that does not rely on the instance staying up continuously for the duration of the transfers and saves the state information about Globus upload requests in the database. Added in v6.4. Affects :ref:`:GlobusPollingInterval`. Note that the JVM option :ref:`dataverse.globus.taskMonitoringServer` described above must also be enabled on one (and only one, in a multi-node installation) Dataverse instance. - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable @@ -4789,6 +4789,8 @@ The list of parent dataset field names for which the LDN Announce workflow step The URL where the `dataverse-globus `_ "transfer" app has been deployed to support Globus integration. See :ref:`globus-support` for details. +.. _:GlobusPollingInterval: + :GlobusPollingInterval ++++++++++++++++++++++ From d610094c7fc557b06fad2115f44291057d72cc3d Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 25 Sep 2024 10:20:17 -0400 Subject: [PATCH 30/31] Updated the docs to reflect the new name of a JVM option (#10623) --- doc/sphinx-guides/source/developers/big-data-support.rst | 2 +- doc/sphinx-guides/source/developers/globus-api.rst | 2 +- doc/sphinx-guides/source/installation/config.rst | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/sphinx-guides/source/developers/big-data-support.rst b/doc/sphinx-guides/source/developers/big-data-support.rst index 99ea4d2e0ba..dc4006b7d89 100644 --- a/doc/sphinx-guides/source/developers/big-data-support.rst +++ b/doc/sphinx-guides/source/developers/big-data-support.rst @@ -188,4 +188,4 @@ An overview of the control and data transfer interactions between components was See also :ref:`Globus settings <:GlobusSettings>`. -An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. 
This framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.files.globus-monitoring-server`` described in the Configuration guide. diff --git a/doc/sphinx-guides/source/developers/globus-api.rst b/doc/sphinx-guides/source/developers/globus-api.rst index 0f16dc704ef..a1bde106f92 100644 --- a/doc/sphinx-guides/source/developers/globus-api.rst +++ b/doc/sphinx-guides/source/developers/globus-api.rst @@ -185,7 +185,7 @@ As the transfer can take significant time and the API call is asynchronous, the Once the transfer completes, Dataverse will remove the write permission for the principal. -An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This new framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.globus.taskMonitoringServer`` described in the Configuration guide. +An alternative, experimental implementation of Globus polling of ongoing upload transfers has been added in v6.4. This new framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. Due to its experimental nature it is not enabled by default. See the ``globus-use-experimental-async-framework`` feature flag and the JVM option ``dataverse.files.globus-monitoring-server`` described in the Configuration guide. Note that when using a managed endpoint that uses the Globus S3 Connector, the checksum should be correct as Dataverse can validate it. For file-based endpoints, the checksum should be included if available but Dataverse cannot verify it. diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index 23168e12f30..bf64793e385 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -3266,10 +3266,10 @@ The email for your institution that you'd like to appear in bag-info.txt. See :r Can also be set via *MicroProfile Config API* sources, e.g. the environment variable ``DATAVERSE_BAGIT_SOURCEORG_EMAIL``. -.. _dataverse.globus.taskMonitoringServer: +.. _dataverse.files.globus-monitoring-server: -dataverse.globus.taskMonitoringServer -+++++++++++++++++++++++++++++++++++++ +dataverse.files.globus-monitoring-server +++++++++++++++++++++++++++++++++++++++++ This setting is required in conjunction with the ``globus-use-experimental-async-framework`` feature flag. Setting it to true designates the Dataverse instance to serve as the dedicated polling server. It is needed so that the new framework can be used in a multi-node installation. @@ -3309,7 +3309,7 @@ please find all known feature flags below. Any of these flags can be activated u - Removes the reason field in the `Publish/Return To Author` dialog that was added as a required field in v6.2 and makes the reason an optional parameter in the :ref:`return-a-dataset` API call. 
- ``Off`` * - globus-use-experimental-async-framework - - Activates a new experimental implementation of Globus polling of ongoing remote data transfers that does not rely on the instance staying up continuously for the duration of the transfers and saves the state information about Globus upload requests in the database. Added in v6.4. Note that the JVM option ``dataverse.globus.taskMonitoringServer`` described above must also be enabled on one (and only one, in a multi-node installation) Dataverse instance. + - Activates a new experimental implementation of Globus polling of ongoing remote data transfers that does not rely on the instance staying up continuously for the duration of the transfers and saves the state information about Globus upload requests in the database. Added in v6.4. Note that the JVM option ``dataverse.files.globus-monitoring-server`` described above must also be enabled on one (and only one, in a multi-node installation) Dataverse instance. - ``Off`` **Note:** Feature flags can be set via any `supported MicroProfile Config API source`_, e.g. the environment variable From 682c89fc069f71c5cd6477bdbffc58107fb01720 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Wed, 25 Sep 2024 11:43:33 -0400 Subject: [PATCH 31/31] improve release note #10623 --- doc/release-notes/10623-globus-improvements.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/10623-globus-improvements.md b/doc/release-notes/10623-globus-improvements.md index c6b9febbb02..9eb529bc4f7 100644 --- a/doc/release-notes/10623-globus-improvements.md +++ b/doc/release-notes/10623-globus-improvements.md @@ -1 +1 @@ -A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See the `globus-use-experimental-async-framework` under [Feature Flags](https://dataverse-guide--10781.org.readthedocs.build/en/10781/installation/config.html#feature-flags) in the Installation Guide. +A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See `globus-use-experimental-async-framework` under [Feature Flags](https://dataverse-guide--10781.org.readthedocs.build/en/10781/installation/config.html#feature-flags) and [dataverse.files.globus-monitoring-server](https://dataverse-guide--10781.org.readthedocs.build/en/10781/installation/config.html#dataverse-files-globus-monitoring-server) in the Installation Guide. See also #10623 and #10781.
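Taken together, the pieces in this series can be exercised end to end. A rough sketch for a single-node installation, where the flag and option names come from the guides cited above and SERVER_URL, API_TOKEN, PID, and upload-files.json are placeholders; the jsonData payload is the file-metadata array described in the Globus API guide:

    # 1. Enable the experimental async Globus framework (feature flags are
    #    plain MicroProfile Config properties, so an environment variable works):
    export DATAVERSE_FEATURE_GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK=1

    # 2. Designate this (only) node as the dedicated task monitoring server:
    ./asadmin create-jvm-options '-Ddataverse.files.globus-monitoring-server=true'

    # 3. Register a Globus upload; with the upfront check from patches 26/27,
    #    a locked dataset now fails fast with 403 Forbidden instead of failing
    #    after the transfer completes:
    curl -H "X-Dataverse-key:$API_TOKEN" -X POST \
         "$SERVER_URL/api/datasets/:persistentId/addGlobusFiles?persistentId=$PID" \
         -F "jsonData=@upload-files.json"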