From 839237646c9846bf2a665d0cb4d18bf4b558f3a7 Mon Sep 17 00:00:00 2001
From: mderuijter
Date: Mon, 4 Nov 2019 13:05:15 +0100
Subject: [PATCH 1/2] Fixed #5954 Uploading large files with S3

---
 .../iq/dataverse/dataaccess/S3AccessIO.java | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
index 5cbeaf68603..d2429f6ebe7 100644
--- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
+++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
@@ -19,6 +19,8 @@
 import com.amazonaws.services.s3.model.ResponseHeaderOverrides;
 import com.amazonaws.services.s3.model.S3Object;
 import com.amazonaws.services.s3.model.S3ObjectSummary;
+import com.amazonaws.services.s3.transfer.TransferManager;
+import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
 import edu.harvard.iq.dataverse.DataFile;
 import edu.harvard.iq.dataverse.Dataset;
 import edu.harvard.iq.dataverse.Dataverse;
@@ -91,6 +93,11 @@ public S3AccessIO(T dvObject, DataAccessRequest req) {
 
             // let's build the client :-)
             this.s3 = s3CB.build();
+
+            // building a TransferManager instance to support multipart uploads of files over 4 GB
+            this.tm = TransferManagerBuilder.standard()
+                    .withS3Client(this.s3)
+                    .build();
         } catch (Exception e) {
             throw new AmazonClientException(
                     "Cannot instantiate a S3 client; check your AWS credentials and region",
@@ -115,6 +122,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client)
     public static String S3_IDENTIFIER_PREFIX = "s3";
 
     private AmazonS3 s3 = null;
+    private TransferManager tm = null;
     /**
      * Pass in a URL pointing to your S3 compatible storage.
      * For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html
@@ -277,14 +285,13 @@ public void savePath(Path fileSystemPath) throws IOException {
         try {
             File inputFile = fileSystemPath.toFile();
             if (dvObject instanceof DataFile) {
-                s3.putObject(new PutObjectRequest(bucketName, key, inputFile));
-
+                tm.upload(new PutObjectRequest(bucketName, key, inputFile)).waitForCompletion();
                 newFileSize = inputFile.length();
             } else {
                 throw new IOException("DvObject type other than datafile is not yet supported");
             }
-        } catch (SdkClientException ioex) {
+        } catch (SdkClientException | InterruptedException ioex) {
             String failureMsg = ioex.getMessage();
             if (failureMsg == null) {
                 failureMsg = "S3AccessIO: Unknown exception occured while uploading a local file into S3Object "+key;
             }
@@ -293,6 +300,7 @@ public void savePath(Path fileSystemPath) throws IOException {
             throw new IOException(failureMsg);
         }
+
         // if it has uploaded successfully, we can reset the size
         // of the object:
         setSize(newFileSize);
@@ -314,7 +322,7 @@ public void savePath(Path fileSystemPath) throws IOException {
      * Swift driver.
      *
      * @param inputStream InputStream we want to save
-     * @param auxItemTag String representing this Auxiliary type ("extension")
+     * @param filesize Long representing the file size
      * @throws IOException if anything goes wrong.
      */
     @Override
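The patch above swaps the blocking s3.putObject() call for the SDK's TransferManager, which splits a large file into parts and uploads them in parallel, falling back to a single PUT for small files. For reviewers who want to exercise that behavior outside Dataverse, here is a minimal standalone sketch of the same pattern with the AWS SDK for Java v1. The bucket name, key, and file path are hypothetical placeholders, and the shutdownNow(false) cleanup is not part of the patch; it is just the usual way to release the TransferManager's thread pool in a short-lived program.

    import com.amazonaws.services.s3.AmazonS3;
    import com.amazonaws.services.s3.AmazonS3ClientBuilder;
    import com.amazonaws.services.s3.model.PutObjectRequest;
    import com.amazonaws.services.s3.transfer.TransferManager;
    import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
    import com.amazonaws.services.s3.transfer.Upload;

    import java.io.File;

    public class TransferManagerUploadSketch {
        public static void main(String[] args) throws InterruptedException {
            // Hypothetical values for illustration only.
            String bucketName = "my-dataverse-bucket";
            String key = "10.5072/FK2/ABCDEF";
            File inputFile = new File("/tmp/large-file.bin");

            AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();

            // TransferManager uploads large files as parallel multipart
            // requests and uses a plain single PUT below its size threshold.
            TransferManager tm = TransferManagerBuilder.standard()
                    .withS3Client(s3)
                    .build();
            try {
                Upload upload = tm.upload(new PutObjectRequest(bucketName, key, inputFile));
                // Block until every part has been uploaded; throws
                // InterruptedException if the waiting thread is interrupted.
                upload.waitForCompletion();
            } finally {
                // Release the TransferManager's thread pool; 'false' keeps
                // the wrapped AmazonS3 client open for further use.
                tm.shutdownNow(false);
            }
        }
    }

Calling waitForCompletion() is what keeps the new code blocking the way the old putObject() call did, and it is also why InterruptedException joins SdkClientException in the catch clause above.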
From d5ee4fda32986895c2a0de359b7b686476951a66 Mon Sep 17 00:00:00 2001
From: Philip Durbin
Date: Tue, 5 Nov 2019 10:13:25 -0500
Subject: [PATCH 2/2] add note: S3 multipart upload support for files over 4GB
 #5954

---
 doc/sphinx-guides/source/installation/config.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst
index f114a7403b7..fed2d180bd7 100644
--- a/doc/sphinx-guides/source/installation/config.rst
+++ b/doc/sphinx-guides/source/installation/config.rst
@@ -303,6 +303,8 @@ For institutions and organizations looking to use some kind of S3-based object s
 this is entirely possible. You can either use Amazon Web Services or use some other, even on-site S3-compatible
 storage (like Minio, Ceph RADOS S3 Gateway and many more).
 
+The Dataverse S3 driver supports multipart upload for files over 4 GB.
+
 **Note:** The Dataverse Team is most familiar with AWS S3, and can provide support on its usage with Dataverse. Thanks to community contributions, the application's architecture also allows non-AWS S3 providers. The Dataverse Team can provide very limited support on these other providers. We recommend reaching out to the wider Dataverse community if you have questions.
 
 First: Set Up Accounts and Access Credentials