Skip to content

Commit

Permalink
Merge pull request #6341 from mderuijter/5954-multipart-upload-support
Browse files Browse the repository at this point in the history
Fixed #5954 Uploading large files with S3 - Multipart Upload
  • Loading branch information
kcondon authored Nov 6, 2019
2 parents b46243d + c3fabf8 commit 99fd872
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
2 changes: 2 additions & 0 deletions doc/sphinx-guides/source/installation/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,8 @@ For institutions and organizations looking to use some kind of S3-based object s
this is entirely possible. You can either use Amazon Web Services or use some other, even on-site S3-compatible
storage (like Minio, Ceph RADOS S3 Gateway and many more).

The Dataverse S3 driver supports multipart upload for files over 4 GB.

**Note:** The Dataverse Team is most familiar with AWS S3, and can provide support on its usage with Dataverse. Thanks to community contributions, the application's architecture also allows non-AWS S3 providers. The Dataverse Team can provide very limited support on these other providers. We recommend reaching out to the wider Dataverse community if you have questions.

First: Set Up Accounts and Access Credentials
Expand Down
16 changes: 12 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import com.amazonaws.services.s3.model.ResponseHeaderOverrides;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.Dataverse;
Expand Down Expand Up @@ -91,6 +93,11 @@ public S3AccessIO(T dvObject, DataAccessRequest req) {

// let's build the client :-)
this.s3 = s3CB.build();

// Build a TransferManager instance to support multipart uploads for files over 4 GB.
this.tm = TransferManagerBuilder.standard()
.withS3Client(this.s3)
.build();
} catch (Exception e) {
throw new AmazonClientException(
"Cannot instantiate a S3 client; check your AWS credentials and region",
Expand All @@ -115,6 +122,7 @@ public S3AccessIO(T dvObject, DataAccessRequest req, @NotNull AmazonS3 s3client)
public static String S3_IDENTIFIER_PREFIX = "s3";

private AmazonS3 s3 = null;
private TransferManager tm = null;
/**
* Pass in a URL pointing to your S3 compatible storage.
* For possible values see https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/client/builder/AwsClientBuilder.EndpointConfiguration.html
Expand Down Expand Up @@ -277,14 +285,13 @@ public void savePath(Path fileSystemPath) throws IOException {
try {
File inputFile = fileSystemPath.toFile();
if (dvObject instanceof DataFile) {
s3.putObject(new PutObjectRequest(bucketName, key, inputFile));

tm.upload(new PutObjectRequest(bucketName, key, inputFile)).waitForCompletion();
newFileSize = inputFile.length();
} else {
throw new IOException("DvObject type other than datafile is not yet supported");
}

} catch (SdkClientException ioex) {
} catch (SdkClientException | InterruptedException ioex ) {
String failureMsg = ioex.getMessage();
if (failureMsg == null) {
failureMsg = "S3AccessIO: Unknown exception occured while uploading a local file into S3Object "+key;
Expand All @@ -293,6 +300,7 @@ public void savePath(Path fileSystemPath) throws IOException {
throw new IOException(failureMsg);
}


// if it has uploaded successfully, we can reset the size
// of the object:
setSize(newFileSize);
Expand All @@ -314,7 +322,7 @@ public void savePath(Path fileSystemPath) throws IOException {
* Swift driver.
*
* @param inputStream InputStream we want to save
* @param auxItemTag String representing this Auxiliary type ("extension")
* @param filesize Long representing the filesize
* @throws IOException if anything goes wrong.
*/
@Override
Expand Down

0 comments on commit 99fd872

Please sign in to comment.