add docs and tests for format=original #4529

IQSS · Jul 15, 2020 · 44cc815 · 44cc815
1 parent abb72e7
commit 44cc815
Show file tree

Hide file tree

Showing 3 changed files with 81 additions and 3 deletions.
diff --git a/doc/sphinx-guides/source/api/dataaccess.rst b/doc/sphinx-guides/source/api/dataaccess.rst
@@ -14,6 +14,8 @@ Downloading All Files in a Dataset
 
 The "download all" API downloads as many files as possible from a dataset as a zipped bundle.
 
+By default, tabular files are downloaded in their "archival" form (tab-separated values). To download them in their original format instead (Stata, for example), add ``format=original`` as a query parameter.
+
 There are a number of reasons why not all of the files can be downloaded:
 
 - Some of the files are restricted and your API token doesn't have access (you will still get the unrestricted files).

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java
@@ -18,6 +18,7 @@
 import static javax.ws.rs.core.Response.Status.OK;
 import static org.hamcrest.CoreMatchers.equalTo;
 import org.junit.Assert;
+import static org.junit.Assert.assertTrue;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -277,6 +278,61 @@ public void downloadAllFilesRestricted() throws IOException {
 
     }
 
+    /**
+     * Verifies that "download all" returns tabular files in their archival
+     * form (.tab) by default and in their original form with format=original.
+     */
+    @Test
+    public void downloadAllFilesTabular() throws IOException {
+
+        Response createUser = UtilIT.createRandomUser();
+        createUser.prettyPrint();
+        createUser.then().assertThat()
+                .statusCode(OK.getStatusCode());
+        String username = UtilIT.getUsernameFromResponse(createUser);
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+
+        Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken);
+        createDataverseResponse.prettyPrint();
+        createDataverseResponse.then().assertThat()
+                .statusCode(CREATED.getStatusCode());
+
+        String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse);
+
+        Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
+        createDataset.prettyPrint();
+        createDataset.then().assertThat()
+                .statusCode(CREATED.getStatusCode());
+
+        Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
+        String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(createDataset);
+
+        String pathToFile = "scripts/search/data/tabular/50by1000.dta";
+
+        Response uploadTabular = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
+        uploadTabular.prettyPrint();
+        uploadTabular.then().assertThat()
+                .statusCode(OK.getStatusCode())
+                .body("data.files[0].label", equalTo("50by1000.dta"));
+
+        assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));
+
+        Response downloadFiles1 = UtilIT.downloadFiles(datasetPid, apiToken);
+        downloadFiles1.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        // By default we get the archival version (.tab).
+        Assert.assertEquals(new HashSet<>(Arrays.asList("50by1000.tab", "MANIFEST.TXT")), gatherFilenames(downloadFiles1.getBody().asInputStream()));
+
+        String format = "original";
+        Response downloadFiles2 = UtilIT.downloadFiles(datasetPid, format, apiToken);
+        downloadFiles2.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        // By passing format=original we get the original version, Stata (.dta) in this case.
+        Assert.assertEquals(new HashSet<>(Arrays.asList("50by1000.dta", "MANIFEST.TXT")), gatherFilenames(downloadFiles2.getBody().asInputStream()));
+    }
+
     private HashSet<String> gatherFilenames(InputStream inputStream) throws IOException {
         HashSet<String> filenamesFound = new HashSet<>();
         try (ZipInputStream zipStream = new ZipInputStream(inputStream)) {

diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
@@ -752,10 +752,22 @@ static Response downloadFilesOriginal(Integer[] fileIds, String apiToken) {
 
     static Response downloadFiles(String datasetIdOrPersistentId, String apiToken) {
         String datasetVersion = null;
-        return downloadFiles(datasetIdOrPersistentId, datasetVersion, apiToken);
+        String format = null;
+        return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken);
     }
 
-    static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String apiToken) {
+    /**
+     * @param format "original" requests tabular files in their original format; this overload leaves the dataset version unset (null).
+     */
+    static Response downloadFiles(String datasetIdOrPersistentId, String format, String apiToken) {
+        String datasetVersion = null;
+        return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken);
+    }
+
+    /**
+     * @param format can be "original" for tabular files; when null, no format query parameter is added.
+     */
+    static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String format, String apiToken) {
         String idInPath = datasetIdOrPersistentId; // Assume it's a number.
         String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path.
         if (!NumberUtils.isNumber(datasetIdOrPersistentId)) {
@@ -771,7 +783,15 @@ static Response downloadFiles(String datasetIdOrPersistentId, String datasetVers
         if (datasetVersion != null) {
             optionalVersion = "/versions/" + datasetVersion;
         }
-        return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam);
+        String optionalFormat = "";
+        if (format != null) {
+            if (!"".equals(optionalQueryParam)) {
+                optionalFormat = "&format=" + format;
+            } else {
+                optionalFormat = "?format=" + format;
+            }
+        }
+        return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam + optionalFormat);
     }
 
     static Response subset(String fileId, String variables, String apiToken) {