Skip to content

Commit

Permalink
add docs and tests for format=original #4529
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Jul 15, 2020
1 parent abb72e7 commit 44cc815
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 3 deletions.
2 changes: 2 additions & 0 deletions doc/sphinx-guides/source/api/dataaccess.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Downloading All Files in a Dataset

The "download all" API downloads as many files as possible from a dataset as a zipped bundle.

By default, tabular files are downloaded in their "archival" form (tab-separated values). To download the original files (Stata, for example), add ``format=original`` as a query parameter.

There are a number of reasons why not all of the files can be downloaded:

- Some of the files are restricted and your API token doesn't have access (you will still get the unrestricted files).
Expand Down
56 changes: 56 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/DownloadFilesIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import static javax.ws.rs.core.Response.Status.OK;
import static org.hamcrest.CoreMatchers.equalTo;
import org.junit.Assert;
import static org.junit.Assert.assertTrue;
import org.junit.BeforeClass;
import org.junit.Test;

Expand Down Expand Up @@ -277,6 +278,61 @@ public void downloadAllFilesRestricted() throws IOException {

}

/**
 * Exercises the "download all" API for a dataset that holds a tabular file,
 * comparing the bundle returned by default (archival form) with the bundle
 * returned when format=original is requested.
 */
@Test
public void downloadAllFilesTabular() throws IOException {

    // Fixture: a fresh user who owns a new dataverse and dataset.
    Response userResponse = UtilIT.createRandomUser();
    userResponse.prettyPrint();
    userResponse.then().assertThat().statusCode(OK.getStatusCode());
    String username = UtilIT.getUsernameFromResponse(userResponse);
    String apiToken = UtilIT.getApiTokenFromResponse(userResponse);

    Response dataverseResponse = UtilIT.createRandomDataverse(apiToken);
    dataverseResponse.prettyPrint();
    dataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode());
    String dataverseAlias = UtilIT.getAliasFromResponse(dataverseResponse);

    Response datasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
    datasetResponse.prettyPrint();
    datasetResponse.then().assertThat().statusCode(CREATED.getStatusCode());
    Integer datasetId = UtilIT.getDatasetIdFromResponse(datasetResponse);
    String datasetPid = UtilIT.getDatasetPersistentIdFromResponse(datasetResponse);

    // Upload a Stata file and wait on the "Ingest" lock before downloading.
    String pathToFile = "scripts/search/data/tabular/50by1000.dta";
    Response uploadResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
    uploadResponse.prettyPrint();
    uploadResponse.then().assertThat()
            .statusCode(OK.getStatusCode())
            .body("data.files[0].label", equalTo("50by1000.dta"));

    boolean ingestLockReleased = UtilIT.sleepForLock(
            datasetId.longValue(), "Ingest", apiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION);
    assertTrue("Failed test if Ingest Lock exceeds max duration " + pathToFile, ingestLockReleased);

    // Without a format parameter the bundle holds the archival version (.tab).
    Response archivalDownload = UtilIT.downloadFiles(datasetPid, apiToken);
    archivalDownload.then().assertThat().statusCode(OK.getStatusCode());
    HashSet<String> expectedArchival = new HashSet<>(Arrays.asList("50by1000.tab", "MANIFEST.TXT"));
    HashSet<String> actualArchival = gatherFilenames(archivalDownload.getBody().asInputStream());
    Assert.assertEquals(expectedArchival, actualArchival);

    // With format=original the bundle holds the uploaded Stata file (.dta).
    Response originalDownload = UtilIT.downloadFiles(datasetPid, "original", apiToken);
    originalDownload.then().assertThat().statusCode(OK.getStatusCode());
    HashSet<String> expectedOriginal = new HashSet<>(Arrays.asList("50by1000.dta", "MANIFEST.TXT"));
    HashSet<String> actualOriginal = gatherFilenames(originalDownload.getBody().asInputStream());
    Assert.assertEquals(expectedOriginal, actualOriginal);
}

private HashSet<String> gatherFilenames(InputStream inputStream) throws IOException {
HashSet<String> filenamesFound = new HashSet<>();
try (ZipInputStream zipStream = new ZipInputStream(inputStream)) {
Expand Down
26 changes: 23 additions & 3 deletions src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -752,10 +752,22 @@ static Response downloadFilesOriginal(Integer[] fileIds, String apiToken) {

static Response downloadFiles(String datasetIdOrPersistentId, String apiToken) {
String datasetVersion = null;
return downloadFiles(datasetIdOrPersistentId, datasetVersion, apiToken);
String format = null;
return downloadFiles(datasetIdOrPersistentId, datasetVersion, format, apiToken);
}

static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String apiToken) {
/**
 * Convenience overload of the "download all" request that does not pin a
 * dataset version: it delegates to the four-argument overload with a null
 * datasetVersion.
 *
 * NOTE(review): the overload this replaces had the same (String, String,
 * String) shape with a dataset version in the second position; confirm no
 * caller still passes a version here.
 *
 * @param datasetIdOrPersistentId forwarded unchanged to the four-argument overload.
 * @param format can be "original" for tabular files.
 * @param apiToken forwarded unchanged to the four-argument overload.
 */
static Response downloadFiles(String datasetIdOrPersistentId, String format, String apiToken) {
    String unspecifiedVersion = null;
    return downloadFiles(datasetIdOrPersistentId, unspecifiedVersion, format, apiToken);
}

/**
* @param format can be "original" for tabular files.
*/
static Response downloadFiles(String datasetIdOrPersistentId, String datasetVersion, String format, String apiToken) {
String idInPath = datasetIdOrPersistentId; // Assume it's a number.
String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path.
if (!NumberUtils.isNumber(datasetIdOrPersistentId)) {
Expand All @@ -771,7 +783,15 @@ static Response downloadFiles(String datasetIdOrPersistentId, String datasetVers
if (datasetVersion != null) {
optionalVersion = "/versions/" + datasetVersion;
}
return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam);
String optionalFormat = "";
if (format != null) {
if (!"".equals(optionalQueryParam)) {
optionalFormat = "&format=" + format;
} else {
optionalFormat = "?format=" + format;
}
}
return requestSpecification.get("/api/access/downloadAll/" + idInPath + optionalVersion + optionalQueryParam + optionalFormat);
}

static Response subset(String fileId, String variables, String apiToken) {
Expand Down

0 comments on commit 44cc815

Please sign in to comment.