Skip to content

Commit e723d14

Browse files
committed
RestAssured test for the new harvesting client features #10909
1 parent 5039ba4 commit e723d14

File tree

1 file changed

+127
-0
lines changed

1 file changed

+127
-0
lines changed

src/test/java/edu/harvard/iq/dataverse/api/HarvestingClientsIT.java

+127
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ public class HarvestingClientsIT {
4040
private static final String ARCHIVE_DESCRIPTION = "RestAssured harvesting client test";
4141
private static final String CONTROL_OAI_SET = "controlTestSet2";
4242
private static final int DATASETS_IN_CONTROL_SET = 8;
43+
// Base URL of the DataCite OAI service; sent as the "archiveUrl" of the
// harvesting client created in testHarvestingFromDatacite().
private static final String DATACITE_ARCHIVE_URL = "https://oai.datacite.org";
44+
// OAI endpoint located under the archive URL; sent as the "harvestUrl" of the
// harvesting client created in testHarvestingFromDatacite().
private static final String DATACITE_OAI_URL = DATACITE_ARCHIVE_URL + "/oai";
4345
private static String normalUserAPIKey;
4446
private static String adminUserAPIKey;
4547
private static String harvestCollectionAlias;
@@ -303,4 +305,129 @@ private void harvestingClientRun(boolean allowHarvestingMissingCVV) throws Inte
303305
// Fail if it hasn't completed in maxWait seconds
304306
assertTrue(i < maxWait);
305307
}
308+
309+
/*
310+
* Being able to harvest from DataCite (issue #10909, pr #11011) is an
311+
* important enough feature to warrant a dedicated test.
312+
* Just like the other tests above, this will rely on an external OAI
313+
* server, which is somewhat problematic inherently. However, of all the
314+
* external servers and services, DataCite can be safely considered to be
315+
* more reliable than most.
316+
* The test is super straightforward, with the goal of harvesting one
317+
* specific IQSS dataset (doi:10.7910/DVN/TJCLKP) from
318+
* https://oai.datacite.org/oai. As part of testing the overall functionality
319+
* of being able to work with the quirks of the DataCite OAI service, it
320+
* tests 2 new features:
321+
* "useOaiIdentifiersAsPids": true,
322+
* "useListRecords": true,
323+
* (both have useful applications in other scenarios, i.e. when harvesting
324+
* from other sources, not just from DataCite!)
325+
*
326+
*/
327+
@Test
328+
public void testHarvestingFromDatacite() throws InterruptedException {
329+
String nickName = "philTJCLKP" + UtilIT.getRandomString(6);
330+
331+
// The magical string used as the name of our "pseudo set" is the
332+
// native DataCite search API query that finds our dataset, base64-encoded.
333+
// i.e.,
334+
// native API: https://api.datacite.org/dois?query=doi:10.7910/DVN/TJCLKP
335+
// encoded:
336+
// echo "doi:10.7910/DVN/TJCLKP" | base64
337+
// ZG9pOjEwLjc5MTAvRFZOL1RKQ0xLUAo=
338+
String pseudoSetName = "~ZG9pOjEwLjc5MTAvRFZOL1RKQ0xLUAo=";
339+
340+
clientApiPath = String.format(HARVEST_CLIENTS_API+"%s", nickName);
341+
String clientJson = String.format("{\"dataverseAlias\":\"root\","
342+
+ "\"type\":\"oai\","
343+
+ "\"harvestUrl\":\"%s\","
344+
+ "\"archiveUrl\":\"%s\","
345+
+ "\"set\":\"%s\","
346+
+ "\"useOaiIdentifiersAsPids\": true,"
347+
+ "\"useListRecords\": true,"
348+
+ "\"metadataFormat\":\"%s\"}",
349+
DATACITE_OAI_URL, DATACITE_ARCHIVE_URL, pseudoSetName, HARVEST_METADATA_FORMAT);
350+
351+
Response createResponse = given()
352+
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
353+
.body(clientJson)
354+
.post(clientApiPath);
355+
assertEquals(CREATED.getStatusCode(), createResponse.getStatusCode());
356+
357+
// API TEST 1. Run the harvest using the configuration (client) we have
358+
// just created
359+
360+
String runHarvestApiPath = String.format(HARVEST_CLIENTS_API+"%s/run", nickName);
361+
362+
Response runResponse = given()
363+
.header(UtilIT.API_TOKEN_HTTP_HEADER, adminUserAPIKey)
364+
.post(runHarvestApiPath);
365+
assertEquals(ACCEPTED.getStatusCode(), runResponse.getStatusCode());
366+
367+
// API TEST 2. As indicated by the ACCEPTED status code above, harvesting
368+
// is an asynchronous operation that will be performed in the background.
369+
// Verify that this "in progress" status is properly reported while it's
370+
// running, and that it completes in some reasonable amount of time.
371+
372+
int i = 0;
373+
int maxWait=20; // a very conservative interval; this harvest has no business taking this long
374+
375+
do {
376+
// Give it an initial 2 sec. delay, to make sure the client state
377+
// has been updated in the database, which can take some appreciable
378+
// amount of time on a heavily-loaded server running a full suite of
379+
// tests:
380+
Thread.sleep(2000L);
381+
// keep checking the status of the client with the GET api:
382+
Response getClientResponse = given()
383+
.get(clientApiPath);
384+
385+
assertEquals(OK.getStatusCode(), getClientResponse.getStatusCode());
386+
JsonPath responseJsonPath = getClientResponse.body().jsonPath();
387+
assertNotNull(responseJsonPath, "Invalid JSON in GET client response");
388+
assertEquals(ApiConstants.STATUS_OK, responseJsonPath.getString("status"));
389+
390+
String clientStatus = responseJsonPath.getString("data.status");
391+
assertNotNull(clientStatus);
392+
393+
if ("inProgress".equals(clientStatus) || "IN PROGRESS".equals(responseJsonPath.getString("data.lastResult"))) {
394+
// we'll sleep for another second
395+
i++;
396+
} else {
397+
logger.info("getClientResponse.prettyPrint: "
398+
+ getClientResponse.prettyPrint());
399+
// Check the values in the response:
400+
// a) Confirm that the harvest has completed:
401+
assertEquals("inActive", clientStatus, "Unexpected client status: "+clientStatus);
402+
403+
// b) Confirm that it has actually succeeded:
404+
assertEquals("SUCCESS", responseJsonPath.getString("data.lastResult"), "Last harvest not reported a success (took "+i+" seconds)");
405+
String harvestTimeStamp = responseJsonPath.getString("data.lastHarvest");
406+
assertNotNull(harvestTimeStamp);
407+
408+
// c) Confirm that the other timestamps match:
409+
assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastSuccessful"));
410+
assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastNonEmpty"));
411+
412+
// d) Confirm that the expected 1 dataset has been harvested, with no failures:
413+
assertEquals(1, responseJsonPath.getInt("data.lastDatasetsHarvested"));
414+
assertEquals(0, responseJsonPath.getInt("data.lastDatasetsFailed"));
415+
assertEquals(0, responseJsonPath.getInt("data.lastDatasetsDeleted"));
416+
417+
// ok, it looks like the harvest has completed successfully.
418+
break;
419+
}
420+
} while (i<maxWait);
421+
422+
System.out.println("Waited " + i + " seconds for the harvest to complete.");
423+
// Fail if it hasn't completed in maxWait seconds
424+
assertTrue(i < maxWait);
425+
426+
// @todo: maybe call native API and check specifically on
427+
// /api/datasets/:persistentId?persistentId=doi:10.7910/DVN/TJCLKP
428+
// to verify that it has been properly imporated.
429+
430+
// No need to delete the client (and the harvested dataset with it) here,
431+
// it will be deleted by the @AfterEach cleanup() method
432+
}
306433
}

0 commit comments

Comments
 (0)