@@ -40,6 +40,8 @@ public class HarvestingClientsIT {
40
40
private static final String ARCHIVE_DESCRIPTION = "RestAssured harvesting client test" ;
41
41
private static final String CONTROL_OAI_SET = "controlTestSet2" ;
42
42
private static final int DATASETS_IN_CONTROL_SET = 8 ;
43
// Base URL of the DataCite OAI service; sent as the "archiveUrl" of the
// harvesting client created in testHarvestingFromDatacite().
+ private static final String DATACITE_ARCHIVE_URL = "https://oai.datacite.org" ;
44
// OAI endpoint under the DataCite base URL; sent as the "harvestUrl" of the
// harvesting client created in testHarvestingFromDatacite().
+ private static final String DATACITE_OAI_URL = DATACITE_ARCHIVE_URL + "/oai" ;
43
45
private static String normalUserAPIKey ;
44
46
// API token sent in the UtilIT.API_TOKEN_HTTP_HEADER header when creating and
// running harvesting clients.
private static String adminUserAPIKey ;
45
47
private static String harvestCollectionAlias ;
@@ -303,4 +305,129 @@ private void harvestingClientRun(boolean allowHarvestingMissingCVV) throws Inte
303
305
// Fail if it hasn't completed in maxWait seconds
304
306
assertTrue (i < maxWait );
305
307
}
308
+
309
+ /*
310
+ * Being able to harvest from DataCite (issue #10909, pr #11011) is an
311
+ * important enough feature to warrant a dedicated test.
312
+ * Just like the other tests above, this will rely on an external OAI
313
+ * server, which is somewhat problematic inherently. However, of all the
314
+ * external servers and services, DataCite can be safely considered to be
315
+ * more reliable than most.
316
+ * The test is super straightforward, with the goal of harvesting one
317
+ * specific IQSS dataset (doi:10.7910/DVN/TJCLKP) from
318
+ * https://oai.datacite.org/oai. As part of testing the overal functionality
319
+ * of being able to work with the quirks of the DataCite OAI service, it
320
+ * tests 2 new features:
321
+ * "useOaiIdentifiersAsPids": true,
322
+ * "useListRecords": true,
323
+ * (both have useful applications in other scenarios, i.e. when harvesting
324
+ * from other sources, not just from DataCite!)
325
+ *
326
+ */
327
+ @ Test
328
+ public void testHarvestingFromDatacite () throws InterruptedException {
329
+ String nickName = "philTJCLKP" + UtilIT .getRandomString (6 );
330
+
331
+ // The magical string used as the name of our "pseudo set" is the
332
+ // native DataCite search API query that finds our dataset, base64-encoded.
333
+ // i.e.,
334
+ // native API: https://api.datacite.org/dois?query=doi:10.7910/DVN/TJCLKP
335
+ // encoded:
336
+ // echo "doi:10.7910/DVN/TJCLKP" | base64
337
+ // ZG9pOjEwLjc5MTAvRFZOL1RKQ0xLUAo=
338
+ String pseudoSetName = "~ZG9pOjEwLjc5MTAvRFZOL1RKQ0xLUAo=" ;
339
+
340
+ clientApiPath = String .format (HARVEST_CLIENTS_API +"%s" , nickName );
341
+ String clientJson = String .format ("{\" dataverseAlias\" :\" root\" ,"
342
+ + "\" type\" :\" oai\" ,"
343
+ + "\" harvestUrl\" :\" %s\" ,"
344
+ + "\" archiveUrl\" :\" %s\" ,"
345
+ + "\" set\" :\" %s\" ,"
346
+ + "\" useOaiIdentifiersAsPids\" : true,"
347
+ + "\" useListRecords\" : true,"
348
+ + "\" metadataFormat\" :\" %s\" }" ,
349
+ DATACITE_OAI_URL , DATACITE_ARCHIVE_URL , pseudoSetName , HARVEST_METADATA_FORMAT );
350
+
351
+ Response createResponse = given ()
352
+ .header (UtilIT .API_TOKEN_HTTP_HEADER , adminUserAPIKey )
353
+ .body (clientJson )
354
+ .post (clientApiPath );
355
+ assertEquals (CREATED .getStatusCode (), createResponse .getStatusCode ());
356
+
357
+ // API TEST 1. Run the harvest using the configuration (client) we have
358
+ // just created
359
+
360
+ String runHarvestApiPath = String .format (HARVEST_CLIENTS_API +"%s/run" , nickName );
361
+
362
+ Response runResponse = given ()
363
+ .header (UtilIT .API_TOKEN_HTTP_HEADER , adminUserAPIKey )
364
+ .post (runHarvestApiPath );
365
+ assertEquals (ACCEPTED .getStatusCode (), runResponse .getStatusCode ());
366
+
367
+ // API TEST 2. As indicated by the ACCEPTED status code above, harvesting
368
+ // is an asynchronous operation that will be performed in the background.
369
+ // Verify that this "in progress" status is properly reported while it's
370
+ // running, and that it completes in some reasonable amount of time.
371
+
372
+ int i = 0 ;
373
+ int maxWait =20 ; // a very conservative interval; this harvest has no business taking this long
374
+
375
+ do {
376
+ // Give it an initial 2 sec. delay, to make sure the client state
377
+ // has been updated in the database, which can take some appreciable
378
+ // amount of time on a heavily-loaded server running a full suite of
379
+ // tests:
380
+ Thread .sleep (2000L );
381
+ // keep checking the status of the client with the GET api:
382
+ Response getClientResponse = given ()
383
+ .get (clientApiPath );
384
+
385
+ assertEquals (OK .getStatusCode (), getClientResponse .getStatusCode ());
386
+ JsonPath responseJsonPath = getClientResponse .body ().jsonPath ();
387
+ assertNotNull (responseJsonPath , "Invalid JSON in GET client response" );
388
+ assertEquals (ApiConstants .STATUS_OK , responseJsonPath .getString ("status" ));
389
+
390
+ String clientStatus = responseJsonPath .getString ("data.status" );
391
+ assertNotNull (clientStatus );
392
+
393
+ if ("inProgress" .equals (clientStatus ) || "IN PROGRESS" .equals (responseJsonPath .getString ("data.lastResult" ))) {
394
+ // we'll sleep for another second
395
+ i ++;
396
+ } else {
397
+ logger .info ("getClientResponse.prettyPrint: "
398
+ + getClientResponse .prettyPrint ());
399
+ // Check the values in the response:
400
+ // a) Confirm that the harvest has completed:
401
+ assertEquals ("inActive" , clientStatus , "Unexpected client status: " +clientStatus );
402
+
403
+ // b) Confirm that it has actually succeeded:
404
+ assertEquals ("SUCCESS" , responseJsonPath .getString ("data.lastResult" ), "Last harvest not reported a success (took " +i +" seconds)" );
405
+ String harvestTimeStamp = responseJsonPath .getString ("data.lastHarvest" );
406
+ assertNotNull (harvestTimeStamp );
407
+
408
+ // c) Confirm that the other timestamps match:
409
+ assertEquals (harvestTimeStamp , responseJsonPath .getString ("data.lastSuccessful" ));
410
+ assertEquals (harvestTimeStamp , responseJsonPath .getString ("data.lastNonEmpty" ));
411
+
412
+ // d) Confirm that the expected 1 dataset has been harvested, with no failures:
413
+ assertEquals (1 , responseJsonPath .getInt ("data.lastDatasetsHarvested" ));
414
+ assertEquals (0 , responseJsonPath .getInt ("data.lastDatasetsFailed" ));
415
+ assertEquals (0 , responseJsonPath .getInt ("data.lastDatasetsDeleted" ));
416
+
417
+ // ok, it looks like the harvest has completed successfully.
418
+ break ;
419
+ }
420
+ } while (i <maxWait );
421
+
422
+ System .out .println ("Waited " + i + " seconds for the harvest to complete." );
423
+ // Fail if it hasn't completed in maxWait seconds
424
+ assertTrue (i < maxWait );
425
+
426
+ // @todo: maybe call native API and check specifically on
427
+ // /api/datasets/:persistentId?persistentId=doi:10.7910/DVN/TJCLKP
428
+ // to verify that it has been properly imporated.
429
+
430
+ // No need to delete the client (and the harvested dataset with it) here,
431
+ // it will be deleted by the @AfterEach cleanup() method
432
+ }
306
433
}
0 commit comments