Skip to content

Commit 4b96cec

Browse files
authored
Remap oai_dc fields dc:type and dc:date (#10737)
* Remap oai_dc fields dc:type, dc:date, and dc:rights #8129. The `oai_dc` export and harvesting format has had the following fields remapped: - dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset". - dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". Now it is mapped only to the field "Publication Date". - dc:rights was not mapped to anything. Now it is mapped (when available) to terms of use, restrictions, and license. * add tests for export and citation date #8129 * map dc:date to pub date or field for citation date #8129 * back out of any changes to dc:rights #8129 * remove OAI-PMH changes from API changelog (also in release note) #8129 * tweak release note, mention backward incompatibility, reexport #8129
1 parent 2792cf9 commit 4b96cec

File tree

5 files changed

+157
-11
lines changed

5 files changed

+157
-11
lines changed

doc/release-notes/8129-harvesting.md

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
### Remap oai_dc export and harvesting format fields: dc:type and dc:date
2+
3+
The `oai_dc` export and harvesting format has had the following fields remapped:
4+
5+
- dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset".
6+
- dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". Now it is mapped to the field "Publication Date" or to the field used for the citation date, if set (see [Set Citation Date Field Type for a Dataset](https://guides.dataverse.org/en/6.3/api/native-api.html#set-citation-date-field-type-for-a-dataset)).
7+
8+
In order for these changes to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run.
9+
10+
For more information, please see #8129 and #10737.
11+
12+
### Backward incompatible changes
13+
14+
See the "Remap oai_dc export" section above.
15+
16+
### Upgrade instructions
17+
18+
In order for changes to the `oai_dc` metadata export format to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run.

doc/sphinx-guides/source/api/native-api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -1836,6 +1836,8 @@ The fully expanded example above (without environment variables) looks like this
18361836
18371837
.. note:: You cannot deaccession a dataset more than once. If you call this endpoint twice for the same dataset version, you will get a not found error on the second call, because the dataset you are looking for is no longer published once it has been deaccessioned.
18381838

1839+
.. _set-citation-date-field:
1840+
18391841
Set Citation Date Field Type for a Dataset
18401842
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
18411843

src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java

+27-6
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import com.google.gson.Gson;
99
import edu.harvard.iq.dataverse.DatasetFieldConstant;
10+
import edu.harvard.iq.dataverse.DatasetFieldType;
11+
import edu.harvard.iq.dataverse.DatasetServiceBean;
1012
import edu.harvard.iq.dataverse.GlobalId;
1113
import edu.harvard.iq.dataverse.api.dto.DatasetDTO;
1214
import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO;
@@ -176,22 +178,41 @@ private static void createOAIDC(XMLStreamWriter xmlw, DatasetDTO datasetDto, Str
176178

177179
writeFullElementList(xmlw, dcFlavor+":"+"language", dto2PrimitiveList(version, DatasetFieldConstant.language));
178180

179-
String date = dto2Primitive(version, DatasetFieldConstant.productionDate);
180-
if (date == null) {
181-
date = datasetDto.getPublicationDate();
181+
/**
182+
* dc:date. "I suggest changing the Dataverse / DC Element (oai_dc)
183+
* mapping, so that dc:date is mapped with Publication Date. This is
184+
* also in line with citation recommendations. The publication date is
185+
* the preferred date when citing research data; see, e.g., page 12 in
186+
* The Tromsø Recommendations for Citation of Research Data in
187+
* Linguistics; https://doi.org/10.15497/rda00040 ." --
188+
* https://github.com/IQSS/dataverse/issues/8129
189+
*
190+
* However, if the citation date field has been set, use that.
191+
*/
192+
String date = datasetDto.getPublicationDate();
193+
DatasetFieldType citationDataType = jakarta.enterprise.inject.spi.CDI.current().select(DatasetServiceBean.class).get().findByGlobalId(globalId.asString()).getCitationDateDatasetFieldType();
194+
if (citationDataType != null) {
195+
date = dto2Primitive(version, citationDataType.getName());
182196
}
183-
writeFullElement(xmlw, dcFlavor+":"+"date", date);
197+
198+
writeFullElement(xmlw, dcFlavor+":"+"date", date);
184199

185200
writeFullElement(xmlw, dcFlavor+":"+"contributor", dto2Primitive(version, DatasetFieldConstant.depositor));
186201

187202
writeContributorElement(xmlw, version, dcFlavor);
188203

189204
writeFullElementList(xmlw, dcFlavor+":"+"relation", dto2PrimitiveList(version, DatasetFieldConstant.relatedDatasets));
190205

191-
writeFullElementList(xmlw, dcFlavor+":"+"type", dto2PrimitiveList(version, DatasetFieldConstant.kindOfData));
206+
/**
207+
* dc:type. "Dublin Core (see
208+
* https://www.dublincore.org/specifications/dublin-core/dcmi-terms/#http://purl.org/dc/terms/type
209+
* ) recommends “to use a controlled vocabulary such as the DCMI Type
210+
* Vocabulary” for dc:type." So we hard-coded it to "Dataset". See
211+
* https://github.com/IQSS/dataverse/issues/8129
212+
*/
213+
writeFullElement(xmlw, dcFlavor+":"+"type", "Dataset");
192214

193215
writeFullElementList(xmlw, dcFlavor+":"+"source", dto2PrimitiveList(version, DatasetFieldConstant.dataSources));
194-
195216

196217
}
197218

src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java

+83-5
Original file line numberDiff line numberDiff line change
@@ -630,8 +630,7 @@ public void testCreatePublishDestroyDataset() {
630630
Response exportDatasetAsDublinCore = UtilIT.exportDataset(datasetPersistentId, "oai_dc", apiToken);
631631
exportDatasetAsDublinCore.prettyPrint();
632632
exportDatasetAsDublinCore.then().assertThat()
633-
// FIXME: Get this working. See https://github.com/rest-assured/rest-assured/wiki/Usage#example-3---complex-parsing-and-validation
634-
// .body("oai_dc:dc.find { it == 'dc:title' }.item", hasItems("Darwin's Finches"))
633+
.body("oai_dc.title", is("Darwin's Finches"))
635634
.statusCode(OK.getStatusCode());
636635

637636
Response exportDatasetAsDdi = UtilIT.exportDataset(datasetPersistentId, "ddi", apiToken);
@@ -1195,8 +1194,7 @@ public void testExport() {
11951194
Response exportDatasetAsDublinCore = UtilIT.exportDataset(datasetPersistentId, "oai_dc", apiToken);
11961195
exportDatasetAsDublinCore.prettyPrint();
11971196
exportDatasetAsDublinCore.then().assertThat()
1198-
// FIXME: Get this working. See https://github.com/rest-assured/rest-assured/wiki/Usage#example-3---complex-parsing-and-validation
1199-
// .body("oai_dc:dc.find { it == 'dc:title' }.item", hasItems("Darwin's Finches"))
1197+
.body("oai_dc.title", is("Dataset One"))
12001198
.statusCode(OK.getStatusCode());
12011199

12021200
Response exportDatasetAsDdi = UtilIT.exportDataset(datasetPersistentId, "ddi", apiToken);
@@ -4103,7 +4101,87 @@ public void getDatasetVersionCitation() {
41034101
.assertThat().body("data.message", containsString(String.valueOf(persistentId)));
41044102
}
41054103

4106-
4104+
@Test
4105+
public void testCitationDate() throws IOException {
4106+
4107+
Response createUser = UtilIT.createRandomUser();
4108+
createUser.then().assertThat().statusCode(OK.getStatusCode());
4109+
String username = UtilIT.getUsernameFromResponse(createUser);
4110+
String apiToken = UtilIT.getApiTokenFromResponse(createUser);
4111+
4112+
Response createDataverse = UtilIT.createRandomDataverse(apiToken);
4113+
createDataverse.then().assertThat().statusCode(CREATED.getStatusCode());
4114+
String dataverseAlias = UtilIT.getAliasFromResponse(createDataverse);
4115+
Integer dataverseId = UtilIT.getDataverseIdFromResponse(createDataverse);
4116+
Response createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
4117+
createDataset.then().assertThat().statusCode(CREATED.getStatusCode());
4118+
Integer datasetId = UtilIT.getDatasetIdFromResponse(createDataset);
4119+
String datasetPid = JsonPath.from(createDataset.getBody().asString()).getString("data.persistentId");
4120+
4121+
Path pathToAddDateOfDepositJson = Paths.get(java.nio.file.Files.createTempDirectory(null) + File.separator + "dateOfDeposit.json");
4122+
String dateOfDeposit = """
4123+
{
4124+
"fields": [
4125+
{
4126+
"typeName": "dateOfDeposit",
4127+
"value": "1999-12-31"
4128+
}
4129+
]
4130+
}
4131+
""";
4132+
java.nio.file.Files.write(pathToAddDateOfDepositJson, dateOfDeposit.getBytes());
4133+
4134+
Response addDateOfDeposit = UtilIT.addDatasetMetadataViaNative(datasetPid, pathToAddDateOfDepositJson.toString(), apiToken);
4135+
addDateOfDeposit.prettyPrint();
4136+
addDateOfDeposit.then().assertThat()
4137+
.statusCode(OK.getStatusCode())
4138+
.body("data.metadataBlocks.citation.fields[5].value", equalTo("1999-12-31"));
4139+
4140+
Response setCitationDate = UtilIT.setDatasetCitationDateField(datasetPid, "dateOfDeposit", apiToken);
4141+
setCitationDate.prettyPrint();
4142+
setCitationDate.then().assertThat().statusCode(OK.getStatusCode());
4143+
4144+
UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);
4145+
UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode());
4146+
4147+
Response getCitationAfter = UtilIT.getDatasetVersionCitation(datasetId, DS_VERSION_LATEST_PUBLISHED, true, apiToken);
4148+
getCitationAfter.prettyPrint();
4149+
4150+
String doi = datasetPid.substring(4);
4151+
4152+
// Note that the year 1999 appears in the citation because we
4153+
// set the citation date field to a field that has that year.
4154+
String expectedCitation = "Finch, Fiona, 1999, \"Darwin's Finches\", <a href=\"https://doi.org/" + doi + "\" target=\"_blank\">https://doi.org/" + doi + "</a>, Root, V1";
4155+
4156+
getCitationAfter.then().assertThat()
4157+
.statusCode(OK.getStatusCode())
4158+
.body("data.message", is(expectedCitation));
4159+
4160+
Response exportDatasetAsDublinCore = UtilIT.exportDataset(datasetPid, "oai_dc", apiToken);
4161+
exportDatasetAsDublinCore.prettyPrint();
4162+
exportDatasetAsDublinCore.then().assertThat()
4163+
.body("oai_dc.type", equalTo("Dataset"))
4164+
.body("oai_dc.date", equalTo("1999-12-31"))
4165+
.statusCode(OK.getStatusCode());
4166+
4167+
Response clearDateField = UtilIT.clearDatasetCitationDateField(datasetPid, apiToken);
4168+
clearDateField.prettyPrint();
4169+
clearDateField.then().assertThat().statusCode(OK.getStatusCode());
4170+
4171+
// Clearing not enough. You have to reexport because the previous date is cached.
4172+
Response rexport = UtilIT.reexportDatasetAllFormats(datasetPid);
4173+
rexport.prettyPrint();
4174+
rexport.then().assertThat().statusCode(OK.getStatusCode());
4175+
4176+
String todayDate = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd"));
4177+
Response exportPostClear = UtilIT.exportDataset(datasetPid, "oai_dc", apiToken);
4178+
exportPostClear.prettyPrint();
4179+
exportPostClear.then().assertThat()
4180+
.body("oai_dc.type", equalTo("Dataset"))
4181+
.body("oai_dc.date", equalTo(todayDate))
4182+
.statusCode(OK.getStatusCode());
4183+
}
4184+
41074185
@Test
41084186
public void getVersionFiles() throws IOException, InterruptedException {
41094187
Response createUser = UtilIT.createRandomUser();

src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java

+27
Original file line numberDiff line numberDiff line change
@@ -3735,6 +3735,33 @@ static Response getDatasetVersionCitation(Integer datasetId, String version, boo
37353735
return response;
37363736
}
37373737

3738+
static Response setDatasetCitationDateField(String datasetIdOrPersistentId, String dateField, String apiToken) {
3739+
String idInPath = datasetIdOrPersistentId; // Assume it's a number.
3740+
String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path.
3741+
if (!NumberUtils.isCreatable(datasetIdOrPersistentId)) {
3742+
idInPath = ":persistentId";
3743+
optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId;
3744+
}
3745+
Response response = given()
3746+
.header(API_TOKEN_HTTP_HEADER, apiToken)
3747+
.body(dateField)
3748+
.put("/api/datasets/" + idInPath + "/citationdate" + optionalQueryParam);
3749+
return response;
3750+
}
3751+
3752+
static Response clearDatasetCitationDateField(String datasetIdOrPersistentId, String apiToken) {
3753+
String idInPath = datasetIdOrPersistentId; // Assume it's a number.
3754+
String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path.
3755+
if (!NumberUtils.isCreatable(datasetIdOrPersistentId)) {
3756+
idInPath = ":persistentId";
3757+
optionalQueryParam = "?persistentId=" + datasetIdOrPersistentId;
3758+
}
3759+
Response response = given()
3760+
.header(API_TOKEN_HTTP_HEADER, apiToken)
3761+
.delete("/api/datasets/" + idInPath + "/citationdate" + optionalQueryParam);
3762+
return response;
3763+
}
3764+
37383765
static Response getFileCitation(Integer fileId, String datasetVersion, String apiToken) {
37393766
Boolean includeDeaccessioned = null;
37403767
return getFileCitation(fileId, datasetVersion, includeDeaccessioned, apiToken);

0 commit comments

Comments
 (0)