diff --git a/conf/solr/schema.xml b/conf/solr/schema.xml index 1773837e39d..2aed50e9998 100644 --- a/conf/solr/schema.xml +++ b/conf/solr/schema.xml @@ -352,6 +352,7 @@ + @@ -593,6 +594,7 @@ + diff --git a/doc/release-notes/10632-DataCiteXMLandRelationType.md b/doc/release-notes/10632-DataCiteXMLandRelationType.md new file mode 100644 index 00000000000..42c1cfb6eda --- /dev/null +++ b/doc/release-notes/10632-DataCiteXMLandRelationType.md @@ -0,0 +1,41 @@ +### Enhanced DataCite Metadata, Relation Type + +A new field has been added to the citation metadatablock to allow entry of the "Relation Type" between a "Related Publication" and a dataset. The Relation Type is currently limited to the six most common values recommended by DataCite: IsCitedBy, Cites, IsSupplementTo, IsSupplementedBy, IsReferencedBy, and References. For existing datasets where no "Relation Type" has been specified, "IsSupplementTo" is assumed. + +Dataverse now supports the DataCite v4.5 schema. Additional metadata, including metadata about Related Publications, and files in the dataset are now being sent to DataCite and improvements to how PIDs (ORCID, ROR, DOIs, etc.), license/terms, geospatial, and other metadata is represented have been made. The enhanced metadata will automatically be sent when datasets are created and published and is available in the DataCite XML export after publication. + +The additions are in rough alignment with the OpenAIRE XML export, but there are some minor differences in addition to the Relation Type addition, including an update to the DataCite 4.5 schema. For details see https://github.com/IQSS/dataverse/pull/10632 and https://github.com/IQSS/dataverse/pull/10615 and the [design document](https://docs.google.com/document/d/1JzDo9UOIy9dVvaHvtIbOI8tFU6bWdfDfuQvWWpC0tkA/edit?usp=sharing) referenced there. 
+ +Multiple backward incompatible changes and bug fixes have been made to API calls (three of the four of which were not documented) related to updating PID target URLs and metadata at the provider service: +- [Update Target URL for a Published Dataset at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-target-url-for-a-published-dataset-at-the-pid-provider) +- [Update Target URL for all Published Datasets at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-target-url-for-all-published-datasets-at-the-pid-provider) +- [Update Metadata for a Published Dataset at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-metadata-for-a-published-dataset-at-the-pid-provider) +- [Update Metadata for all Published Datasets at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-metadata-for-all-published-datasets-at-the-pid-provider) + +Upgrade instructions +-------------------- + +The Solr schema has to be updated via the normal mechanism to add the new "relationType" field. + +The citation metadatablock has to be reinstalled using the standard instructions. + +With these two changes, the "Relation Type" fields will be available and creation/publication of datasets will result in the expanded XML being sent to DataCite. + +To update existing datasets (and files using DataCite DOIs): + +Exports can be updated by running `curl http://localhost:8080/api/admin/metadata/reExportAll` + +Entries at DataCite for published datasets can be updated by a superuser using an API call (newly documented): + +`curl -X POST -H 'X-Dataverse-key:<superuser-api-token>' http://localhost:8080/api/datasets/modifyRegistrationPIDMetadataAll` + +This will loop through all published datasets (and released files with PIDs). As long as the loop completes, the call will return a 200/OK response. 
Any PIDs for which the update fails can be found using + +`grep 'Failure for id' server.log` + +Failures may occur if PIDs were never registered, or if they were never made findable. Any such cases can be fixed manually in DataCite Fabrica or using the [Reserve a PID](https://guides.dataverse.org/en/latest/api/native-api.html#reserve-a-pid) API call and the newly documented `/api/datasets/<id>/modifyRegistration` call respectively. See https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#send-dataset-metadata-to-pid-provider. Please reach out with any questions. + +PIDs can also be updated by a superuser on a per-dataset basis using + +`curl -X POST -H 'X-Dataverse-key:<superuser-api-token>' http://localhost:8080/api/datasets/<id>/modifyRegistrationMetadata` + diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index c6f15968f00..b4d365c4fd4 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -195,12 +195,41 @@ Mints a new identifier for a dataset previously registered with a handle. Only a .. _send-metadata-to-pid-provider: -Send Dataset metadata to PID provider -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Update Target URL for a Published Dataset at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Forces update to metadata provided to the PID provider of a published dataset. Only accessible to superusers. :: +Forces update to the target URL provided to the PID provider of a published dataset and assures the PID is findable. +Only accessible to superusers. 
:: + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/$dataset-id/modifyRegistration + +Update Target URL for all Published Datasets at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Forces update to the target URL provided to the PID provider of all published datasets and assures the PID is findable. +Only accessible to superusers. :: + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/modifyRegistrationAll + +Update Metadata for a Published Dataset at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Checks to see that the PID metadata for a published dataset (and any released files in it using file PIDs) +is up-to-date at the provider and updates the metadata if necessary. +Only accessible to superusers. :: curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/$dataset-id/modifyRegistrationMetadata + +Update Metadata for all Published Datasets at the PID provider +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Checks to see that the PID metadata is up-to-date at the provider for all published datasets +(and any released files in them using file PIDs) and updates the metadata if necessary. +Only accessible to superusers. :: + + curl -H "X-Dataverse-key: $API_TOKEN" -X POST http://$SERVER/api/datasets/modifyRegistrationPIDMetadataAll + +The call returns 200/OK as long as the call completes. Any errors for individual datasets are reported in the log. Check for Unreserved PIDs and Reserve Them ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index a7af3e84b28..c5890fd9996 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -7,6 +7,13 @@ This API changelog is experimental and we would love feedback on its usefulness. 
:local: :depth: 1 +v6.4 +---- + +- **/api/datasets/$dataset-id/modifyRegistration**: Changed from GET to POST +- **/api/datasets/modifyRegistrationPIDMetadataAll**: Changed from GET to POST + + v6.3 ---- diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index a3d0743216e..5087bb8d4da 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -232,6 +232,10 @@ Dataverse can be configured with one or more PID providers, each of which can mi to manage an authority/shoulder combination, aka a "prefix" (PermaLinks also support custom separator characters as part of the prefix), along with an optional list of individual PIDs (with different authority/shoulders) than can be managed with that account. +Dataverse automatically manages assigning PIDs and making them findable when datasets are published. There are also :ref:`API calls that +allow updating the PID target URLs and metadata of already-published datasets manually if needed `, e.g. if a Dataverse instance is +moved to a new URL or when the software is updated to generate additional metadata or address schema changes at the PID service. + Testing PID Providers +++++++++++++++++++++ @@ -246,11 +250,11 @@ configure the credentials as described below. Alternately, you may wish to configure other providers for testing: - - EZID is available to University of California scholars and researchers. Testing can be done using the authority 10.5072 and shoulder FK2 with the "apitest" account (contact EZID for credentials) or an institutional account. Configuration in Dataverse is then analogous to using DataCite. +- EZID is available to University of California scholars and researchers. Testing can be done using the authority 10.5072 and shoulder FK2 with the "apitest" account (contact EZID for credentials) or an institutional account. Configuration in Dataverse is then analogous to using DataCite. 
- - The PermaLink provider, like the FAKE DOI provider, does not involve an external account. - Unlike the Fake DOI provider, the PermaLink provider creates PIDs that begin with "perma:", making it clearer that they are not DOIs, - and that do resolve to the local dataset/file page in Dataverse, making them useful for some production use cases. See :ref:`permalinks` and (for the FAKE DOI provider) the :doc:`/developers/dev-environment` section of the Developer Guide. +- The PermaLink provider, like the FAKE DOI provider, does not involve an external account. + Unlike the Fake DOI provider, the PermaLink provider creates PIDs that begin with "perma:", making it clearer that they are not DOIs, + and that do resolve to the local dataset/file page in Dataverse, making them useful for some production use cases. See :ref:`permalinks` and (for the FAKE DOI provider) the :doc:`/developers/dev-environment` section of the Developer Guide. Provider-specific configuration is described below. diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index cc856c6372f..151c4732ad7 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -331,6 +331,12 @@ "typeClass": "compound", "value": [ { + "publicationRelationType" : { + "typeName" : "publicationRelationType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "IsSupplementTo" + }, "publicationCitation": { "typeName": "publicationCitation", "multiple": false, diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index b7c265f7244..cd0bcbe31f5 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -31,55 +31,56 @@ topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. 
LCSH, MeSH) text 27 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 28 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation publication Related Publication The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab none 29 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for the related publication textbox 30 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 31 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber Identifier The identifier for a related publication text 32 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. 
a journal article webpage https:// url 33 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution - notesText Notes Additional information about the Dataset textbox 34 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language A language that the Dataset's files is written in text 35 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 36 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 37 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 40 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 41
FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 42 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 43 TRUE FALSE TRUE TRUE FALSE FALSE citation - contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 44 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type Indicates the type of contribution made to the dataset text 45 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 46 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Funding Information Information about the Dataset's financial support none 47 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 50 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name The name of the entity, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 51 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 54 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 55
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 56 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 57 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 58 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 59 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection The dates when the data were collected or generated none 62 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Data Type The type of data included in the files (e.g. 
survey data, clinical data, or machine-readable text) text 65 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 66 : FALSE FALSE TRUE FALSE FALSE FALSE citation - seriesName Name The name of the dataset series text 67 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 68 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset none 69 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 70 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - softwareVersion Version The version of the software used to generate the Dataset, e.g. 4.11 text 71 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. 
a book, article, serial, or machine-readable data file) textbox 75 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + publicationRelationType Relation Type The nature of the relationship between this Dataset and the related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://datacite.org/schema/kernel-4/simpleTypes#relationType + publicationCitation Citation The full bibliographic citation for the related publication textbox 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 32 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 33 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. 
a journal article webpage https:// url 34 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 35 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 36 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 37 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 38 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 40 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 41 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 42
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 43 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 44 TRUE FALSE TRUE TRUE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 45 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 46 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 48 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 51 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. 
the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 52 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 54 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 55 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 56
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 57 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 58 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 59 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 60 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 62 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 63 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 65 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. 
survey data, clinical data, or machine-readable text) text 66 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 67 : FALSE FALSE TRUE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 68 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 69 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 70 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 71 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 4.11 text 72 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 75 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. 
a book, article, serial, or machine-readable data file) textbox 76 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 79 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 @@ -8060,3 +8061,9 @@ language ǂUngkue gku 7918 gku language ǃXóõ nmn 7919 nmn language Not applicable 7920 + publicationRelationType IsCitedBy RT1 1 + publicationRelationType Cites RT2 2 + publicationRelationType IsSupplementTo RT3 3 + publicationRelationType IsSupplementedBy RT4 4 + publicationRelationType IsReferencedBy RT5 5 + publicationRelationType References RT6 6 \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 29a4a14c021..1a610d9ea6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -1123,4 +1123,23 @@ private boolean tagExists(String tagLabel) { } return false; } + + public boolean isDeaccessioned() { + // return true, if all published versions were deaccessioned + boolean inDeaccessionedVersions = false; + for (FileMetadata fmd : getFileMetadatas()) { + DatasetVersion testDsv = fmd.getDatasetVersion(); + if (testDsv.isReleased()) { + return false; + } + // Also check for draft version + if (testDsv.isDraft()) { + return false; + } + if 
(testDsv.isDeaccessioned()) { + inDeaccessionedVersions = true; + } + } + return inDeaccessionedVersions; // since any published version would have already returned + } } // end of class diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index d91aa101eb5..abb812d1ba3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -40,6 +40,7 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String note = "note"; + public final static String publicationRelationType = "publicationRelationType"; public final static String publicationCitation = "publicationCitation"; public final static String publicationIDType = "publicationIDType"; public final static String publicationIDNumber = "publicationIDNumber"; @@ -157,6 +158,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String confidentialityDeclaration="confidentialityDeclaration"; public final static String specialPermissions="specialPermissions"; public final static String restrictions="restrictions"; + @Deprecated + //Doesn't appear to be used and is not datasetContact public final static String contact="contact"; public final static String citationRequirements="citationRequirements"; public final static String depositorRequirements="depositorRequirements"; @@ -487,6 +490,8 @@ public String getRestrictions() { return restrictions; } + @Deprecated + //Appears to not be used public String getContact() { return contact; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java index 7680ebc16db..a0696ab38d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java @@ -6,7 +6,6 
@@ package edu.harvard.iq.dataverse; - /** * * @author skraffmiller @@ -25,10 +24,12 @@ public class DatasetRelPublication { private String description; private boolean replicationData; private int displayOrder; + private String relationType; public int getDisplayOrder() { return displayOrder; } + public void setDisplayOrder(int displayOrder) { this.displayOrder = displayOrder; } @@ -64,8 +65,7 @@ public String getUrl() { public void setUrl(String url) { this.url = url; } - - + public String getTitle() { return title; } @@ -82,12 +82,21 @@ public void setDescription(String description) { this.description = description; } - public boolean isEmpty() { - return ((text==null || text.trim().equals("")) - && (!replicationData) - && (idType==null || idType.trim().equals("")) - && (idNumber==null || idNumber.trim().equals("")) - && (url==null || url.trim().equals(""))); - } + public void setRelationType(String type) { + relationType = type; + + } + + public String getRelationType() { + return relationType; + } + + public boolean isEmpty() { + return ((text == null || text.trim().equals("")) + && (!replicationData) + && (idType == null || idType.trim().equals("")) + && (idNumber == null || idNumber.trim().equals("")) + && (url == null || url.trim().equals(""))); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 8b81800ba39..ca044c1554d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -947,7 +947,7 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont try { Thread.sleep(1000); } catch (Exception ex) { - logger.warning("Failed to sleep for a second."); + logger.warning("Failed to sleep for one second."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); diff --git 
a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 943693355a3..0433c425fd2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1342,7 +1342,7 @@ public List getGeographicCoverage() { } geoCoverages.add(coverageItem); } - + break; } } return geoCoverages; @@ -1356,24 +1356,42 @@ public List getRelatedPublications() { for (DatasetFieldCompoundValue publication : dsf.getDatasetFieldCompoundValues()) { DatasetRelPublication relatedPublication = new DatasetRelPublication(); for (DatasetField subField : publication.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { - String citation = subField.getDisplayValue(); - relatedPublication.setText(citation); - } - - - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { - // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType - // for this url metadata field is likely set up so that the display value is automatically - // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output. - // So we want to use the raw value of the field instead, with - // minimal HTML sanitation, just in case (this would be done on all URLs in getDisplayValue()). 
- String url = subField.getValue(); - if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) { - relatedPublication.setUrl(""); - } else { - relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url)); - } + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: + relatedPublication.setText(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationURL: + // We have to avoid using subField.getDisplayValue() here - because the + // DisplayFormatType + // for this url metadata field is likely set up so that the display value is + // automatically + // turned into a clickable HTML HREF block, which we don't want to end in our + // Schema.org + // JSON-LD output. So we want to use the raw value of the field instead, with + // minimal HTML + // sanitation, just in case (this would be done on all URLs in + // getDisplayValue()). + String url = subField.getValue(); + if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) { + relatedPublication.setUrl(""); + } else { + relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url)); + } + break; + case DatasetFieldConstant.publicationIDType: + // QDR idType has a trailing : now (Aug 2021) + // Get value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdType(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationIDNumber: + // Get sanitized value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdNumber(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationRelationType: + relatedPublication.setRelationType(subField.getDisplayValue()); + break; } } relatedPublications.add(relatedPublication); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java index 
55b98c178bb..f1ddf2304b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java @@ -16,6 +16,8 @@ import java.util.List; import java.util.TreeMap; +import org.apache.commons.lang3.StringUtils; + import jakarta.ejb.EJB; import jakarta.faces.view.ViewScoped; import jakarta.inject.Inject; @@ -62,14 +64,14 @@ public void setMetadataBlocksForEdit(TreeMap> public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boolean createBlanks) { /*takes in the values of a dataset version and apportions them into lists for - viewing and editng in the dataset page. + viewing and editing in the dataset page. */ setDatasetVersion(datasetVersion); //this.setDatasetAuthors(new ArrayList()); this.setDatasetRelPublications(new ArrayList<>()); - // loop through vaues to get fields for view mode + // loop through values to get fields for view mode for (DatasetField dsf : datasetVersion.getDatasetFields()) { //Special Handling for various fields displayed above tabs in dataset page view. 
if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.title)) { @@ -114,17 +116,23 @@ public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boo datasetRelPublication.setTitle(dsf.getDatasetFieldType().getLocaleTitle()); datasetRelPublication.setDescription(dsf.getDatasetFieldType().getLocaleDescription()); for (DatasetField subField : relPubVal.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { - datasetRelPublication.setText(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDNumber)) { - datasetRelPublication.setIdNumber(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDType)) { - datasetRelPublication.setIdType(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { - datasetRelPublication.setUrl(subField.getValue()); + String value = subField.getValue(); + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: + datasetRelPublication.setText(subField.getValue()); + break; + case DatasetFieldConstant.publicationIDNumber: + datasetRelPublication.setIdNumber(subField.getValue()); + break; + case DatasetFieldConstant.publicationIDType: + datasetRelPublication.setIdType(subField.getValue()); + break; + case DatasetFieldConstant.publicationURL: + datasetRelPublication.setUrl(subField.getValue()); + break; + case DatasetFieldConstant.publicationRelationType: + datasetRelPublication.setRelationType(subField.getValue()); + break; } } this.getDatasetRelPublications().add(datasetRelPublication); @@ -263,6 +271,18 @@ public String getRelPublicationUrl() { } } + public String getRelPublicationRelationType() { + if (!this.datasetRelPublications.isEmpty()) { + //Add ': ' formatting if relationType exists + String relationType = 
this.getDatasetRelPublications().get(0).getRelationType(); + if (!StringUtils.isBlank(relationType)) { + return relationType + ": "; + } + } + return ""; + + } + public String getUNF() { //todo get UNF to calculate and display here. return ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java index 0b7285c017e..8c4fb6b1325 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java +++ b/src/main/java/edu/harvard/iq/dataverse/ExternalIdentifier.java @@ -12,7 +12,9 @@ public enum ExternalIdentifier { GND("GND", "https://d-nb.info/gnd/%s", "^1[01]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X]$"), // note: DAI is missing from this list, because it doesn't have resolvable URL ResearcherID("ResearcherID", "https://publons.com/researcher/%s/", "^[A-Z\\d][A-Z\\d-]+[A-Z\\d]$"), - ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$"); + ScopusID("ScopusID", "https://www.scopus.com/authid/detail.uri?authorId=%s", "^\\d*$"), + //Requiring ROR to be URL form as we use it where there is no id type field and matching any 9 digit number starting with 0 seems a bit aggressive + ROR("ROR", "https://ror.org/%s", "^(https:\\/\\/ror.org\\/)0[a-hj-km-np-tv-z|0-9]{6}[0-9]{2}$"); private String name; private String template; diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index a542cb52ac0..1c8783c5bd5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -100,6 +100,13 @@ public String asURL() { } return null; } + + public String asRawIdentifier() { + if (protocol == null || authority == null || identifier == null) { + return ""; + } + return authority + separator + identifier; + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java 
b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 034ba4536a1..f26b6d67edf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -658,7 +658,7 @@ public Response getLinkset(@Context ContainerRequestContext crc, @PathParam("id" } } - @GET + @POST @AuthRequired @Path("{id}/modifyRegistration") public Response updateDatasetTargetURL(@Context ContainerRequestContext crc, @PathParam("id") String id ) { @@ -706,7 +706,7 @@ public Response updateDatasetPIDMetadata(@Context ContainerRequestContext crc, @ }, getRequestUser(crc)); } - @GET + @POST @AuthRequired @Path("/modifyRegistrationPIDMetadataAll") public Response updateDatasetPIDMetadataAll(@Context ContainerRequestContext crc) { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index 7b7c5fd0e93..db9dc142506 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -118,9 +118,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { pidProvider.generatePid(theDataset); } - // Attempt the registration if importing dataset through the API, or the app (but not harvest) - handlePid(theDataset, ctxt); - DatasetType defaultDatasetType = ctxt.datasetTypes().getByName(DatasetType.DEFAULT_DATASET_TYPE); DatasetType existingDatasetType = theDataset.getDatasetType(); logger.fine("existing dataset type: " + existingDatasetType); @@ -130,6 +127,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { } else { theDataset.setDatasetType(defaultDatasetType); } + + // Attempt the registration if importing dataset through the API, or the app (but not harvest) + handlePid(theDataset, ctxt); + + 
ctxt.em().persist(theDataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 69ebe6feed8..fa8cfeb810a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -211,7 +211,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (theDataset.getLatestVersion().getVersionState() != RELEASED) { // some imported datasets may already be released. - + theDataset.getLatestVersion().setVersionState(RELEASED); if (!datasetExternallyReleased) { publicizeExternalIdentifier(theDataset, ctxt); // Will throw a CommandException, unless successful. @@ -220,7 +220,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // a failure - it will remove any locks, and it will send a // proper notification to the user(s). 
} - theDataset.getLatestVersion().setVersionState(RELEASED); } final Dataset ds = ctxt.em().merge(theDataset); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java index 5a0ae7cbf5d..8cf2d0109d6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetTargetURLCommand.java @@ -46,11 +46,13 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { ctxt.em().merge(target); ctxt.em().flush(); for (DataFile df : target.getFiles()) { - doiRetString = pidProvider.modifyIdentifierTargetURL(df); - if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { - df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); - ctxt.em().merge(df); - ctxt.em().flush(); + if (df.isReleased()) { + doiRetString = pidProvider.modifyIdentifierTargetURL(df); + if (doiRetString != null && doiRetString.contains(df.getIdentifier())) { + df.setGlobalIdCreateTime(new Timestamp(new Date().getTime())); + ctxt.em().merge(df); + ctxt.em().flush(); + } } } } else { diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java index 5bf54ac1ec1..14d17dcd900 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDvObjectPIDMetadataCommand.java @@ -69,7 +69,8 @@ protected void executeImpl(CommandContext ctxt) throws CommandException { for (DataFile df : target.getFiles()) { if (isFilePIDsEnabled && // using file PIDs and (!(df.getIdentifier() == null || df.getIdentifier().isEmpty()) || // identifier exists, or - 
canCreatePidsForFiles) // we can create PIDs for files + canCreatePidsForFiles) && // we can create PIDs for files and + df.isReleased() // the file is not a draft ) { doiRetString = pidProvider.updateIdentifier(df); if (doiRetString) { diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java index 8caf32b2df0..c21d6b5cd1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java @@ -7,6 +7,7 @@ import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import io.gdcc.spi.export.XMLExporter; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.OutputStream; @@ -20,11 +21,7 @@ */ @AutoService(Exporter.class) public class DataCiteExporter implements XMLExporter { - - private static String DEFAULT_XML_NAMESPACE = "http://datacite.org/schema/kernel-3"; - private static String DEFAULT_XML_SCHEMALOCATION = "http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"; - private static String DEFAULT_XML_VERSION = "3.0"; - + public static final String NAME = "Datacite"; @Override @@ -60,17 +57,17 @@ public Boolean isAvailableToUsers() { @Override public String getXMLNameSpace() { - return DataCiteExporter.DEFAULT_XML_NAMESPACE; + return XmlMetadataTemplate.XML_NAMESPACE; } @Override public String getXMLSchemaLocation() { - return DataCiteExporter.DEFAULT_XML_SCHEMALOCATION; + return XmlMetadataTemplate.XML_SCHEMA_LOCATION; } @Override public String getXMLSchemaVersion() { - return DataCiteExporter.DEFAULT_XML_VERSION; + return XmlMetadataTemplate.XML_SCHEMA_VERSION; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 
9a689f7a4ed..f5efc448090 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -24,6 +24,8 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -111,9 +113,9 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles()); @@ -133,9 +135,9 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", 
datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createFileDscr(xmlw, fileDetails); @@ -186,15 +188,15 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "subTitl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.subTitle)); FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); if (altField != null) { writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); } xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(pid); @@ -218,23 +220,23 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) boolean excludeRepository = settingsService.isTrueForKey(SettingsServiceBean.Key.ExportInstallationAsDistributorOnlyWhenNotSet, false); if (!StringUtils.isEmpty(datasetDto.getPublisher()) && !(excludeRepository && distributorSet)) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); //distrbtr } writeDistributorsElement(xmlw, version, datasetDto.getMetadataLanguage()); writeContactsElement(xmlw, version); /* per SCHEMA, depositr comes before depDate! - L.A. 
*/ - writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); + XmlWriterUtil.writeFullElement(xmlw, "depositr", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.depositor)); /* ... and depDate comes before distDate - L.A. */ - writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); - writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); + XmlWriterUtil.writeFullElement(xmlw, "depDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + XmlWriterUtil.writeFullElement(xmlw, "distDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.distributionDate)); xmlw.writeEndElement(); // diststmt writeSeriesElement(xmlw, version); xmlw.writeStartElement("holdings"); - writeAttribute(xmlw, "URI", pidUri); + XmlWriterUtil.writeAttribute(xmlw, "URI", pidUri); xmlw.writeEndElement(); //holdings xmlw.writeEndElement(); // citation @@ -247,7 +249,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeSubjectElement(xmlw, version, datasetDto.getMetadataLanguage()); //Subject and Keywords writeAbstractElement(xmlw, version, datasetDto.getMetadataLanguage()); // Description writeSummaryDescriptionElement(xmlw, version, datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.notesText)); //////// xmlw.writeEndElement(); // stdyInfo @@ -255,7 +257,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeDataAccess(xmlw , version); writeOtherStudyMaterial(xmlw , version); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, 
DatasetFieldConstant.datasetLevelErrorNotes)); xmlw.writeEndElement(); // stdyDscr @@ -274,10 +276,10 @@ private static void writeOtherStudyMaterial(XMLStreamWriter xmlw , DatasetVersio return; } xmlw.writeStartElement("othrStdyMat"); - writeFullElementList(xmlw, "relMat", relMaterials); - writeFullElementList(xmlw, "relStdy", relDatasets); + XmlWriterUtil.writeFullElementList(xmlw, "relMat", relMaterials); + XmlWriterUtil.writeFullElementList(xmlw, "relStdy", relDatasets); writeRelPublElement(xmlw, version); - writeFullElementList(xmlw, "othRefs", relReferences); + XmlWriterUtil.writeFullElementList(xmlw, "othRefs", relReferences); xmlw.writeEndElement(); //othrStdyMat } @@ -292,29 +294,29 @@ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO ver xmlw.writeStartElement("dataAccs"); xmlw.writeStartElement("setAvail"); - writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); - writeFullElement(xmlw, "origArch", version.getOriginalArchive()); - writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); - writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); - writeFullElement(xmlw, "complete", version.getStudyCompletion()); + XmlWriterUtil.writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); + XmlWriterUtil.writeFullElement(xmlw, "origArch", version.getOriginalArchive()); + XmlWriterUtil.writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); + XmlWriterUtil.writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); + XmlWriterUtil.writeFullElement(xmlw, "complete", version.getStudyCompletion()); xmlw.writeEndElement(); //setAvail xmlw.writeStartElement("useStmt"); - writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); - writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); - writeFullElement(xmlw, "restrctn", version.getRestrictions()); - writeFullElement(xmlw, "contact", version.getContactForAccess()); - writeFullElement(xmlw, 
"citReq", version.getCitationRequirements()); - writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); - writeFullElement(xmlw, "conditions", version.getConditions()); - writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); + XmlWriterUtil.writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); + XmlWriterUtil.writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); + XmlWriterUtil.writeFullElement(xmlw, "restrctn", version.getRestrictions()); + XmlWriterUtil.writeFullElement(xmlw, "contact", version.getContactForAccess()); + XmlWriterUtil.writeFullElement(xmlw, "citReq", version.getCitationRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "conditions", version.getConditions()); + XmlWriterUtil.writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); xmlw.writeEndElement(); //useStmt /* any s: */ if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); - writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeAttribute("type", NOTE_TYPE_TERMS_OF_ACCESS); + xmlw.writeAttribute("level", LEVEL_DV); xmlw.writeCharacters(version.getTermsOfAccess()); xmlw.writeEndElement(); //notes } @@ -341,9 +343,9 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase xmlw.writeStartElement("docDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", 
persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt @@ -351,11 +353,11 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase //The doc is always published by the Dataverse Repository if (!StringUtils.isEmpty(datasetDto.getPublisher())) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); // distrbtr } - writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); + XmlWriterUtil.writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt writeVersionStatement(xmlw, version); @@ -369,10 +371,10 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - writeAttribute(xmlw,"source","archive"); + xmlw.writeAttribute("source","archive"); xmlw.writeStartElement("version"); - writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); - writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); + XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); + XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString()); xmlw.writeEndElement(); // version xmlw.writeEndElement(); // verStmt @@ -523,14 +525,14 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset * "" entries, then all the "" ones: */ for (String nationEntry : nationList) { - writeFullElement(xmlw, "nation", nationEntry); + XmlWriterUtil.writeFullElement(xmlw, "nation", 
nationEntry); } for (String geogCoverEntry : geogCoverList) { - writeFullElement(xmlw, "geogCover", geogCoverEntry); + XmlWriterUtil.writeFullElement(xmlw, "geogCover", geogCoverEntry); } } - writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); + XmlWriterUtil.writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); /* Only 1 geoBndBox is allowed in the DDI. So, I'm just going to arbitrarily use the first one, and ignore the rest! -L.A. */ @@ -563,16 +565,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset */ if (geoBndBoxMap.get("westBL") != null) { - writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); + XmlWriterUtil.writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); } if (geoBndBoxMap.get("eastBL") != null) { - writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); + XmlWriterUtil.writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); } if (geoBndBoxMap.get("southBL") != null) { - writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); + XmlWriterUtil.writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); } if (geoBndBoxMap.get("northBL") != null) { - writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); + XmlWriterUtil.writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); } xmlw.writeEndElement(); @@ -580,7 +582,7 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset /* analyUnit: */ if (unitOfAnalysisDTO != null) { - writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); } @@ -600,16 +602,16 @@ private static void 
writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset private static void writeMultipleElement(XMLStreamWriter xmlw, String element, FieldDTO fieldDTO, String lang) throws XMLStreamException { for (String value : fieldDTO.getMultiplePrimitive()) { //Write multiple lang vals for controlled vocab, otherwise don't include any lang tag - writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); + XmlWriterUtil.writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); } } private static void writeDateElement(XMLStreamWriter xmlw, String element, String cycle, String event, String dateIn) throws XMLStreamException { xmlw.writeStartElement(element); - writeAttribute(xmlw, "cycle", cycle); - writeAttribute(xmlw, "event", event); - writeAttribute(xmlw, "date", dateIn); + XmlWriterUtil.writeAttribute(xmlw, "cycle", cycle); + XmlWriterUtil.writeAttribute(xmlw, "event", event); + XmlWriterUtil.writeAttribute(xmlw, "date", dateIn); xmlw.writeCharacters(dateIn); xmlw.writeEndElement(); @@ -641,15 +643,15 @@ private static void writeDateElement(XMLStreamWriter xmlw, String element, Strin private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO version, String lang) throws XMLStreamException{ xmlw.writeStartElement("method"); xmlw.writeStartElement("dataColl"); - writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); - writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); - writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); - writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); - writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); + XmlWriterUtil.writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataCollector", 
version, DatasetFieldConstant.dataCollector, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); + XmlWriterUtil.writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); + XmlWriterUtil.writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); writeTargetSampleElement(xmlw, version); - writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); + XmlWriterUtil.writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); /* comes before : */ FieldDTO collModeFieldDTO = dto2FieldDTO(version, DatasetFieldConstant.collectionMode, "socialscience"); @@ -658,37 +660,37 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO // Below is a backward compatibility check allowing export to work in // an instance where the metadata block has not been updated yet. 
if (collModeFieldDTO.getMultiple()) { - writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); } else { - writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); } } /* and so does : */ - writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); + XmlWriterUtil.writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); xmlw.writeStartElement("sources"); - writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); - writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); - writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); + XmlWriterUtil.writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); + XmlWriterUtil.writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); xmlw.writeEndElement(); //sources - writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); - writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collSitu", version, 
DatasetFieldConstant.dataCollectionSituation, lang); + XmlWriterUtil.writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); /* "" has the uppercase C: */ - writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); - writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); - writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); + XmlWriterUtil.writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); xmlw.writeEndElement(); //dataColl /* before : */ writeNotesElement(xmlw, version); xmlw.writeStartElement("anlyInfo"); - //writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); - writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); - writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); - writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); + //XmlWriterUtil.writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); + XmlWriterUtil.writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); xmlw.writeEndElement(); //anlyInfo xmlw.writeEndElement();//method @@ -705,7 +707,7 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO if (CITATION_BLOCK_NAME.equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if 
(DatasetFieldConstant.subject.equals(fieldDTO.getTypeName())) { - writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", + XmlWriterUtil.writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", fieldDTO.getTypeClass(), "citation", lang); } @@ -732,14 +734,10 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!keywordValue.isEmpty()) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue(keywordValue, DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -753,13 +751,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // Keyword } @@ -792,14 +786,10 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), } if (!topicClassificationValue.isEmpty()) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if 
(!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue( topicClassificationValue, DatasetFieldConstant.topicClassValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -813,13 +803,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // topcClas } @@ -856,9 +842,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); - if(!authorAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",authorAffiliation); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty } @@ -879,9 +863,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); - if(!contributorType.isEmpty()){ - writeAttribute(xmlw,"role", contributorType); - } 
+ XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId } @@ -921,12 +903,8 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO // TODO: Since datasetContactEmail is a required field but datasetContactName is not consider not checking if datasetContactName is empty so we can write out datasetContactEmail. if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); - if(!datasetContactAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",datasetContactAffiliation); - } - if(!datasetContactEmail.isEmpty()){ - writeAttribute(xmlw,"email",datasetContactEmail); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty } @@ -969,15 +947,9 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } if (!producerName.isEmpty()) { xmlw.writeStartElement("producer"); - if (!producerAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", producerAffiliation); - } - if (!producerAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", producerAbbreviation); - } - /*if (!producerLogo.isEmpty()) { - writeAttribute(xmlw, "role", producerLogo); - }*/ + XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); + //XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty } @@ -987,7 +959,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } } } - writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); + XmlWriterUtil.writeFullElement(xmlw, "prodDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.productionDate)); // 
productionPlace was made multiple as of 5.14: // (a quick backward compatibility check was added to dto2PrimitiveList(), // see the method for details) @@ -1033,17 +1005,11 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - if (!distributorAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", distributorAffiliation); - } - if (!distributorAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", distributorAbbreviation); - } - if (!distributorURL.isEmpty()) { - writeAttribute(xmlw, "URI", distributorURL); + xmlw.writeAttribute("xml:lang", lang); } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", distributorAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", distributorAbbreviation); + XmlWriterUtil.writeAttribute(xmlw, "URI", distributorURL); xmlw.writeCharacters(distributorName); xmlw.writeEndElement(); //AuthEnty } @@ -1102,7 +1068,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO (In other words - titlStmt is mandatory! -L.A.) 
*/ xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", citation); + XmlWriterUtil.writeFullElement(xmlw, "titl", citation); if (IDNo != null && !IDNo.trim().equals("")) { xmlw.writeStartElement("IDNo"); @@ -1115,7 +1081,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO xmlw.writeEndElement(); // titlStmt - writeFullElement(xmlw,"biblCit",citation); + XmlWriterUtil.writeFullElement(xmlw,"biblCit",citation); xmlw.writeEndElement(); //citation if (url != null && !url.trim().equals("") ) { xmlw.writeStartElement("ExtLink"); @@ -1163,11 +1129,9 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); - if(!descriptionDate.isEmpty()){ - writeAttribute(xmlw,"date",descriptionDate); - } + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); + xmlw.writeAttribute("xml:lang", lang); } xmlw.writeCharacters(descriptionText); xmlw.writeEndElement(); //abstract @@ -1200,9 +1164,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); - if(!grantAgency.isEmpty()){ - writeAttribute(xmlw,"agency",grantAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno } @@ -1234,9 +1196,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); - if(!otherIdAgency.isEmpty()){ - writeAttribute(xmlw,"agency",otherIdAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo } @@ -1268,9 +1228,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!softwareName.isEmpty()){ 
xmlw.writeStartElement("software"); - if(!softwareVersion.isEmpty()){ - writeAttribute(xmlw,"version",softwareVersion); - } + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software } @@ -1383,12 +1341,8 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); - if(!notesType.isEmpty()){ - writeAttribute(xmlw,"type",notesType); - } - if(!notesSubject.isEmpty()){ - writeAttribute(xmlw,"subject",notesSubject); - } + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); xmlw.writeCharacters(notesText); xmlw.writeEndElement(); } @@ -1412,14 +1366,14 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos // and observations, etc.) if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); + XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); String pidURL = fileDTo.getDataFile().getPidURL(); if (pidURL != null && !pidURL.isEmpty()){ - writeAttribute(xmlw, "URI", pidURL); + xmlw.writeAttribute("URI", pidURL); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileDTo.getDataFile().getFilename()); xmlw.writeEndElement(); // labl @@ -1430,9 +1384,9 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos String contentType = fileDTo.getDataFile().getContentType(); if (!StringUtilisEmpty(contentType)) { xmlw.writeStartElement("notes"); - 
writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(contentType); xmlw.writeEndElement(); // notes } @@ -1460,14 +1414,14 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // and observations, etc.) if (!fileJson.containsKey("dataTables")) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileJson.getJsonNumber(("id").toString())); + xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString())); if (fileJson.containsKey("pidUrl")){ - writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); + XmlWriterUtil.writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileJson.getString("filename")); xmlw.writeEndElement(); // labl @@ -1482,9 +1436,9 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // specially formatted notes section: if (fileJson.containsKey("contentType")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(fileJson.getString("contentType")); xmlw.writeEndElement(); // notes } 
@@ -1502,33 +1456,7 @@ private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) xmlw.writeEndElement(); // txt } - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - return fieldDTO.getSinglePrimitive(); - } - } - } - return null; - } - - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - String rawVal = fieldDTO.getSinglePrimitive(); - if (fieldDTO.isControlledVocabularyField()) { - return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), - locale, false); - } - } - } - } - return null; - } + private static List dto2PrimitiveList(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { @@ -1562,104 +1490,6 @@ private static FieldDTO dto2FieldDTO(DatasetVersionDTO datasetVersionDTO, String return null; } - private static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { - //For the simplest Elements we can - if (values != null && !values.isEmpty()) { - for (String value : values) { - xmlw.writeStartElement(name); - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - } - - private static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, - String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) - throws XMLStreamException 
{ - - if (values != null && !values.isEmpty()) { - Locale defaultLocale = Locale.getDefault(); - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); - if (localeVal != null) { - - value = localeVal; - writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); - } else { - writeFullElement(xmlw, name, value); - } - } else { - writeFullElement(xmlw, name, value); - } - } - if (lang != null && !defaultLocale.getLanguage().equals(lang)) { - // Get values in dataset metadata language - // Loop before testing fieldTypeClass to be ready for external CVV - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); - if (localeVal != null) { - writeFullElement(xmlw, name, localeVal, lang); - } - } - } - } - } - } - - private static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, - String fieldTypeName, String lang) throws XMLStreamException { - // Get the default value - String val = dto2Primitive(version, fieldTypeName); - Locale defaultLocale = Locale.getDefault(); - // Get the language-specific value for the default language - // A null value is returned if this is not a CVV field - String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); - String requestedLocaleVal = null; - if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { - // Also get the value in the requested locale/lang if that's not the default - // lang. 
- requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); - } - // FWIW locale-specific vals will only be non-null for CVV values (at present) - if (localeVal == null && requestedLocaleVal == null) { - // Not CVV/no translations so print without lang tag - writeFullElement(xmlw, name, val); - } else { - // Print in either/both languages if we have values - if (localeVal != null) { - // Print the value for the default locale with it's own lang tag - writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); - } - // Also print in the request lang (i.e. the metadata language for the dataset) if a value exists, print it with a lang tag - if (requestedLocaleVal != null) { - writeFullElement(xmlw, name, requestedLocaleVal, lang); - } - } - } - - private static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - writeFullElement(xmlw, name, value, null); - } - - private static void writeFullElement (XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { - //For the simplest Elements we can - if (!StringUtilisEmpty(value)) { - xmlw.writeStartElement(name); - if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - - private static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - if (!StringUtilisEmpty(value)) { - xmlw.writeAttribute(name, value); - } - } private static boolean StringUtilisEmpty(String str) { if (str == null || str.trim().equals("")) { @@ -1747,14 +1577,14 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t } private static void createVarGroupDDI(XMLStreamWriter xmlw, JsonObject varGrp) throws XMLStreamException { xmlw.writeStartElement("varGrp"); - writeAttribute(xmlw, "ID", "VG" + varGrp.getJsonNumber("id").toString()); + 
xmlw.writeAttribute("ID", "VG" + varGrp.getJsonNumber("id").toString()); String vars = ""; JsonArray varsInGroup = varGrp.getJsonArray("dataVariableIds"); for (int j=0;j sumStat : dvar.getJsonObject("summaryStatistics").entrySet()) { xmlw.writeStartElement("sumStat"); - writeAttribute(xmlw, "type", sumStat.getKey()); + XmlWriterUtil.writeAttribute(xmlw, "type", sumStat.getKey()); xmlw.writeCharacters(((JsonString)sumStat.getValue()).getString()); xmlw.writeEndElement(); // sumStat } @@ -1917,7 +1747,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject varCat = varCats.getJsonObject(i); xmlw.writeStartElement("catgry"); if (varCat.getBoolean("isMissing")) { - writeAttribute(xmlw, "missing", "Y"); + xmlw.writeAttribute("missing", "Y"); } // catValu @@ -1928,7 +1758,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // label if (varCat.containsKey("label")) { xmlw.writeStartElement("labl"); - writeAttribute(xmlw, "level", "category"); + xmlw.writeAttribute("level", "category"); xmlw.writeCharacters(varCat.getString("label")); xmlw.writeEndElement(); // labl } @@ -1936,7 +1766,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // catStat if (varCat.containsKey("frequency")) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("type", "freq"); Double freq = varCat.getJsonNumber("frequency").doubleValue(); // if frequency is actually a long value, we want to write "100" instead of // "100.0" @@ -1955,8 +1785,8 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject cm = catMetas.getJsonObject(j); if (cm.getString("categoryValue").equals(varCat.getString("value"))) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "wgtd", "wgtd"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("wgtd", "wgtd"); + xmlw.writeAttribute("type", "freq"); 
xmlw.writeCharacters(cm.getJsonNumber("wFreq").toString()); xmlw.writeEndElement(); // catStat break; @@ -1972,24 +1802,24 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // varFormat xmlw.writeEmptyElement("varFormat"); if(dvar.containsKey("variableFormatType")) { - writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); + XmlWriterUtil.writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); } else { throw new XMLStreamException("Illegal Variable Format Type!"); } if(dvar.containsKey("format")) { - writeAttribute(xmlw, "formatname", dvar.getString("format")); + XmlWriterUtil.writeAttribute(xmlw, "formatname", dvar.getString("format")); } //experiment writeAttribute(xmlw, "schema", dv.getFormatSchema()); if(dvar.containsKey("formatCategory")) { - writeAttribute(xmlw, "category", dvar.getString("formatCategory")); + XmlWriterUtil.writeAttribute(xmlw, "category", dvar.getString("formatCategory")); } // notes if (dvar.containsKey("UNF") && !dvar.getString("UNF").isBlank()) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "subject", "Universal Numeric Fingerprint"); - writeAttribute(xmlw, "level", "variable"); - writeAttribute(xmlw, "type", "Dataverse:UNF"); + xmlw.writeAttribute("subject", "Universal Numeric Fingerprint"); + xmlw.writeAttribute("level", "variable"); + xmlw.writeAttribute("type", "Dataverse:UNF"); xmlw.writeCharacters(dvar.getString("UNF")); xmlw.writeEndElement(); //notes } @@ -2020,8 +1850,8 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) } xmlw.writeStartElement("fileDscr"); String fileId = fileJson.getJsonNumber("id").toString(); - writeAttribute(xmlw, "ID", "f" + fileId); - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileId); + xmlw.writeAttribute("ID", "f" + fileId); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileId); xmlw.writeStartElement("fileTxt"); 
xmlw.writeStartElement("fileName"); @@ -2064,9 +1894,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) // (Universal Numeric Fingerprint) signature: if ((dt!=null) && (dt.containsKey("UNF") && !dt.getString("UNF").isBlank())) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_UNF); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_UNF); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_UNF); + xmlw.writeAttribute("subject", NOTE_SUBJECT_UNF); xmlw.writeCharacters(dt.getString("UNF")); xmlw.writeEndElement(); // notes } @@ -2075,9 +1905,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) JsonArray tags = fileJson.getJsonArray("tabularTags"); for (int j = 0; j < tags.size(); j++) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_TAG); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_TAG); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_TAG); + xmlw.writeAttribute("subject", NOTE_SUBJECT_TAG); xmlw.writeCharacters(tags.getString(j)); xmlw.writeEndElement(); // notes } @@ -2091,13 +1921,7 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) - private static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - return true; - } public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); diff --git a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java index 9a2c3085d2d..d201801bc45 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java @@ -30,6 +30,8 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; + /** * * @author skraffmi @@ -322,26 +324,35 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO String IDType = ""; String IDNo = ""; String url = ""; + String relationType = null; for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); - if (DatasetFieldConstant.publicationCitation.equals(next.getTypeName())) { - citation = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDType.equals(next.getTypeName())) { - IDType = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDNumber.equals(next.getTypeName())) { - IDNo = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { - url = next.getSinglePrimitive(); + switch (next.getTypeName()) { + case DatasetFieldConstant.publicationCitation: + citation = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDType: + IDType = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDNumber: + IDNo = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationURL: + url = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationRelationType: + relationType = next.getSinglePrimitive(); + break; } } + if(StringUtils.isBlank(relationType)) { + relationType = "isReferencedBy"; + } pubString = appendCommaSeparatedValue(citation, IDType); pubString = appendCommaSeparatedValue(pubString, IDNo); pubString = appendCommaSeparatedValue(pubString, url); if (!pubString.isEmpty()){ - xmlw.writeStartElement(dcFlavor+":"+"isReferencedBy"); + xmlw.writeStartElement(dcFlavor+":" + relationType); 
xmlw.writeCharacters(pubString); xmlw.writeEndElement(); //relPubl } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 4b8822e8b66..dd01750942d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -932,6 +932,7 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe String relatedIdentifierType = null; String relatedIdentifier = null; // is used when relatedIdentifierType variable is not URL String relatedURL = null; // is used when relatedIdentifierType variable is URL + String relationType = null; // is used when relatedIdentifierType variable is URL for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); @@ -944,6 +945,9 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { relatedURL = next.getSinglePrimitive(); } + if (DatasetFieldConstant.publicationRelationType.equals(next.getTypeName())) { + relationType = next.getSinglePrimitive(); + } } if (StringUtils.isNotBlank(relatedIdentifierType)) { @@ -956,7 +960,10 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe } relatedIdentifier_map.put("relatedIdentifierType", relatedIdentifierType); - relatedIdentifier_map.put("relationType", "IsCitedBy"); + if(relationType== null) { + relationType = "IsCitedBy"; + } + relatedIdentifier_map.put("relationType", relationType); if (StringUtils.containsIgnoreCase(relatedIdentifierType, "url")) { writeFullElement(xmlw, null, "relatedIdentifier", relatedIdentifier_map, relatedURL, language); @@ -1436,6 +1443,8 @@ public static void writeFundingReferencesElement(XMLStreamWriter xmlw, DatasetVe writeEndTag(xmlw, 
fundingReference_check); } + + //Duplicates XmlWriterUtil.dto2Primitive private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { // give the single value of the given metadata for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java index 40044408c63..b01fb5e7eba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/PidProviderFactoryBean.java @@ -23,9 +23,11 @@ import jakarta.ejb.Singleton; import jakarta.ejb.Startup; import jakarta.inject.Inject; +import jakarta.json.JsonObject; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.GlobalId; @@ -247,4 +249,5 @@ public PidProvider getDefaultPidGenerator() { return PidUtil.getPidProvider(protocol, authority, shoulder); } } + } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index 43e34e74c59..02a7dedce47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -91,31 +91,30 @@ public String getMetadataFromDvObject(String identifier, Map met } else { dataset = (Dataset) dvObject.getOwner(); } - - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - 
metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + doiMetadata.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); if (dvObject.isInstanceofDataset()) { - metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); + doiMetadata.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; String fileDescription = df.getDescription(); - metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); + doiMetadata.setDescription(fileDescription == null ? 
"" : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - metadataTemplate.setTitle(dvObject.getCurrentName()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setTitle(dvObject.getCurrentName()); String producerString = pidProviderService.getProducer(); if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { producerString = UNAVAILABLE; } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java new file mode 100644 index 00000000000..ffd24747bc2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java @@ -0,0 +1,138 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.util.ArrayList; +import java.util.List; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import edu.harvard.iq.dataverse.DatasetAuthor; + + +//Parses some specific parts of a DataCite XML metadata file +public class DoiMetadata { + + private String identifier; + private List creators; + private String title; + private String publisher; + private String publisherYear; + private List datafileIdentifiers; + private List 
authors; + private String description; + private List contacts; + private List producers; + + + public DoiMetadata() { + } + + public void parseDataCiteXML(String xmlMetaData) { + Document doc = Jsoup.parseBodyFragment(xmlMetaData); + Elements identifierElements = doc.select("identifier"); + if (identifierElements.size() > 0) { + identifier = identifierElements.get(0).html(); + } + Elements creatorElements = doc.select("creatorName"); + creators = new ArrayList<>(); + for (Element creatorElement : creatorElements) { + creators.add(creatorElement.html()); + } + Elements titleElements = doc.select("title"); + if (titleElements.size() > 0) { + title = titleElements.get(0).html(); + } + Elements publisherElements = doc.select("publisher"); + if (publisherElements.size() > 0) { + publisher = publisherElements.get(0).html(); + } + Elements publisherYearElements = doc.select("publicationYear"); + if (publisherYearElements.size() > 0) { + publisherYear = publisherYearElements.get(0).html(); + } + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublisherYear() { + return publisherYear; + } + + public void setPublisherYear(String publisherYear) { + this.publisherYear = publisherYear; + } + + + public List getProducers() { + return producers; + } + + public void setProducers(List producers) { + this.producers = producers; + } + + public List getContacts() { + return contacts; + } + + public void setContacts(List contacts) { + this.contacts = contacts; + } + + public 
String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + + public List getDatafileIdentifiers() { + return datafileIdentifiers; + } + + public void setDatafileIdentifiers(List datafileIdentifiers) { + this.datafileIdentifiers = datafileIdentifiers; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index fb4e294d246..a74a9f34bc9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1,340 +1,1565 @@ package edu.harvard.iq.dataverse.pidproviders.doi; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; -import java.util.logging.Level; +import java.util.Map; +import java.util.Optional; +import java.util.Set; import java.util.logging.Logger; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; +import 
org.ocpsoft.common.util.Strings; + +import edu.harvard.iq.dataverse.AlternativePersistentIdentifier; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldServiceBean; +import edu.harvard.iq.dataverse.DatasetRelPublication; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.ExternalIdentifier; +import edu.harvard.iq.dataverse.FileMetadata; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.dataset.DatasetType; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; +import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.BundleUtil; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; +import jakarta.enterprise.inject.spi.CDI; +import jakarta.json.JsonObject; public class XmlMetadataTemplate { - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); - private static String template; + private static final Logger logger = Logger.getLogger(XmlMetadataTemplate.class.getName()); - static { - try (InputStream in = 
XmlMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = new String(in.readAllBytes(), StandardCharsets.UTF_8); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } + public static final String XML_NAMESPACE = "http://datacite.org/schema/kernel-4"; + public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + public static final String XML_XSI = "http://www.w3.org/2001/XMLSchema-instance"; + public static final String XML_SCHEMA_VERSION = "4.5"; - private String xmlMetadata; - private String identifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String resourceTypeGeneral; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } + private DoiMetadata doiMetadata; + //QDR - used to get ROR name from ExternalVocabularyValue via pidProvider.get + private PidProvider pidProvider = null; - public void setProducers(List producers) { - this.producers = producers; + public XmlMetadataTemplate() { } - public List getContacts() { - return contacts; + public XmlMetadataTemplate(DoiMetadata doiMetadata) { + this.doiMetadata = doiMetadata; } - public void setContacts(List contacts) { - this.contacts = contacts; - } + public String generateXML(DvObject dvObject) { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + generateXML(dvObject, outputStream); - public String getDescription() { - return description; + String xml = 
outputStream.toString(); + logger.fine(xml); + return XmlPrinter.prettyPrintXml(xml); + } catch (XMLStreamException | IOException e) { + logger.severe("Unable to generate DataCite XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); + e.printStackTrace(); + } + return null; } - public void setDescription(String description) { - this.description = description; - } + private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { + // Could/should use dataset metadata language for metadata from DvObject itself? + String language = null; // machine locale? e.g. for Publisher which is global + String metadataLanguage = null; // when set, otherwise = language? + + //QDR - used to get ROR name from ExternalVocabularyValue via pidProvider.get + GlobalId pid = null; + pid = dvObject.getGlobalId(); + if ((pid == null) && (dvObject instanceof DataFile df)) { + pid = df.getOwner().getGlobalId(); + } + pidProvider = PidUtil.getPidProvider(pid.getProviderId()); + XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); + xmlw.writeStartElement("resource"); + boolean deaccessioned=false; + if(dvObject instanceof Dataset d) { + deaccessioned=d.isDeaccessioned(); + } else if (dvObject instanceof DataFile df) { + deaccessioned = df.isDeaccessioned(); + } + xmlw.writeDefaultNamespace(XML_NAMESPACE); + xmlw.writeAttribute("xmlns:xsi", XML_XSI); + xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - public List getAuthors() { - return authors; + writeIdentifier(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors(), deaccessioned); + writeTitles(xmlw, dvObject, language, deaccessioned); + writePublisher(xmlw, dvObject, deaccessioned); + writePublicationYear(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + } + 
writeResourceType(xmlw, dvObject); + if (!deaccessioned) { + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + } + writeDescriptions(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); + } + xmlw.writeEndElement(); + xmlw.flush(); } - public void setAuthors(List authors) { - this.authors = authors; - } + /** + * 3, Title(s) (with optional type sub-properties) (M) + * + * @param xmlw + * The Stream writer + * @param dvObject + * The dataset/file + * @param language + * the metadata language + * @return + * @throws XMLStreamException + */ + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language, boolean deaccessioned) throws XMLStreamException { + String title = null; + String subTitle = null; + List altTitles = new ArrayList<>(); - public XmlMetadataTemplate() { - } + if (!deaccessioned) { + title = doiMetadata.getTitle(); - public List getDatafileIdentifiers() { - return datafileIdentifiers; - } + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) { + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); + if (altTitleField.isPresent()) { + altTitles = altTitleField.get().getValues(); + } + } + } else { + title = AbstractDOIProvider.UNAVAILABLE; + } + if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && 
!String.join("", altTitles).isBlank())) { + xmlw.writeStartElement("titles"); + if (StringUtils.isNotBlank(title)) { + XmlWriterUtil.writeFullElement(xmlw, "title", title, language); + } + Map attributes = new HashMap(); - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; + if (StringUtils.isNotBlank(subTitle)) { + attributes.put("titleType", "Subtitle"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, subTitle); + } + if ((altTitles != null && !String.join("", altTitles).isBlank())) { + attributes.clear(); + attributes.put("titleType", "AlternativeTitle"); + for (String altTitle : altTitles) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + } + } + xmlw.writeEndElement(); + } } - public XmlMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); + /** + * 1, Identifier (with mandatory type sub-property) (M) Note DataCite expects + * identifierType="DOI" but OpenAire allows several others (see + * https://guidelines.readthedocs.io/en/latest/data/field_identifier.html#d-identifiertype) + * Dataverse is currently only capable of creating DOI, Handle, or URL types + * from the OpenAire list (the last from PermaLinks) ToDo - If we add,e.g., an + * ARK or PURL provider, this code has to change or we'll need to refactor so + * that the identifiertype and id value can be sent via the JSON/ORE + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset or file with the PID + * @throws XMLStreamException + */ + private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + GlobalId pid = dvObject.getGlobalId(); + String identifierType = null; + String identifier = null; + switch 
(pid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = pid.asRawIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = pid.asRawIdentifier(); + break; + case PermaLinkPidProvider.PERMA_PROTOCOL: + identifierType = "URL"; + identifier = pid.asURL(); + break; } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); + Map attributeMap = new HashMap(); + attributeMap.put("identifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "identifier", attributeMap, identifier); + } + + /** + * 2, Creator (with optional given name, family name, name identifier and + * affiliation sub-properties) (M) + * + * @param xmlw + * The stream writer + * @param authorList + * - the list of authors + * @throws XMLStreamException + */ + public void writeCreators(XMLStreamWriter xmlw, List authorList, boolean deaccessioned) throws XMLStreamException { + // creators -> creator -> creatorName with nameType attribute, givenName, + // familyName, nameIdentifier + // write all creators + xmlw.writeStartElement("creators"); // + if(deaccessioned) { + //skip the loop below + authorList = null; } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); + boolean nothingWritten = true; + if (authorList != null && !authorList.isEmpty()) { + for (DatasetAuthor author : authorList) { + String creatorName = author.getName().getDisplayValue(); + String affiliation = null; + if (author.getAffiliation() != null && !author.getAffiliation().getValue().isEmpty()) { + affiliation = author.getAffiliation().getValue(); + } + String nameIdentifier = null; + String nameIdentifierScheme = null; + if (StringUtils.isNotBlank(author.getIdValue()) && 
StringUtils.isNotBlank(author.getIdType())) { + nameIdentifier = author.getIdValue(); + if (nameIdentifier != null) { + // Normalizes to the URL form of the identifier, returns null if the identifier + // is not valid given the type + nameIdentifier = author.getIdentifierAsUrl(); + } + nameIdentifierScheme = author.getIdType(); + } + + if (StringUtils.isNotBlank(creatorName)) { + JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, + StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + nothingWritten = false; + writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); + } + + + } } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); + if (nothingWritten) { + // Authors unavailable + xmlw.writeStartElement("creator"); + XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); + xmlw.writeEndElement(); } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); + xmlw.writeEndElement(); // + } + + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { + // publisher should already be non null - :unav if it wasn't available + if(deaccessioned) { + doiMetadata.setPublisher(AbstractPidProvider.UNAVAILABLE); } + XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); } - public String generateXML(DvObject dvObject) { + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; + String pubYear = "9999"; // FIXME: Investigate 
why this.publisherYear is sometimes null now that pull // request #4606 has been merged. - if (this.publisherYear != null) { + if (! deaccessioned && (doiMetadata.getPublisherYear() != null)) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. - publisherYearFinal = this.publisherYear; - } - xmlMetadata = template.replace("${identifier}", getIdentifier().trim()).replace("${title}", this.title) - .replace("${publisher}", this.publisher).replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - - StringBuilder creatorsElement = new StringBuilder(); - if (authors != null && !authors.isEmpty()) { - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() - && !author.getIdValue().isEmpty() && author.getAffiliation() != null - && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } + pubYear = doiMetadata.getPublisherYear(); + } + XmlWriterUtil.writeFullElement(xmlw, "publicationYear", String.valueOf(pubYear)); + } + + /** + * 6, Subject (with scheme sub-property) R + * + * @param xmlw + * The Steam writer + * @param dvObject + * The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // subjects -> subject with subjectScheme and schemeURI attributes when + // available + boolean subjectsCreated = false; + List subjects = new ArrayList(); + 
List compoundKeywords = new ArrayList(); + List compoundTopics = new ArrayList(); + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.subject)) { + subjects.addAll(dsf.getValues()); } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement - .append("" + author.getAffiliation().getDisplayValue() + ""); + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { + compoundKeywords = dsf.getDatasetFieldCompoundValues(); + } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { + compoundTopics = dsf.getDatasetFieldCompoundValues(); } - creatorsElement.append(""); } - } else { - creatorsElement.append("").append(AbstractPidProvider.UNAVAILABLE) - .append(""); + } else if (dvObject instanceof DataFile df) { + subjects = df.getTagLabels(); } + for (String subject : subjects) { + if (StringUtils.isNotBlank(subject)) { + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElement(xmlw, "subject", StringEscapeUtils.escapeXml10(subject)); + } + } + for (DatasetFieldCompoundValue keywordFieldValue : compoundKeywords) { + String keyword = null; + String scheme = null; + String schemeUri = null; - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); + for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.keywordValue: + keyword = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocabURI: + schemeUri = subField.getValue(); + 
break; + } + } + if (StringUtils.isNotBlank(keyword)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(keyword)); + } + } + for (DatasetFieldCompoundValue topicFieldValue : compoundTopics) { + String topic = null; + String scheme = null; + String schemeUri = null; - StringBuilder contributorsElement = new StringBuilder(); - if (this.getContacts() != null) { - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); + for (DatasetField subField : topicFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.topicClassValue: + topic = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(topic)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(topic)); } } + if (subjectsCreated) { + xmlw.writeEndElement(); + } + } + + /** + * 7, Contributor (with optional given name, family name, name identifier 
and + * affiliation sub-properties) + * + * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, + * java.lang.String, java.lang.String, java.lang.String) + * + * @param xmlw + * The stream writer + * @param dvObject + * The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean contributorsCreated = false; + List compoundProducers = new ArrayList(); + List compoundDistributors = new ArrayList(); + List compoundContacts = new ArrayList(); + List compoundContributors = new ArrayList(); + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + // ToDo Include for files? + /* + * if(dvObject instanceof DataFile df) { dvObject = df.getOwner(); } + */ - if (this.getProducers() != null) { - for (String[] producer : this.getProducers()) { - contributorsElement.append("" + producer[0] - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producer: + compoundProducers = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.distributor: + compoundDistributors = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.datasetContact: + compoundContacts = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contributor: + compoundContributors = dsf.getDatasetFieldCompoundValues(); } - contributorsElement.append(""); } } - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - String datasetTypeName = dataset.getDatasetType().getName(); - resourceTypeGeneral = switch (datasetTypeName) { - case DatasetType.DATASET_TYPE_DATASET -> - "Dataset"; - case DatasetType.DATASET_TYPE_SOFTWARE -> - 
"Software"; - case DatasetType.DATASET_TYPE_WORKFLOW -> - "Workflow"; - default -> - "Dataset"; - }; - xmlMetadata = xmlMetadata.replace("${resourceTypeGeneral}", resourceTypeGeneral); + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { + String producer = null; + String affiliation = null; + + for (DatasetField subField : producerFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producerName: + producer = subField.getValue(); + break; + case DatasetFieldConstant.producerAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(producer)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(producer, false, false); + writeEntityElements(xmlw, "contributor", "Producer", entityObject, affiliation, null, null); + } + } - String relIdentifiers = generateRelatedIdentifiers(dvObject); + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { + String distributor = null; + String affiliation = null; - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); + for (DatasetField subField : distributorFieldValue.getChildDatasetFields()) { - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; - } + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributorName: + distributor = subField.getValue(); + break; + case DatasetFieldConstant.distributorAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(distributor)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(distributor, false, false); + writeEntityElements(xmlw, "contributor", 
"Distributor", entityObject, affiliation, null, null); + } - private String generateRelatedIdentifiers(DvObject dvObject) { + } + for (DatasetFieldCompoundValue contactFieldValue : compoundContacts) { + String contact = null; + String affiliation = null; - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + for (DatasetField subField : contactFieldValue.getChildDatasetFields()) { - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - if (dataFile.getGlobalId() != null) { - if (sb.toString().isEmpty()) { - sb.append(""); - } - sb.append("" - + dataFile.getGlobalId() + ""); - } + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.datasetContactName: + contact = subField.getValue(); + break; + case DatasetFieldConstant.datasetContactAffiliation: + affiliation = subField.getValue(); + break; } + } + if (StringUtils.isNotBlank(contact)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contact, false, false); + writeEntityElements(xmlw, "contributor", "ContactPerson", entityObject, affiliation, null, null); + } - if (!sb.toString().isEmpty()) { - sb.append(""); + } + for (DatasetFieldCompoundValue contributorFieldValue : compoundContributors) { + String contributor = null; + String contributorType = null; + + for (DatasetField subField : contributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.contributorName: + contributor = subField.getValue(); + break; + case DatasetFieldConstant.contributorType: + contributorType = subField.getValue(); + if (contributorType != null) { + contributorType = contributorType.replace(" ", ""); + } + break; } } - } 
else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" - + df.getOwner().getGlobalId() + ""); - sb.append(""); + // QDR - doesn't have Funder in the contributor type list. + // Using a string isn't i18n + if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { + contributorType = getCanonicalContributorType(contributorType); + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contributor, false, false); + writeEntityElements(xmlw, "contributor", contributorType, entityObject, null, null, null); + } + + } + + if (contributorsCreated) { + xmlw.writeEndElement(); } - return sb.toString(); } - public void generateFileIdentifiers(DvObject dvObject) { + //List from https://schema.datacite.org/meta/kernel-4/include/datacite-contributorType-v4.xsd + private Set contributorTypes = new HashSet<>(Arrays.asList("ContactPerson", "DataCollector", "DataCurator", "DataManager", "Distributor", "Editor", + "HostingInstitution", "Other", "Producer", "ProjectLeader", "ProjectManager", "ProjectMember", "RegistrationAgency", "RegistrationAuthority", + "RelatedPerson", "ResearchGroup", "RightsHolder", "Researcher", "Sponsor", "Supervisor", "WorkPackageLeader")); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; + private String getCanonicalContributorType(String contributorType) { + if(StringUtils.isBlank(contributorType) || !contributorTypes.contains(contributorType)) { + return "Other"; + } + return contributorType; + } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + private void writeEntityElements(XMLStreamWriter xmlw, String elementName, String type, JsonObject entityObject, String affiliation, String nameIdentifier, String nameIdentifierScheme) throws XMLStreamException { + 
xmlw.writeStartElement(elementName); + Map attributeMap = new HashMap(); + if (StringUtils.isNotBlank(type)) { + xmlw.writeAttribute("contributorType", type); + } + // person name=, + if (entityObject.getBoolean("isPerson")) { + attributeMap.put("nameType", "Personal"); + } else { + attributeMap.put("nameType", "Organizational"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, elementName + "Name", attributeMap, + StringEscapeUtils.escapeXml10(entityObject.getString("fullName"))); + if (entityObject.containsKey("givenName")) { + XmlWriterUtil.writeFullElement(xmlw, "givenName", StringEscapeUtils.escapeXml10(entityObject.getString("givenName"))); + } + if (entityObject.containsKey("familyName")) { + XmlWriterUtil.writeFullElement(xmlw, "familyName", StringEscapeUtils.escapeXml10(entityObject.getString("familyName"))); + } - datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - int x = xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - + template.substring(x, template.length() - 1); + if (nameIdentifier != null) { + attributeMap.clear(); + URL url; + try { + url = new URL(nameIdentifier); + String protocol = url.getProtocol(); + String authority = url.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + attributeMap.put("schemeURI", site); + attributeMap.put("nameIdentifierScheme", nameIdentifierScheme); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "nameIdentifier", attributeMap, nameIdentifier); + } catch (MalformedURLException e) { + logger.warning("DatasetAuthor.getIdentifierAsUrl returned a Malformed URL: " + nameIdentifier); + } + } + if (StringUtils.isNotBlank(affiliation)) { + attributeMap.clear(); + boolean isROR=false; + String orgName = affiliation; + ExternalIdentifier externalIdentifier = 
ExternalIdentifier.ROR; + if (externalIdentifier.isValidIdentifier(orgName)) { + isROR = true; + JsonObject jo = getExternalVocabularyValue(orgName); + if (jo != null) { + orgName = jo.getString("termName"); } + } + + if (isROR) { - } else { - xmlMetadata = xmlMetadata.replace( - "${relatedIdentifier}", - ""); + attributeMap.put("schemeURI", "https://ror.org"); + attributeMap.put("affiliationIdentifierScheme", "ROR"); + attributeMap.put("affiliationIdentifier", orgName); } + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "affiliation", attributeMap, StringEscapeUtils.escapeXml10(orgName)); } + xmlw.writeEndElement(); } - public static String getTemplate() { - return template; + private JsonObject getExternalVocabularyValue(String id) { + return CDI.current().select(DatasetFieldServiceBean.class).get().getExternalVocabularyValue(id); } - public static void setTemplate(String template) { - XmlMetadataTemplate.template = template; + /** + * 8, Date (with type sub-property) (R) + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile + * @throws XMLStreamException + */ + private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean datesWritten = false; + String dateOfDistribution = null; + String dateOfProduction = null; + String dateOfDeposit = null; + Date releaseDate = null; + String publicationDate = null; + boolean isAnUpdate=false; + List datesOfCollection = new ArrayList(); + List timePeriods = new ArrayList(); + + if (dvObject instanceof DataFile df) { + // Find the first released version the file is in to give a published date + List fmds = df.getFileMetadatas(); + DatasetVersion initialVersion = null; + for (FileMetadata fmd : fmds) { + DatasetVersion dv = fmd.getDatasetVersion(); + if (dv.isReleased()) { + initialVersion = dv; + publicationDate = Util.getDateFormat().format(dv.getReleaseTime()); + break; + } + } + // And the last update is the most recent + for (int i = fmds.size() 
- 1; i >= 0; i--) { + DatasetVersion dv = fmds.get(i).getDatasetVersion(); + if (dv.isReleased() && !dv.equals(initialVersion)) { + releaseDate = dv.getReleaseTime(); + isAnUpdate=true; + break; + } + } + } else if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + Long versionNumber = dv.getVersionNumber(); + if (versionNumber != null && !(versionNumber.equals(1) && dv.getMinorVersionNumber().equals(0))) { + isAnUpdate = true; + } + releaseDate = dv.getReleaseTime(); + publicationDate = d.getPublicationDateFormattedYYYYMMDD(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributionDate: + dateOfDistribution = dsf.getValue(); + break; + case DatasetFieldConstant.productionDate: + dateOfProduction = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfDeposit: + dateOfDeposit = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfCollection: + datesOfCollection = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.timePeriodCovered: + timePeriods = dsf.getDatasetFieldCompoundValues(); + break; + } + } + } + Map attributes = new HashMap(); + if (StringUtils.isNotBlank(dateOfDistribution)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Issued"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDistribution); + } + // dates -> date with dateType attribute + + if (StringUtils.isNotBlank(dateOfProduction)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Created"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfProduction); + } + if (StringUtils.isNotBlank(dateOfDeposit)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Submitted"); + 
XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDeposit); + } + + if (publicationDate != null) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + + attributes.put("dateType", "Available"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, publicationDate); + } + if (isAnUpdate) { + String date = Util.getDateFormat().format(releaseDate); + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + + attributes.put("dateType", "Updated"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, date); + } + if (datesOfCollection != null) { + for (DatasetFieldCompoundValue collectionDateFieldValue : datesOfCollection) { + String startDate = null; + String endDate = null; + + for (DatasetField subField : collectionDateFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.dateOfCollectionStart: + startDate = subField.getValue(); + break; + case DatasetFieldConstant.dateOfCollectionEnd: + endDate = subField.getValue(); + break; + } + } + // Minimal clean-up - useful? 
Parse/format would remove unused chars, and an + // exception would clear the date so we don't send nonsense + startDate = cleanUpDate(startDate); + endDate = cleanUpDate(endDate); + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Collected"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); + } + } + } + if (timePeriods != null) { + for (DatasetFieldCompoundValue timePeriodFieldValue : timePeriods) { + String startDate = null; + String endDate = null; + + for (DatasetField subField : timePeriodFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.timePeriodCoveredStart: + startDate = subField.getValue(); + break; + case DatasetFieldConstant.timePeriodCoveredEnd: + endDate = subField.getValue(); + break; + } + } + // Minimal clean-up - useful? 
Parse/format would remove unused chars, and an + // exception would clear the date so we don't send nonsense + startDate = cleanUpDate(startDate); + endDate = cleanUpDate(endDate); + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Other"); + attributes.put("dateInformation", "Time period covered by the data"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); + } + } + } + if (datesWritten) { + xmlw.writeEndElement(); + } } - public String getIdentifier() { - return identifier; + private String cleanUpDate(String date) { + String newDate = null; + if (!StringUtils.isBlank(date)) { + try { + SimpleDateFormat sdf = Util.getDateFormat(); + Date start = sdf.parse(date); + newDate = sdf.format(start); + } catch (ParseException e) { + logger.warning("Could not parse date: " + date); + } + } + return newDate; } - public void setIdentifier(String identifier) { - this.identifier = identifier; + // 9, Language (MA), language + private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // Currently not supported. Spec indicates one 'primary' language. Could send + // the first entry in DatasetFieldConstant.language or send iff there is only + // one entry, and/or default to the machine's default lang, or the dataverse metadatalang? 
+ return; } - public List getCreators() { - return creators; + // 10, ResourceType (with mandatory general type + // description sub- property) (M) + private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List kindOfDataValues = new ArrayList(); + Map attributes = new HashMap(); + String resourceType = "Dataset"; + if (dvObject instanceof Dataset dataset) { + String datasetTypeName = dataset.getDatasetType().getName(); + resourceType = switch (datasetTypeName) { + case DatasetType.DATASET_TYPE_DATASET -> "Dataset"; + case DatasetType.DATASET_TYPE_SOFTWARE -> "Software"; + case DatasetType.DATASET_TYPE_WORKFLOW -> "Workflow"; + default -> "Dataset"; + }; + } + attributes.put("resourceTypeGeneral", resourceType); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.kindOfData: + List vals = dsf.getValues(); + for(String val: vals) { + if(StringUtils.isNotBlank(val)) { + kindOfDataValues.add(val); + } + } + break; + } + } + } + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(";", kindOfDataValues)); + + } else { + // Write an attribute only element if there are no kindOfData values. 
+ xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } + } - public void setCreators(List creators) { - this.creators = creators; + /** + * 11 AlternateIdentifier (with type sub-property) (O) + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile + * @throws XMLStreamException + */ + private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List otherIdentifiers = new ArrayList(); + Set altPids = dvObject.getAlternativePersistentIndentifiers(); + + boolean alternatesWritten = false; + + Map attributes = new HashMap(); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (DatasetFieldConstant.otherId.equals(dsf.getDatasetFieldType().getName())) { + otherIdentifiers = dsf.getDatasetFieldCompoundValues(); + break; + } + } + } + + if (altPids != null && !altPids.isEmpty()) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); + for (AlternativePersistentIdentifier altPid : altPids) { + String identifierType = null; + String identifier = null; + switch (altPid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + default: + // The AlternativePersistentIdentifier class isn't really ready for anything but + // doi or handle pids, but will add this as a default. 
+ identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternateIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + + } + } + + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { + String identifierType = null; + String identifier = null; + for (DatasetField subField : otherIdentifier.getChildDatasetFields()) { + identifierType = ":unav"; + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.otherIdAgency: + identifierType = subField.getValue(); + break; + case DatasetFieldConstant.otherIdValue: + identifier = subField.getValue(); + break; + } + } + attributes.put("alternateIdentifierType", identifierType); + if (!StringUtils.isBlank(identifier)) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } + } + if (alternatesWritten) { + xmlw.writeEndElement(); + } } - public String getTitle() { - return title; + /** + * 12, RelatedIdentifier (with type and relation type sub-properties) (R) + * + * @param xmlw + * The Steam writer + * @param dvObject + * the dataset/datafile + * @throws XMLStreamException + */ + private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + + boolean relatedIdentifiersWritten = false; + + Map attributes = new HashMap(); + + if (dvObject instanceof Dataset dataset) { + List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); + if (!relatedPublications.isEmpty()) { + for (DatasetRelPublication relatedPub : relatedPublications) { + attributes.clear(); + + String pubIdType = relatedPub.getIdType(); + String identifier = relatedPub.getIdNumber(); + String url = relatedPub.getUrl(); + String relationType = 
relatedPub.getRelationType(); + if(StringUtils.isBlank(relationType)) { + relationType = "IsSupplementTo"; + } + /* + * Note - with identifier and url fields, it's not clear that there's a single + * way those two fields are used for all identifier types. The code here is + * ~best effort to interpret those fields. + */ + logger.fine("Found relpub: " + pubIdType + " " + identifier + " " + url); + + pubIdType = getCanonicalPublicationType(pubIdType); + logger.fine("Canonical type: " + pubIdType); + // Prefer identifier if set, otherwise check url + String relatedIdentifier = identifier; + if (StringUtils.isBlank(relatedIdentifier)) { + relatedIdentifier = url; + } + logger.fine("Related identifier: " + relatedIdentifier); + // For types where we understand the protocol, get the canonical form + if (StringUtils.isNotBlank(relatedIdentifier)) { + switch (pubIdType != null ? pubIdType : "none") { + case "DOI": + if (!(relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http"))) { + relatedIdentifier = "doi:" + relatedIdentifier; + } + logger.fine("Intermediate Related identifier: " + relatedIdentifier); + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + logger.warning("Invalid DOI: " + e.getLocalizedMessage()); + relatedIdentifier = null; + } + logger.fine("Final Related identifier: " + relatedIdentifier); + break; + case "Handle": + if (!relatedIdentifier.startsWith("hdl:") || !relatedIdentifier.startsWith("http")) { + relatedIdentifier = "hdl:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "URL": + // If a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URI(relatedIdentifier).toURL(); + String protocol = relatedUrl.getProtocol(); + 
String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + } catch (URISyntaxException | MalformedURLException | IllegalArgumentException e) { + // Just an identifier but without a pubIdType we won't include it + logger.warning("Invalid Identifier of type URL: " + relatedIdentifier); + relatedIdentifier = null; + } + break; + case "none": + //Try to identify PIDs and URLs and send them as related identifiers + if (relatedIdentifier != null) { + // See if it is a GlobalID we know + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + pubIdType = getCanonicalPublicationType(pid.getProtocol()); + } catch (IllegalArgumentException e) { + } + // For non-URL types, if a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URI(relatedIdentifier).toURL(); + String protocol = relatedUrl.getProtocol(); + String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + pubIdType = "URL"; + } catch (URISyntaxException | MalformedURLException | IllegalArgumentException e) { + // Just an identifier but without a pubIdType we won't include it + logger.warning("Related Identifier found without type: " + relatedIdentifier); + //Won't be sent since pubIdType is null - could also set relatedIdentifier to null + } + } + break; + default: + //Some other valid type - we just send the identifier w/o optional attributes + //To Do - validation for other types? 
+ break; + } + } + if (StringUtils.isNotBlank(relatedIdentifier) && StringUtils.isNotBlank(pubIdType)) { + // Still have a valid entry + attributes.put("relatedIdentifierType", pubIdType); + attributes.put("relationType", relationType); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); + } + } + } + List fmds = dataset.getLatestVersionForCopy().getFileMetadatas(); + if (!((fmds==null) && fmds.isEmpty())) { + attributes.clear(); + attributes.put("relationType", "HasPart"); + for (FileMetadata fmd : fmds) { + DataFile dataFile = fmd.getDataFile(); + GlobalId pid = dataFile.getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + attributes.put("relatedIdentifierType", pubIdType); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); + } + } + } + } + } else if (dvObject instanceof DataFile df) { + GlobalId pid = df.getOwner().getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + + attributes.clear(); + attributes.put("relationType", "IsPartOf"); + attributes.put("relatedIdentifierType", pubIdType); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); + } + } + } + if (relatedIdentifiersWritten) { + xmlw.writeEndElement(); + } } - public void setTitle(String title) { - this.title = title; + static HashMap relatedIdentifierTypeMap = new HashMap(); + + private static String 
getCanonicalPublicationType(String pubIdType) { + if (relatedIdentifierTypeMap.isEmpty()) { + relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK"); + relatedIdentifierTypeMap.put("arXiv", "arXiv"); + relatedIdentifierTypeMap.put("bibcode".toLowerCase(), "bibcode"); + relatedIdentifierTypeMap.put("DOI".toLowerCase(), "DOI"); + relatedIdentifierTypeMap.put("EAN13".toLowerCase(), "EAN13"); + relatedIdentifierTypeMap.put("EISSN".toLowerCase(), "EISSN"); + relatedIdentifierTypeMap.put("Handle".toLowerCase(), "Handle"); + relatedIdentifierTypeMap.put("IGSN".toLowerCase(), "IGSN"); + relatedIdentifierTypeMap.put("ISBN".toLowerCase(), "ISBN"); + relatedIdentifierTypeMap.put("ISSN".toLowerCase(), "ISSN"); + relatedIdentifierTypeMap.put("ISTC".toLowerCase(), "ISTC"); + relatedIdentifierTypeMap.put("LISSN".toLowerCase(), "LISSN"); + relatedIdentifierTypeMap.put("LSID".toLowerCase(), "LSID"); + relatedIdentifierTypeMap.put("PISSN".toLowerCase(), "PISSN"); + relatedIdentifierTypeMap.put("PMID".toLowerCase(), "PMID"); + relatedIdentifierTypeMap.put("PURL".toLowerCase(), "PURL"); + relatedIdentifierTypeMap.put("UPC".toLowerCase(), "UPC"); + relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL"); + relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN"); + relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS"); + // Add entry for Handle,Perma protocols so this can be used with GlobalId/getProtocol() + relatedIdentifierTypeMap.put("hdl".toLowerCase(), "Handle"); + relatedIdentifierTypeMap.put("perma".toLowerCase(), "URL"); + + } + return relatedIdentifierTypeMap.get(pubIdType); } - public String getPublisher() { - return publisher; + private void writeSize(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // sizes -> size + boolean sizesWritten = false; + List dataFiles = new ArrayList(); + + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if 
(dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + Long size = dataFile.getFilesize(); + if (size != -1) { + sizesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "sizes", sizesWritten); + XmlWriterUtil.writeFullElement(xmlw, "size", size.toString()); + } + } + } + if (sizesWritten) { + xmlw.writeEndElement(); + } + } - public void setPublisher(String publisher) { - this.publisher = publisher; + private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + + boolean formatsWritten = false; + List dataFiles = new ArrayList(); + + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + String format = dataFile.getContentType(); + if (StringUtils.isNotBlank(format)) { + formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); + XmlWriterUtil.writeFullElement(xmlw, "format", format); + } + /* + * Should original formats be sent? What about original sizes above? 
+ * if(dataFile.isTabularData()) { String originalFormat = + * dataFile.getOriginalFileFormat(); if(StringUtils.isNotBlank(originalFormat)) + * { XmlWriterUtil.writeFullElement(xmlw, "format", format); } } + */ + } + } + if (formatsWritten) { + xmlw.writeEndElement(); + } + } - public String getPublisherYear() { - return publisherYear; + private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + Dataset d = null; + if (dvObject instanceof Dataset) { + d = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + d = ((DataFile) dvObject).getOwner(); + } + if (d != null) { + DatasetVersion dv = d.getLatestVersionForCopy(); + String version = dv.getFriendlyVersionNumber(); + if (StringUtils.isNotBlank(version)) { + XmlWriterUtil.writeFullElement(xmlw, "version", version); + } + } + } - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; + private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // rightsList -> rights with rightsURI attribute + xmlw.writeStartElement("rightsList"); // + + // set terms from the info:eu-repo-Access-Terms vocabulary + xmlw.writeStartElement("rights"); // + DatasetVersion dv = null; + boolean closed = false; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + closed = dv.isHasRestrictedFile(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); + + closed = df.isRestricted(); + } + TermsOfUseAndAccess terms = dv.getTermsOfUseAndAccess(); + boolean requestsAllowed = terms.isFileAccessRequest(); + License license = terms.getLicense(); + + if (requestsAllowed && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess"); + } else if (!requestsAllowed && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess"); + } else { + xmlw.writeAttribute("rightsURI", 
"info:eu-repo/semantics/openAccess"); + } + xmlw.writeEndElement(); // + xmlw.writeStartElement("rights"); // + + if (license != null) { + xmlw.writeAttribute("rightsURI", license.getUri().toString()); + xmlw.writeCharacters(license.getName()); + } else { + xmlw.writeAttribute("rightsURI", DatasetUtil.getLicenseURI(dv)); + xmlw.writeCharacters(BundleUtil.getStringFromBundle("license.custom.description")); + ; + } + xmlw.writeEndElement(); // + xmlw.writeEndElement(); // } - public String getResourceTypeGeneral() { - return resourceTypeGeneral; + private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { + // descriptions -> description with descriptionType attribute + boolean descriptionsWritten = false; + List descriptions = null; + DatasetVersion dv = null; + if(deaccessioned) { + descriptions = new ArrayList(); + descriptions.add(AbstractDOIProvider.UNAVAILABLE); + } else { + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + descriptions = dv.getDescriptions(); + } else if (dvObject instanceof DataFile df) { + String description = df.getDescription(); + if (description != null) { + descriptions = new ArrayList(); + descriptions.add(description); + } + } + } + Map attributes = new HashMap(); + attributes.put("descriptionType", "Abstract"); + if (descriptions != null) { + for (String description : descriptions) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, StringEscapeUtils.escapeXml10(description)); + } + } + + if (dv != null) { + List dsfs = dv.getDatasetFields(); + + for (DatasetField dsf : dsfs) { + + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.software: + attributes.clear(); + attributes.put("descriptionType", "TechnicalInfo"); + List dsfcvs = dsf.getDatasetFieldCompoundValues(); + for 
(DatasetFieldCompoundValue dsfcv : dsfcvs) { + + String softwareName = null; + String softwareVersion = null; + List childDsfs = dsfcv.getChildDatasetFields(); + for (DatasetField childDsf : childDsfs) { + if (DatasetFieldConstant.softwareName.equals(childDsf.getDatasetFieldType().getName())) { + softwareName = childDsf.getValue(); + } else if (DatasetFieldConstant.softwareVersion.equals(childDsf.getDatasetFieldType().getName())) { + softwareVersion = childDsf.getValue(); + } + } + if (StringUtils.isNotBlank(softwareName)) { + if (StringUtils.isNotBlank(softwareVersion)) { + } + softwareName = softwareName + ", " + softwareVersion; + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, softwareName); + } + } + break; + case DatasetFieldConstant.originOfSources: + case DatasetFieldConstant.characteristicOfSources: + case DatasetFieldConstant.accessToSources: + attributes.clear(); + attributes.put("descriptionType", "Methods"); + String method = dsf.getValue(); + if (StringUtils.isNotBlank(method)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, method); + + } + break; + case DatasetFieldConstant.series: + attributes.clear(); + attributes.put("descriptionType", "SeriesInformation"); + dsfcvs = dsf.getDatasetFieldCompoundValues(); + for (DatasetFieldCompoundValue dsfcv : dsfcvs) { + List childDsfs = dsfcv.getChildDatasetFields(); + for (DatasetField childDsf : childDsfs) { + + if (DatasetFieldConstant.seriesName.equals(childDsf.getDatasetFieldType().getName())) { + String seriesInformation = childDsf.getValue(); + if (StringUtils.isNotBlank(seriesInformation)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + 
XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, seriesInformation); + } + break; + } + } + } + break; + case DatasetFieldConstant.notesText: + attributes.clear(); + attributes.put("descriptionType", "Other"); + String notesText = dsf.getValue(); + if (StringUtils.isNotBlank(notesText)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, notesText); + } + break; + + } + } + + } + + if (descriptionsWritten) { + xmlw.writeEndElement(); // + } } - public void setResourceTypeGeneral(String resourceTypeGeneral) { - this.resourceTypeGeneral = resourceTypeGeneral; + private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + if (dvObject instanceof Dataset d) { + boolean geoLocationsWritten = false; + DatasetVersion dv = d.getLatestVersionForCopy(); + + List places = dv.getGeographicCoverage(); + if (places != null && !places.isEmpty()) { + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + for (String[] place : places) { + xmlw.writeStartElement("geoLocation"); // + + ArrayList placeList = new ArrayList(); + for (String placePart : place) { + if (!StringUtils.isBlank(placePart)) { + placeList.add(placePart); + } + } + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); + xmlw.writeEndElement(); // + } + + } + boolean boundingBoxFound = false; + boolean productionPlaceFound = false; + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.geographicBoundingBox: + boundingBoxFound = true; + for (DatasetFieldCompoundValue dsfcv : dsf.getDatasetFieldCompoundValues()) { + List childDsfs = dsfcv.getChildDatasetFields(); + String nLatitude = null; + String sLatitude = null; + String eLongitude = 
null; + String wLongitude = null; + for (DatasetField childDsf : childDsfs) { + switch (childDsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.northLatitude: + nLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.southLatitude: + sLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.eastLongitude: + eLongitude = childDsf.getValue(); + break; + case DatasetFieldConstant.westLongitude: + wLongitude = childDsf.getValue(); + + } + } + if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + xmlw.writeStartElement("geoLocation"); // + if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { + // A point + xmlw.writeStartElement("geoLocationPoint"); + XmlWriterUtil.writeFullElement(xmlw, "pointLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "pointLatitude", sLatitude); + xmlw.writeEndElement(); + } else { + // A box + xmlw.writeStartElement("geoLocationBox"); + XmlWriterUtil.writeFullElement(xmlw, "westBoundLongitude", wLongitude); + XmlWriterUtil.writeFullElement(xmlw, "eastBoundLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "southBoundLatitude", sLatitude); + XmlWriterUtil.writeFullElement(xmlw, "northBoundLatitude", nLatitude); + xmlw.writeEndElement(); + + } + xmlw.writeEndElement(); // + } + } + case DatasetFieldConstant.productionPlace: + productionPlaceFound = true; + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + List prodPlaces = dsf.getValues(); + for (String prodPlace : prodPlaces) { + xmlw.writeStartElement("geoLocation"); // + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + xmlw.writeEndElement(); // + } + break; + } + if (boundingBoxFound && productionPlaceFound) { + break; + } + } + if (geoLocationsWritten) { + xmlw.writeEndElement(); // + } 
+ } + } + private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // fundingReferences -> fundingReference -> funderName, awardNumber + boolean fundingReferenceWritten = false; + DatasetVersion dv = null; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); + } + if (dv != null) { + List retList = new ArrayList<>(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributor)) { + boolean addFunder = false; + for (DatasetFieldCompoundValue contributorValue : dsf.getDatasetFieldCompoundValues()) { + String contributorName = null; + String contributorType = null; + for (DatasetField subField : contributorValue.getChildDatasetFields()) { + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorName)) { + contributorName = subField.getDisplayValue(); + } + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorType)) { + contributorType = subField.getRawValue(); + } + } + // SEK 02/12/2019 move outside loop to prevent contrib type to carry over to + // next contributor + // TODO: Consider how this will work in French, Chinese, etc. 
+ if ("Funder".equals(contributorType)) { + if (!StringUtils.isBlank(contributorName)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(contributorName)); + xmlw.writeEndElement(); // + } + } + } + } + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumber)) { + for (DatasetFieldCompoundValue grantObject : dsf.getDatasetFieldCompoundValues()) { + String funder = null; + String awardNumber = null; + for (DatasetField subField : grantObject.getChildDatasetFields()) { + // It would be nice to do something with grantNumberValue (the actual number) + // but schema.org doesn't support it. + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberAgency)) { + String grantAgency = subField.getDisplayValue(); + funder = grantAgency; + } else if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberValue)) { + String grantNumberValue = subField.getDisplayValue(); + awardNumber = grantNumberValue; + } + } + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + boolean isROR=false; + String funderIdentifier = null; + ExternalIdentifier externalIdentifier = ExternalIdentifier.ROR; + if (externalIdentifier.isValidIdentifier(funder)) { + isROR = true; + JsonObject jo = getExternalVocabularyValue(funder); + if (jo != null) { + funderIdentifier = funder; + funder = jo.getString("termName"); + } + } + + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); + if (isROR) { + Map attributeMap = new HashMap<>(); + attributeMap.put("schemeURI", "https://ror.org"); + attributeMap.put("funderIdentifierType", "ROR"); + 
XmlWriterUtil.writeFullElementWithAttributes(xmlw, "funderIdentifier", attributeMap, StringEscapeUtils.escapeXml10(funderIdentifier)); + } + if (StringUtils.isNotBlank(awardNumber)) { + XmlWriterUtil.writeFullElement(xmlw, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); + } + xmlw.writeEndElement(); // + } + + } + } + } + + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + + } + } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java index cda70cbc506..a4d788de4df 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; import org.xmlunit.builder.DiffBuilder; @@ -125,28 +126,28 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { description = AbstractPidProvider.UNAVAILABLE; } - metadataTemplate.setDescription(description); + doiMetadata.setDescription(description); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. 
//This could/should be removed if the datafile methods add escaping String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); - metadataTemplate.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); + doiMetadata.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); String title = dvObject.getCurrentName(); if(dvObject.isInstanceofDataFile()) { //Note file title is not currently escaped the way the dataset title is, so adding it here. @@ -157,40 +158,41 @@ public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + DoiMetadata doiMetadata = new DoiMetadata(); + + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setDescription(AbstractPidProvider.UNAVAILABLE); + doiMetadata.setDescription(AbstractPidProvider.UNAVAILABLE); String title =metadata.get("datacite.title"); System.out.print("Map metadata title: "+ metadata.get("datacite.title")); - metadataTemplate.setAuthors(null); + doiMetadata.setAuthors(null); - metadataTemplate.setTitle(title); + doiMetadata.setTitle(title); String producerString = AbstractPidProvider.UNAVAILABLE; - metadataTemplate.setPublisher(producerString); - 
metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } @@ -244,11 +246,12 @@ Map getMetadata(String identifier) throws IOException { Map metadata = new HashMap<>(); try { String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); - XmlMetadataTemplate template = new XmlMetadataTemplate(xmlMetadata); - metadata.put("datacite.creator", String.join("; ", template.getCreators())); - metadata.put("datacite.title", template.getTitle()); - metadata.put("datacite.publisher", template.getPublisher()); - metadata.put("datacite.publicationyear", template.getPublisherYear()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.parseDataCiteXML(xmlMetadata); + metadata.put("datacite.creator", String.join("; ", doiMetadata.getCreators())); + metadata.put("datacite.title", doiMetadata.getTitle()); + metadata.put("datacite.publisher", doiMetadata.getPublisher()); + metadata.put("datacite.publicationyear", doiMetadata.getPublisherYear()); } catch (RuntimeException e) { logger.log(Level.INFO, identifier, e); } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java index cd765933796..5630844fb32 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java @@ -17,6 +17,8 @@ import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import 
edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import jakarta.json.JsonObject; + import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.HttpStatus; @@ -124,6 +126,7 @@ public String modifyIdentifierTargetURL(DvObject dvObject) throws Exception { String identifier = getIdentifier(dvObject); try { Map metadata = getIdentifierMetadata(dvObject); + metadata.put("_target", getTargetUrl(dvObject)); doiDataCiteRegisterService.modifyIdentifier(identifier, metadata, dvObject); } catch (Exception e) { logger.log(Level.WARNING, "modifyMetadata failed", e); diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java index d185b0249b9..465b10ee407 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteRESTfullClient.java @@ -21,6 +21,7 @@ import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.HttpEntity; import org.apache.http.entity.StringEntity; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.client.CloseableHttpClient; @@ -75,7 +76,12 @@ public String getUrl(String doi) { HttpGet httpGet = new HttpGet(this.url + "/doi/" + doi); try { HttpResponse response = httpClient.execute(httpGet,context); - String data = EntityUtils.toString(response.getEntity(), encoding); + HttpEntity entity = response.getEntity(); + String data = null; + + if(entity != null) { + data = EntityUtils.toString(entity, encoding); + } if (response.getStatusLine().getStatusCode() != 200) { throw new RuntimeException("Response code: " + response.getStatusLine().getStatusCode() + ", " + data); } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java index 586ca50b6fd..cec64ab95b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java @@ -24,7 +24,12 @@ public class XmlValidator { private static final Logger logger = Logger.getLogger(XmlValidator.class.getCanonicalName()); public static boolean validateXmlSchema(String fileToValidate, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { + Source xmlFile = new StreamSource(new File(fileToValidate)); + return validateXmlSchema(xmlFile, schemaToValidateAgainst); + } + + public static boolean validateXmlSchema(Source xmlFile, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); Schema schema = schemaFactory.newSchema(schemaToValidateAgainst); Validator validator = schema.newValidator(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java new file mode 100644 index 00000000000..8ec426ead1f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -0,0 +1,167 @@ +package edu.harvard.iq.dataverse.util.xml; + +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; + +public class XmlWriterUtil { + + public static void 
writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { + // For the simplest Elements we can + if (values != null && !values.isEmpty()) { + for (String value : values) { + xmlw.writeStartElement(name); + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + } + + public static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, + String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) + throws XMLStreamException { + + if (values != null && !values.isEmpty()) { + Locale defaultLocale = Locale.getDefault(); + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); + if (localeVal != null) { + + value = localeVal; + writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); + } else { + writeFullElement(xmlw, name, value); + } + } else { + writeFullElement(xmlw, name, value); + } + } + if (lang != null && !defaultLocale.getLanguage().equals(lang)) { + // Get values in dataset metadata language + // Loop before testing fieldTypeClass to be ready for external CVV + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); + if (localeVal != null) { + writeFullElement(xmlw, name, localeVal, lang); + } + } + } + } + } + } + + public static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, + String fieldTypeName, String lang) throws XMLStreamException { + // Get the default value + String val = dto2Primitive(version, fieldTypeName); + Locale defaultLocale = Locale.getDefault(); + // Get the language-specific value for the default language + // A null value is returned if this is not a CVV field + String localeVal 
= dto2Primitive(version, fieldTypeName, defaultLocale); + String requestedLocaleVal = null; + if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { + // Also get the value in the requested locale/lang if that's not the default + // lang. + requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); + } + // FWIW locale-specific vals will only be non-null for CVV values (at present) + if (localeVal == null && requestedLocaleVal == null) { + // Not CVV/no translations so print without lang tag + writeFullElement(xmlw, name, val); + } else { + // Print in either/both languages if we have values + if (localeVal != null) { + // Print the value for the default locale with it's own lang tag + writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); + } + // Also print in the request lang (i.e. the metadata language for the dataset) + // if a value exists, print it with a lang tag + if (requestedLocaleVal != null) { + writeFullElement(xmlw, name, requestedLocaleVal, lang); + } + } + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + return fieldDTO.getSinglePrimitive(); + } + } + } + return null; + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + String rawVal = fieldDTO.getSinglePrimitive(); + if (fieldDTO.isControlledVocabularyField()) { + return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, 
value.getName(), + locale, false); + } + } + } + } + return null; + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + writeFullElement(xmlw, name, value, null); + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { + // For the simplest Elements we can + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + if (DvObjectContainer.isMetadataLanguageSet(lang)) { + writeAttribute(xmlw, "xml:lang", lang); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeAttribute(name, value); + } + } + + + public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map attributeMap, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + for (String key : attributeMap.keySet()) { + writeAttribute(xmlw, key, attributeMap.get(key)); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static boolean writeOpenTagIfNeeded(XMLStreamWriter xmlw, String tag, boolean element_check) throws XMLStreamException { + // check if the current tag isn't opened + if (!element_check) { + xmlw.writeStartElement(tag); // + } + return true; + } +} diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 6c771d8337b..4ba5e4a9f14 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2698,7 +2698,7 @@ files.api.fileNotFound=File could not be found. datasets.api.updatePIDMetadata.failure.dataset.must.be.released=Modify Registration Metadata must be run on a published dataset. 
datasets.api.updatePIDMetadata.auth.mustBeSuperUser=Forbidden. You must be a superuser. datasets.api.updatePIDMetadata.success.for.single.dataset=Dataset {0} PID Metadata updated successfully. -datasets.api.updatePIDMetadata.success.for.update.all=All Dataset PID Metadata update completed successfully. +datasets.api.updatePIDMetadata.success.for.update.all=All Dataset PID Metadata update completed. See log for any issues. datasets.api.moveDataset.error.targetDataverseNotFound=Target dataverse not found. datasets.api.moveDataset.error.suggestForce=Use the query parameter forceMove=true to complete the move. datasets.api.moveDataset.success=Dataset moved successfully. diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index 1e4b251b084..d90a7bc5036 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -31,6 +31,7 @@ datasetfieldtype.topicClassValue.title=Term datasetfieldtype.topicClassVocab.title=Controlled Vocabulary Name datasetfieldtype.topicClassVocabURI.title=Controlled Vocabulary URL datasetfieldtype.publication.title=Related Publication +datasetfieldtype.publicationRelationType.title=Relation Type datasetfieldtype.publicationCitation.title=Citation datasetfieldtype.publicationIDType.title=Identifier Type datasetfieldtype.publicationIDNumber.title=Identifier @@ -110,6 +111,7 @@ datasetfieldtype.topicClassValue.description=A topic or subject term datasetfieldtype.topicClassVocab.description=The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) datasetfieldtype.topicClassVocabURI.description=The URL where one can access information about the term's controlled vocabulary datasetfieldtype.publication.description=The article or report that uses the data in the Dataset. 
The full list of related publications will be displayed on the metadata tab +datasetfieldtype.publicationRelationType.description=The nature of the relationship between this Dataset and the related publication datasetfieldtype.publicationCitation.description=The full bibliographic citation for the related publication datasetfieldtype.publicationIDType.description=The type of identifier that uniquely identifies a related publication datasetfieldtype.publicationIDNumber.description=The identifier for a related publication @@ -189,6 +191,7 @@ datasetfieldtype.topicClassValue.watermark= datasetfieldtype.topicClassVocab.watermark= datasetfieldtype.topicClassVocabURI.watermark=https:// datasetfieldtype.publication.watermark= +datasetfieldtype.publicationRelationType.watermark= datasetfieldtype.publicationCitation.watermark= datasetfieldtype.publicationIDType.watermark= datasetfieldtype.publicationIDNumber.watermark= @@ -271,6 +274,12 @@ controlledvocabulary.publicationIDType.upc=upc controlledvocabulary.publicationIDType.url=url controlledvocabulary.publicationIDType.urn=urn controlledvocabulary.publicationIDType.dash-nrs=DASH-NRS +controlledvocabulary.publicationRelationType.iscitedby=Is Cited By +controlledvocabulary.publicationRelationType.cites=Cites +controlledvocabulary.publicationRelationType.issupplementto=Is Supplement To +controlledvocabulary.publicationRelationType.issupplementedby=Is Supplemented By +controlledvocabulary.publicationRelationType.isreferencedby=Is Referenced By +controlledvocabulary.publicationRelationType.references=References controlledvocabulary.contributorType.data_collector=Data Collector controlledvocabulary.contributorType.data_curator=Data Curator controlledvocabulary.contributorType.data_manager=Data Manager diff --git a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml deleted file mode 100644 index 
150a098834e..00000000000 --- a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml +++ /dev/null @@ -1,18 +0,0 @@ - - - ${identifier} - ${creators} - - ${title} - - ${publisher} - ${publisherYear} - - ${relatedIdentifiers} - - ${description} - - {$contributors} - diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 910cce405dd..6de0f00e94e 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -641,6 +641,7 @@ or !empty DatasetPage.datasetVersionUI.keywordDisplay or !empty DatasetPage.datasetVersionUI.subject.value or !empty DatasetPage.datasetVersionUI.relPublicationCitation + or !empty DatasetPage.datasetVersionUI.relPublicationUrl or !empty DatasetPage.datasetVersionUI.notes.value) and !empty DatasetPage.datasetSummaryFields}"> @@ -660,8 +661,10 @@ data-toggle="tooltip" data-placement="auto right" data-original-title="#{DatasetPage.datasetVersionUI.datasetRelPublications.get(0).description}">