From 5f29144762c166c7856958497e24f629d53c92a0 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 4 Dec 2023 12:58:01 -0500 Subject: [PATCH 01/26] adding 6.1 release notes and removing .md files --- ...001-datasets-files-api-user-permissions.md | 13 -- doc/release-notes/10060-api-changelog.md | 3 - .../10093-signedUrl_improvements.md | 5 - .../10104-dataset-citation-deaccessioned.md | 1 - doc/release-notes/6.1-release-notes.md | 195 ++++++++++++++++++ .../9268-8349-oidc-improvements.md | 43 ---- doc/release-notes/9412-markdown-previewer.md | 1 - doc/release-notes/9428-alternative-title.md | 9 - doc/release-notes/9589-ds-configure-tool.md | 1 - doc/release-notes/9590-intellij-redeploy.md | 3 - .../9599-guestbook-at-request.md | 2 - doc/release-notes/9635-solr-improvements.md | 4 - doc/release-notes/9692-files-api-extension.md | 7 - .../9714-files-api-extension-filters.md | 14 -- .../9763-versions-api-improvements.md | 8 - .../9785-files-api-extension-search-text.md | 3 - .../9834-files-api-extension-counts.md | 6 - ...oad-extension-new-file-access-endpoints.md | 14 -- .../9852-files-api-extension-deaccession.md | 12 -- .../9880-info-api-zip-limit-embargo.md | 5 - .../9907-files-api-counts-with-criteria.md | 11 - doc/release-notes/9955-Signposting-updates.md | 7 - ...et-api-downloadsize-ignore-tabular-size.md | 9 - .../9972-files-api-filter-by-tabular-tags.md | 3 - ...with-criteria-and-deaccessioned-support.md | 12 -- 25 files changed, 195 insertions(+), 196 deletions(-) delete mode 100644 doc/release-notes/10001-datasets-files-api-user-permissions.md delete mode 100644 doc/release-notes/10060-api-changelog.md delete mode 100644 doc/release-notes/10093-signedUrl_improvements.md delete mode 100644 doc/release-notes/10104-dataset-citation-deaccessioned.md create mode 100644 doc/release-notes/6.1-release-notes.md delete mode 100644 doc/release-notes/9268-8349-oidc-improvements.md delete mode 100644 doc/release-notes/9412-markdown-previewer.md delete mode 100644 
doc/release-notes/9428-alternative-title.md delete mode 100644 doc/release-notes/9589-ds-configure-tool.md delete mode 100644 doc/release-notes/9590-intellij-redeploy.md delete mode 100644 doc/release-notes/9599-guestbook-at-request.md delete mode 100644 doc/release-notes/9635-solr-improvements.md delete mode 100644 doc/release-notes/9692-files-api-extension.md delete mode 100644 doc/release-notes/9714-files-api-extension-filters.md delete mode 100644 doc/release-notes/9763-versions-api-improvements.md delete mode 100644 doc/release-notes/9785-files-api-extension-search-text.md delete mode 100644 doc/release-notes/9834-files-api-extension-counts.md delete mode 100644 doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md delete mode 100644 doc/release-notes/9852-files-api-extension-deaccession.md delete mode 100644 doc/release-notes/9880-info-api-zip-limit-embargo.md delete mode 100644 doc/release-notes/9907-files-api-counts-with-criteria.md delete mode 100644 doc/release-notes/9955-Signposting-updates.md delete mode 100644 doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md delete mode 100644 doc/release-notes/9972-files-api-filter-by-tabular-tags.md delete mode 100644 doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md diff --git a/doc/release-notes/10001-datasets-files-api-user-permissions.md b/doc/release-notes/10001-datasets-files-api-user-permissions.md deleted file mode 100644 index 0aa75f9218a..00000000000 --- a/doc/release-notes/10001-datasets-files-api-user-permissions.md +++ /dev/null @@ -1,13 +0,0 @@ -- New query parameter `includeDeaccessioned` added to the getVersion endpoint (/api/datasets/{id}/versions/{versionId}) to consider deaccessioned versions when searching for versions. - - -- New endpoint to get user permissions on a dataset (/api/datasets/{id}/userPermissions). 
In particular, the user permissions that this API call checks, returned as booleans, are the following: - - - Can view the unpublished dataset - - Can edit the dataset - - Can publish the dataset - - Can manage the dataset permissions - - Can delete the dataset draft - - -- New permission check "canManageFilePermissions" added to the existing endpoint for getting user permissions on a file (/api/access/datafile/{id}/userPermissions). \ No newline at end of file diff --git a/doc/release-notes/10060-api-changelog.md b/doc/release-notes/10060-api-changelog.md deleted file mode 100644 index 56ac96e3564..00000000000 --- a/doc/release-notes/10060-api-changelog.md +++ /dev/null @@ -1,3 +0,0 @@ -We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html - -See also #10060. diff --git a/doc/release-notes/10093-signedUrl_improvements.md b/doc/release-notes/10093-signedUrl_improvements.md deleted file mode 100644 index 26a17c65e3f..00000000000 --- a/doc/release-notes/10093-signedUrl_improvements.md +++ /dev/null @@ -1,5 +0,0 @@ -A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - -SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - -Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
diff --git a/doc/release-notes/10104-dataset-citation-deaccessioned.md b/doc/release-notes/10104-dataset-citation-deaccessioned.md deleted file mode 100644 index 0ba06d729c4..00000000000 --- a/doc/release-notes/10104-dataset-citation-deaccessioned.md +++ /dev/null @@ -1 +0,0 @@ -The getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md new file mode 100644 index 00000000000..c2b52ab34b8 --- /dev/null +++ b/doc/release-notes/6.1-release-notes.md @@ -0,0 +1,195 @@ +# Dataverse 6.1 + +(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) + +This release brings new features, enhancements, and bug fixes to the Dataverse software. +Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. + +## Release Highlights (Major Upgrades, Breaking Changes) + +This release contains major upgrades to core components. Detailed upgrade instructions can be found below. + +## Detailed Release Highlights, New Features and Use Case Scenarios + +### Dataverse installation can now be configured to allow out-of-band upload +- Installation can now be configured to allow out-of-band upload by setting the `dataverse.files.<id>.upload-out-of-band` JVM option to `true`. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API.
With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. + +### Alternative Title is made repeatable. +- One will need to update database with updated citation block. + `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` +- One will also need to update solr schema: + Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` + Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` + +Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since the value of alternative title has changed from a simple string to an array.
+For example, instead of "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] + +### Improvements in the /versions API +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. +### The following API endpoints have been added: + +- /api/files/{id}/downloadCount +- /api/files/{id}/dataTables +- /api/files/{id}/metadata/tabularTags New endpoint to set tabular file tags. +- canManageFilePermissions (/api/access/datafile/{id}/userPermissions) Added for getting user permissions on a file. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). +- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. +- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. +- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version.
+- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). +- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. +- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. + +### Extended the existing endpoints: +- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. +- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. Added support for filtering by optional criteria query parameter: + - contentType + - accessStatus + - categoryName + - tabularTagName + - searchText +- getDownloadSize ("api/datasets/{identifier}/versions/{versionId}/downloadsize"): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. 
Added a new optional query parameter "mode" +This parameter applies a filter criteria to the operation and supports the following values: + - All (Default): Includes both archival and original sizes for tabular files + - Archival: Includes only the archival size for tabular files + - Original: Includes only the original size for tabular files. +- /api/datasets/{id}/versions/{versionId} New query parameter `includeDeaccessioned` added to consider deaccessioned versions when searching for versions. +- /api/datasets/{id}/userPermissions Get user permissions on a dataset, in particular, the user permissions that this API call checks, returned as booleans, are the following: + - Can view the unpublished dataset + - Can edit the dataset + - Can publish the dataset + - Can manage the dataset permissions + - Can delete the dataset draft +- getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. + + +### DataFile API payload has been extended to include the following fields: +- tabularData: Boolean field to know if the DataFile is of tabular type +- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. 
For example: "image/png". + + +### Misc +- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. + +- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + +- Dataverse's OAI_ORE Metadata Export format and archival BagIT exports +(which include the OAI-ORE metadata export file) have been updated to include +information about the dataset version state, e.g. RELEASED or DEACCESSIONED +and to indicate which version of Dataverse was used to create the archival Bag. +As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 +version designation and it is expected that any future changes to the OAI_ORE export +format will result in a version change and that tools such as DVUploader that can +recreate datasets from archival Bags will start indicating which version(s) of the +OAI_ORE format they can read. +Dataverse installations that have been using archival Bags may wish to update any +existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse +[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +to generate updated versions. + +- This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. 
These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. + - To fix #9952, we surround the license info with `<` and `>`. + - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information + - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. + +- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +See also #10060. + +### Solr Improvements +- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. + +Please see the "Installing Solr" section of the Installation Prerequisites guide. + + +### Development +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools + +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews + +- A new version of the standard Dataverse Previewers from https://github.com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended.
+ - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. + - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. + +## OpenID Connect Authentication Provider Improvements + +### Using MicroProfile Config For Provisioning + +With this release it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. + +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. + +### Adding PKCE Support + +Some OIDC providers require using PKCE as an additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +## Improved Testing + +With this release, we add a new type of testing to Dataverse: integration tests which are not end-to-end tests +like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition +of both OIDC login options in UI and API. + +The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. + +The support for setting JVM options during testing has been improved for developers. You now may add the +`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends.
+ +As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. + +## New Configuration Options + +- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.client-id +- dataverse.auth.oidc.client-secret +- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.method +- dataverse.auth.oidc.title +- dataverse.auth.oidc.subtitle +- dataverse.auth.oidc.pkce.max-cache-size +- dataverse.auth.oidc.pkce.max-cache-age + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. + +These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. 
diff --git a/doc/release-notes/9268-8349-oidc-improvements.md b/doc/release-notes/9268-8349-oidc-improvements.md deleted file mode 100644 index ddfc13e603c..00000000000 --- a/doc/release-notes/9268-8349-oidc-improvements.md +++ /dev/null @@ -1,43 +0,0 @@ -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support - -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. - -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. 
- -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. - -## New Configuration Options - -- dataverse.auth.oidc.enabled -- dataverse.auth.oidc.client-id -- dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url -- dataverse.auth.oidc.pkce.enabled -- dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title -- dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age diff --git a/doc/release-notes/9412-markdown-previewer.md b/doc/release-notes/9412-markdown-previewer.md deleted file mode 100644 index 8faa2679fb0..00000000000 --- a/doc/release-notes/9412-markdown-previewer.md +++ /dev/null @@ -1 +0,0 @@ -There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews diff --git a/doc/release-notes/9428-alternative-title.md b/doc/release-notes/9428-alternative-title.md deleted file mode 100644 index 3bc74f218b5..00000000000 --- a/doc/release-notes/9428-alternative-title.md +++ /dev/null @@ -1,9 +0,0 @@ -Alternative Title is made repeatable. -- One will need to update database with updated citation block. -`curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: -Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` -Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` - -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. 
-For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] diff --git a/doc/release-notes/9589-ds-configure-tool.md b/doc/release-notes/9589-ds-configure-tool.md deleted file mode 100644 index 70ac5fcaa6a..00000000000 --- a/doc/release-notes/9589-ds-configure-tool.md +++ /dev/null @@ -1 +0,0 @@ -Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. diff --git a/doc/release-notes/9590-intellij-redeploy.md b/doc/release-notes/9590-intellij-redeploy.md deleted file mode 100644 index 07af352ece4..00000000000 --- a/doc/release-notes/9590-intellij-redeploy.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. - -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools diff --git a/doc/release-notes/9599-guestbook-at-request.md b/doc/release-notes/9599-guestbook-at-request.md deleted file mode 100644 index e9554b71fb4..00000000000 --- a/doc/release-notes/9599-guestbook-at-request.md +++ /dev/null @@ -1,2 +0,0 @@ -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. 
diff --git a/doc/release-notes/9635-solr-improvements.md b/doc/release-notes/9635-solr-improvements.md deleted file mode 100644 index ad55ee3afe6..00000000000 --- a/doc/release-notes/9635-solr-improvements.md +++ /dev/null @@ -1,4 +0,0 @@ -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. - diff --git a/doc/release-notes/9692-files-api-extension.md b/doc/release-notes/9692-files-api-extension.md deleted file mode 100644 index baa8e2f87cd..00000000000 --- a/doc/release-notes/9692-files-api-extension.md +++ /dev/null @@ -1,7 +0,0 @@ -The following API endpoints have been added: - -- /api/files/{id}/downloadCount -- /api/files/{id}/dataTables -- /access/datafile/{id}/userPermissions - -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination and ordering diff --git a/doc/release-notes/9714-files-api-extension-filters.md b/doc/release-notes/9714-files-api-extension-filters.md deleted file mode 100644 index 034230efe61..00000000000 --- a/doc/release-notes/9714-files-api-extension-filters.md +++ /dev/null @@ -1,14 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by: - -- Access status: through the `accessStatus` query parameter, which supports the following values: - - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic - - -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. - - -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". 
diff --git a/doc/release-notes/9763-versions-api-improvements.md b/doc/release-notes/9763-versions-api-improvements.md deleted file mode 100644 index 8d7f6c7a20a..00000000000 --- a/doc/release-notes/9763-versions-api-improvements.md +++ /dev/null @@ -1,8 +0,0 @@ -# Improvements in the /versions API - -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions; -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output; -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. - -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. - diff --git a/doc/release-notes/9785-files-api-extension-search-text.md b/doc/release-notes/9785-files-api-extension-search-text.md deleted file mode 100644 index fb185e1c7af..00000000000 --- a/doc/release-notes/9785-files-api-extension-search-text.md +++ /dev/null @@ -1,3 +0,0 @@ -The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support optional filtering by search text through the `searchText` query parameter. - -The search will be applied to the labels and descriptions of the dataset files. 
diff --git a/doc/release-notes/9834-files-api-extension-counts.md b/doc/release-notes/9834-files-api-extension-counts.md deleted file mode 100644 index 3ec15d8bd36..00000000000 --- a/doc/release-notes/9834-files-api-extension-counts.md +++ /dev/null @@ -1,6 +0,0 @@ -Implemented the following new endpoints: - -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Given a dataset and its version, retrieves file counts based on different criteria (Total count, per content type, per access status and per category name). - - -- setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. diff --git a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md b/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md deleted file mode 100644 index f306ae2ab80..00000000000 --- a/doc/release-notes/9851-datafile-payload-extension-new-file-access-endpoints.md +++ /dev/null @@ -1,14 +0,0 @@ -Implemented the following new endpoints: - -- userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - - -- hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. 
- - -In addition, the DataFile API payload has been extended to include the following fields: - -- tabularData: Boolean field to know if the DataFile is of tabular type - - -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) diff --git a/doc/release-notes/9852-files-api-extension-deaccession.md b/doc/release-notes/9852-files-api-extension-deaccession.md deleted file mode 100644 index 55698580e3c..00000000000 --- a/doc/release-notes/9852-files-api-extension-deaccession.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the existing endpoints: - -- getVersionFiles (/api/datasets/{id}/versions/{versionId}/files) -- getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts) - -The above endpoints now accept a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files or file counts. - -Additionally, a new endpoint has been developed to support version deaccessioning through API (Given a dataset and a version). - -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession) - -Finally, the DataFile API payload has been extended to add the field "friendlyType" diff --git a/doc/release-notes/9880-info-api-zip-limit-embargo.md b/doc/release-notes/9880-info-api-zip-limit-embargo.md deleted file mode 100644 index d2afb139e72..00000000000 --- a/doc/release-notes/9880-info-api-zip-limit-embargo.md +++ /dev/null @@ -1,5 +0,0 @@ -Implemented the following new endpoints: - -- getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - -- getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. 
diff --git a/doc/release-notes/9907-files-api-counts-with-criteria.md b/doc/release-notes/9907-files-api-counts-with-criteria.md deleted file mode 100644 index 07cd23daad0..00000000000 --- a/doc/release-notes/9907-files-api-counts-with-criteria.md +++ /dev/null @@ -1,11 +0,0 @@ -Extended the getVersionFileCounts endpoint (/api/datasets/{id}/versions/{versionId}/files/counts) to support filtering by criteria. - -In particular, the endpoint now accepts the following optional criteria query parameters: - -- contentType -- accessStatus -- categoryName -- tabularTagName -- searchText - -This filtering criteria is the same as the one for the getVersionFiles endpoint. diff --git a/doc/release-notes/9955-Signposting-updates.md b/doc/release-notes/9955-Signposting-updates.md deleted file mode 100644 index db0e27e51c5..00000000000 --- a/doc/release-notes/9955-Signposting-updates.md +++ /dev/null @@ -1,7 +0,0 @@ -This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - -To fix #9952, we surround the license info with `<` and `>`. - -To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - -To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. 
diff --git a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md b/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md deleted file mode 100644 index 2ede679b361..00000000000 --- a/doc/release-notes/9958-dataset-api-downloadsize-ignore-tabular-size.md +++ /dev/null @@ -1,9 +0,0 @@ -Added a new optional query parameter "mode" to the "getDownloadSize" API endpoint ("api/datasets/{identifier}/versions/{versionId}/downloadsize"). - -This parameter applies a filter criteria to the operation and supports the following values: - -- All (Default): Includes both archival and original sizes for tabular files - -- Archival: Includes only the archival size for tabular files - -- Original: Includes only the original size for tabular files diff --git a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md b/doc/release-notes/9972-files-api-filter-by-tabular-tags.md deleted file mode 100644 index 9c3fced1741..00000000000 --- a/doc/release-notes/9972-files-api-filter-by-tabular-tags.md +++ /dev/null @@ -1,3 +0,0 @@ -- New query parameter `tabularTagName` added to the getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) to return files to which the particular tabular tag has been added. - -- New endpoint to set tabular file tags via API: /api/files/{id}/metadata/tabularTags. 
diff --git a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md b/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md deleted file mode 100644 index 020224b2094..00000000000 --- a/doc/release-notes/9995-files-api-downloadsize-with-criteria-and-deaccessioned-support.md +++ /dev/null @@ -1,12 +0,0 @@ -Extended the getDownloadSize endpoint (/api/datasets/{id}/versions/{versionId}/downloadsize), including the following new features: - -- The endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned dataset versions when searching for versions to obtain the file total download size. - - -- The endpoint now supports filtering by criteria. In particular, it accepts the following optional criteria query parameters: - - - contentType - - accessStatus - - categoryName - - tabularTagName - - searchText From 0bd9f139e5dca2851ca88ed12c5e31af9c5bbfe9 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 11:01:04 -0500 Subject: [PATCH 02/26] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index c2b52ab34b8..06a3e01f7af 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -1,6 +1,6 @@ # Dataverse 6.1 -(If this note appears truncated on the GitHub Releases page, you can view it in full in the source tree: https://github.com/IQSS/dataverse/blob/master/doc/release-notes/6.1-release-notes.md) +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.1 rather than the list of releases, which will cut them off. 
This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. From 6fee16dec8125390ea6aa7221a19fde0db2b9730 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 6 Dec 2023 11:52:24 -0500 Subject: [PATCH 03/26] #10151 incorporate json schema --- doc/release-notes/6.1-release-notes.md | 6 +++++- doc/release-notes/9464-json-validation.md | 3 --- 2 files changed, 5 insertions(+), 4 deletions(-) delete mode 100644 doc/release-notes/9464-json-validation.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 06a3e01f7af..990ba219cad 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -24,7 +24,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value canbe "value": ["Alternative Title1", "Alternative Title2"] +For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -45,6 +45,8 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. 
The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. +- getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. +- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -112,6 +114,8 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. 
(Issue #9464 and #9465) + ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md deleted file mode 100644 index f104263ba35..00000000000 --- a/doc/release-notes/9464-json-validation.md +++ /dev/null @@ -1,3 +0,0 @@ -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) - -For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html From 6d2f87ca93c108a9b4ec4905372a2e1709b3f5cf Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:24:26 -0500 Subject: [PATCH 04/26] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 990ba219cad..4b5c20f3953 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -12,8 +12,8 @@ This release contains major upgrades to core components. 
Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios ### Dataverse installation can be now be configured to allow out-of-band upload -- Installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). +In some situations, direct upload might not work from the UI, e.g., when S3 storage is not accessible from the internet. This release adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third-party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. +By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store.
### Alternative Title is made repeatable. @@ -23,7 +23,7 @@ With the upload-out-of-band option enabled, it is also possible for file upload Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatable with a new version since value of alternative title has changed from simple string to an array. +Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] ### Improvements in the /versions API @@ -70,7 +70,6 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. - ### DataFile API payload has been extended to include the following fields: - tabularData: Boolean field to know if the DataFile is of tabular type - fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) @@ -114,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. 
The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release funtionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -125,12 +124,13 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. 
For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. +- It is now possible to add the `@JvmSetting` annotation to classes (also inner classes) and to reference factory methods for values. This improvement is +also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. ## OpenID Connect Authentication Provider Improvements @@ -175,6 +175,8 @@ As part of these testing improvements, the code coverage report file for unit te - dataverse.auth.oidc.subtitle - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age +- dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.guestbook-at-request ## Installation @@ -182,14 +184,17 @@ If this is a new installation, please follow our [Installation Guide](https://gu Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours!
Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! -You are also very welcome to join the [Global Dataverse Community Consortium](https://dataversecommunity.global) (GDCC). +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). ## Upgrade Instructions - Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +## Backward Incompatibilities +- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, + ## Complete List of Changes For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. From 90ff56ca979cd71f1c467ff1cfa0dfeb8f619691 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:43:43 -0500 Subject: [PATCH 05/26] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 4b5c20f3953..e1a9214a982 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -122,7 +122,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. 
+- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. From 10e0e25fe10dda9f49b6126f591b9483adb2f765 Mon Sep 17 00:00:00 2001 From: Steven Winship <39765413+stevenwinship@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:44:49 -0500 Subject: [PATCH 06/26] Update doc/release-notes/6.1-release-notes.md Co-authored-by: Philip Durbin --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index e1a9214a982..427a07a4c2c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -123,7 +123,7 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. 
-For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools +For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. - There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. From 3d55ed31de8fb9e45a2cedfecf07e22c82dae12a Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:47:53 -0500 Subject: [PATCH 07/26] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 427a07a4c2c..189f21f2322 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -19,21 +19,22 @@ With the upload-out-of-band option enabled, it is also possible for file upload ### Alternative Title is made repeatable. - One will need to update database with updated citation block. 
`curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-8.11.1/server/solr/collection1/conf/schema.xml` - Reload solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +- One will also need to update the Solr schema: + Set multiValued="true" on the "alternativeTitle" field in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` + Reload the Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` -Since Alternative Title is repeatable now, old json apis would not be compatible with a new version since value of alternative title has changed from simple string to an array. +Since Alternative Title is now repeatable, old JSON APIs are not compatible with the new version, because the value of alternative title has changed from a simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] -### Improvements in the /versions API +### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide.
-### The following API endpoints have been added: +### The following API endpoints have been added: +- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - /api/files/{id}/downloadCount - /api/files/{id}/dataTables - /api/files/{id}/metadata/tabularTags New endpoint to set tabular file tags. @@ -42,11 +43,10 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - setFileCategories (/api/files/{id}/metadata/categories): Updates the categories (by name) for an existing file. If the specified categories do not exist, they will be created. - userFileAccessRequested (/api/access/datafile/{id}/userFileAccessRequested): Returns true or false depending on whether or not the calling user has requested access to a particular file. - hasBeenDeleted (/api/files/{id}/hasBeenDeleted): Know if a particular file that existed in a previous version of the dataset no longer exists in the latest version. -- deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - getZipDownloadLimit (/api/info/zipDownloadLimit): Get the configured zip file download limit. The response contains the long value of the limit in bytes. - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. -- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset json file is in proper format and contains the required elements and fields for a given dataverse collection. 
+- validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. @@ -113,7 +113,7 @@ to generate updated versions. - We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to json format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. 
In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -192,7 +192,7 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old json apis would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1be5d4b6b2baddc5f30bf598d81bd5ed991f73ee Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 12:52:39 -0500 Subject: [PATCH 08/26] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 189f21f2322..d0fe895565c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -87,10 +87,8 @@ This parameter applies a filter criteria to the operation and supports the follo ### Misc - Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. - - Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). 
The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. - - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -104,7 +102,7 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. - +- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. 
This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information @@ -124,7 +122,6 @@ Please see the "Installing Solr" section of the Installation Prerequisites guide ### Development - Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews - A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. 
From 4ba629d643678acdd0b649128b8a76a805ee6906 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Wed, 6 Dec 2023 15:28:32 -0500 Subject: [PATCH 09/26] adding review comment changes --- doc/release-notes/6.1-release-notes.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index d0fe895565c..38b99e6580b 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -11,6 +11,10 @@ This release contains major upgrades to core components. Detailed upgrade instru ## Detailed Release Highlights, New Features and Use Case Scenarios +### Optional support for guestbooks to appear when file access is requested rather than after access has been granted and a download is started +Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). + The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. + ### Dataverse installation can be now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API.
With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). @@ -142,7 +146,7 @@ life easier during instance setups and reconfiguration. You no longer need to ge necessary JSON file. ### Adding PKCE Support - +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) From d2427bd39046f104c95e27d1869d1665b969724f Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 09:49:52 -0500 Subject: [PATCH 10/26] #10151 incorporate recent additions --- doc/release-notes/6.1-release-notes.md | 22 +++++++++++++++++++++ doc/release-notes/8549-collection-quotas.md | 3 --- doc/release-notes/8760-bagit.md | 15 -------------- 3 files changed, 22 insertions(+), 18 deletions(-) delete mode 100644 doc/release-notes/8549-collection-quotas.md delete mode 100644 doc/release-notes/8760-bagit.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38b99e6580b..38a7a1064e6 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -30,6 +30,28 @@ With the upload-out-of-band option enabled, it is also possible for file upload Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] +### Collection Storage Size Quota Support +This release adds support for defining storage size quotas for collections. Please see the API guide for details.
This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 + +### BagIT Export Configurations Updated +For BagIT export, it is now possible to configure the following information in bag-info.txt... + +Source-Organization: Harvard Dataverse +Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA +Organization-Email: support@dataverse.harvard.edu + +... using new JVM/MPCONFIG options: + +- dataverse.bagit.sourceorg.name +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email + +Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. + +For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt + + ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output diff --git a/doc/release-notes/8549-collection-quotas.md b/doc/release-notes/8549-collection-quotas.md deleted file mode 100644 index b3635d0c5a1..00000000000 --- a/doc/release-notes/8549-collection-quotas.md +++ /dev/null @@ -1,3 +0,0 @@ -This release adds support for defining storage size quotas for collections. Please see the API guide for details. 
This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 - diff --git a/doc/release-notes/8760-bagit.md b/doc/release-notes/8760-bagit.md deleted file mode 100644 index 30601857309..00000000000 --- a/doc/release-notes/8760-bagit.md +++ /dev/null @@ -1,15 +0,0 @@ -For BagIT export, it is now possible to configure the following information in bag-info.txt... - -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu - -... using new JVM/MPCONFIG options: - -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email - -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. 
- -For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt From 05c53066ea26c809b6376051ff336f11a4bcee9d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:29:47 -0500 Subject: [PATCH 11/26] mention download tmp file API #10151 --- doc/release-notes/6.1-release-notes.md | 1 + doc/release-notes/8760-download-tmp-file.md | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 doc/release-notes/8760-download-tmp-file.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..1b4e884cded 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -73,6 +73,7 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - getMaxEmbargoDurationInMonths (/api/info/settings/:MaxEmbargoDurationInMonths): Get the maximum embargo duration in months, if available, configured through the database setting :MaxEmbargoDurationInMonths. - getDatasetJsonSchema (/api/dataverses/{id}/datasetSchema): Get a dataset schema with the fields required by a given dataverse collection. - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. +- downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. ### Extended the existing endpoints: - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. 
Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. diff --git a/doc/release-notes/8760-download-tmp-file.md b/doc/release-notes/8760-download-tmp-file.md deleted file mode 100644 index 7623a91ac9a..00000000000 --- a/doc/release-notes/8760-download-tmp-file.md +++ /dev/null @@ -1,3 +0,0 @@ -A new API has been added for testing purposes that allows files to be downloaded from /tmp. - -See From 97c33218fa7224c544657e72f52c27d9cd8951bf Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Thu, 7 Dec 2023 10:30:23 -0500 Subject: [PATCH 12/26] remove duplicate "new" heading in API changelog #10151 --- doc/sphinx-guides/source/api/changelog.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index d2908533a14..910134e14f3 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -12,9 +12,6 @@ New ~~~ - **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. - **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. - -New -~~~ - **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`. - **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. 
From 3a13ac8c56385ed2cc82bcc9db4f57fea7688a67 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:34:21 -0500 Subject: [PATCH 13/26] #10151 add upgrade instructions --- doc/release-notes/6.1-release-notes.md | 81 +++++++++++++++++++ .../9002_allow_direct_upload_setting.md | 5 -- 2 files changed, 81 insertions(+), 5 deletions(-) delete mode 100644 doc/release-notes/9002_allow_direct_upload_setting.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 38a7a1064e6..d5972338124 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,6 +51,13 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt +### Direct Upload setting added +A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. + +By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). + +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. 
+ ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions @@ -138,6 +145,7 @@ to generate updated versions. See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. ### Solr Improvements - As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. @@ -214,6 +222,79 @@ Upgrading requires a maintenance window and downtime. Please plan ahead, create These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. +0\. These instructions assume that you are upgrading from 6.0. If you are running an earlier version, the only safe way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to 6.1. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**.
Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands we assume that Payara 6 is installed in `/usr/local/payara6`. If not, adjust as needed. + +`export PAYARA=/usr/local/payara6` + +(or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) + +1\. Undeploy the previous version. + +- `$PAYARA/bin/asadmin undeploy dataverse-6.0` + +2\. Stop Payara and remove the generated directory + +- `service payara stop` +- `rm -rf $PAYARA/glassfish/domains/domain1/generated` + +3\. Start Payara + +- `service payara start` + +4\. Deploy this version. + +- `$PAYARA/bin/asadmin deploy dataverse-6.1.war` + +5\. Restart Payara + +- `service payara stop` +- `service payara start` + +6\. Update Geospatial Metadata Block (to improve validation of bounding box values) + +- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` + +6a\. Update Citation Metadata Block (to make Alternative Title repeatable) + +- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` + +7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). + +7a\.
For installations without custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- Replace schema.xml + + - `cp /tmp/dvinstall/schema.xml /usr/local/solr/solr-9.3.0/server/solr/collection1/conf` + +- Start Solr instance (usually `service solr start`, depending on Solr/OS) + +7b\. For installations with custom or experimental metadata blocks: + +- Stop Solr instance (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/5.14/installation/prerequisites.html#solr-init-script)) + +- There are 2 ways to regenerate the schema: Either by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed): +``` + wget https://raw.githubusercontent.com/IQSS/dataverse/master/conf/solr/9.3.0/update-fields.sh + chmod +x update-fields.sh + curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml +``` +OR, alternatively, you can edit the following line in your schema.xml by hand as follows (to indicate that alternative title is now `multiValued="true"`): +``` +<field name="alternativeTitle" type="text_en" multiValued="true" stored="true" indexed="true"/> +``` + +- Restart Solr instance (usually `service solr restart` depending on solr/OS) + +8\. Run ReExportAll to update dataset metadata exports. Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api).
+ + ## Backward Incompatibilities - Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, diff --git a/doc/release-notes/9002_allow_direct_upload_setting.md b/doc/release-notes/9002_allow_direct_upload_setting.md deleted file mode 100644 index 1e76ed4ad47..00000000000 --- a/doc/release-notes/9002_allow_direct_upload_setting.md +++ /dev/null @@ -1,5 +0,0 @@ -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. - -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). - -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. 
From a78213633e6f5bf345d1aedf4328eee5ee231ffb Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:43:40 -0500 Subject: [PATCH 14/26] #10151 remove notes previously incorporated --- .../9547-validation-for-geospatial-metadata.md | 9 --------- doc/release-notes/9859-ORE and Bag updates.md | 14 -------------- 2 files changed, 23 deletions(-) delete mode 100644 doc/release-notes/9547-validation-for-geospatial-metadata.md delete mode 100644 doc/release-notes/9859-ORE and Bag updates.md diff --git a/doc/release-notes/9547-validation-for-geospatial-metadata.md b/doc/release-notes/9547-validation-for-geospatial-metadata.md deleted file mode 100644 index a44e1a3732b..00000000000 --- a/doc/release-notes/9547-validation-for-geospatial-metadata.md +++ /dev/null @@ -1,9 +0,0 @@ -Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - -For the "upgrade" steps section: - -Update Geospatial Metadata Block - -- `wget https://github.com/IQSS/dataverse/releases/download/v6.1/geospatial.tsv` -- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file @geospatial.tsv` - diff --git a/doc/release-notes/9859-ORE and Bag updates.md b/doc/release-notes/9859-ORE and Bag updates.md deleted file mode 100644 index dd3ae3bbbe1..00000000000 --- a/doc/release-notes/9859-ORE and Bag updates.md +++ /dev/null @@ -1,14 +0,0 @@ -Dataverse's OAI_ORE Metadata Export format and archival BagIT exports -(which include the OAI-ORE metadata export file) have been updated to include -information about the dataset version state, e.g. RELEASED or DEACCESSIONED -and to indicate which version of Dataverse was used to create the archival Bag. 
-As part of the latter, the current OAI_ORE Metadata format has been given a 1.0.0 -version designation and it is expected that any future changes to the OAI_ORE export -format will result in a version change and that tools such as DVUploader that can -recreate datasets from archival Bags will start indicating which version(s) of the -OAI_ORE format they can read. - -Dataverse installations that have been using archival Bags may wish to update any -existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) -to generate updated versions. \ No newline at end of file From b517f6e0fca1802faa4455522a72e711963714ba Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Thu, 7 Dec 2023 10:53:07 -0500 Subject: [PATCH 15/26] #10151 S3 test notes --- doc/release-notes/6.1-release-notes.md | 2 ++ doc/release-notes/6783-s3-tests.md | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 doc/release-notes/6783-s3-tests.md diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 9a35a31a734..375717ab9c9 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -163,6 +163,8 @@ For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.ht - `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. 
+- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. ## OpenID Connect Authentication Provider Improvements diff --git a/doc/release-notes/6783-s3-tests.md b/doc/release-notes/6783-s3-tests.md deleted file mode 100644 index 1b9bb400cc6..00000000000 --- a/doc/release-notes/6783-s3-tests.md +++ /dev/null @@ -1,3 +0,0 @@ -Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. - -In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. From 07a8659b60acdb766fb5a4742cf4ac4537e34615 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 8 Dec 2023 14:24:24 -0500 Subject: [PATCH 16/26] #10151 remove duplicate release note out of band setting previously added --- doc/release-notes/6.1-release-notes.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 375717ab9c9..b6bb7d8b806 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -51,14 +51,6 @@ Previously, customization was possible by editing `Bundle.properties` but this i For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt -### Direct Upload setting added -A Dataverse installation can be now be configured to allow out-of-band upload by setting the `dataverse.files..upload-out-of-band` JVM option to `true`. 
- -By default, Dataverse supports uploading files via the [add a file to a dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). - -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. - - ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output From 1d668970df1562c3cbc85d60be2abc55d8a96572 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 8 Dec 2023 15:56:27 -0500 Subject: [PATCH 17/26] #10151 standard guide links --- doc/release-notes/6.1-release-notes.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index b6bb7d8b806..24194a02026 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -49,14 +49,14 @@ Organization-Email: support@dataverse.harvard.edu Previously, customization was possible by editing `Bundle.properties` but this is no longer 
supported. -For details, see https://dataverse-guide--10122.org.readthedocs.build/en/10122/installation/config.html#bag-info-txt +For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt ### Improvements in the dataset versions API - optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions - a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output - when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/9763-lookup-optimizations/api/native-api.html#dataset-versions-api) section of the Guide. +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. ### The following API endpoints have been added: - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). @@ -128,13 +128,13 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. 
-- There is now a Markdown (.md) previewer: https://dataverse-guide--9986.org.readthedocs.build/en/9986/user/dataset-management.html#file-previews +- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://dataverse-guide--10127.org.readthedocs.build/en/10127/api/changelog.html +- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. 
(Issue #9464 and #9465) From 85206de08acb6a8373199fb0d4eec2768cb6763d Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Fri, 8 Dec 2023 15:59:21 -0500 Subject: [PATCH 18/26] simply API changelog to be about breaking changes only #10151 --- doc/release-notes/6.1-release-notes.md | 2 +- doc/sphinx-guides/source/api/changelog.rst | 19 +++++-------------- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 24194a02026..a3b04749d68 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -134,7 +134,7 @@ to generate updated versions. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. -- We have started maintaining an API changelog: https://guides.dataverse.org/en/6.1/api/changelog.html +- We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. - Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. 
(Issue #9464 and #9465) diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 910134e14f3..20225b99b5c 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -1,5 +1,7 @@ -API Changelog -============= +API Changelog (Breaking Changes) +================================ + +This API changelog is experimental and we would love feedback on its usefulness. Its primary purpose is to inform API developers of any breaking changes. (We try not to ship any backward incompatible changes, but it happens.) To see a list of new APIs and backward-compatible changes to existing APIs, please see each version's release notes at https://github.com/IQSS/dataverse/releases .. contents:: |toctitle| :local: @@ -8,20 +10,9 @@ API Changelog v6.1 ---- -New -~~~ -- **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. -- **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. -- **/api/admin/clearThumbnailFailureFlag**: See :ref:`thumbnail_reset`. -- **/api/admin/downloadTmpFile**: See :ref:`download-file-from-tmp`. - -Changes -~~~~~~~ -- **/api/datasets/{id}/versions/{versionId}/citation**: This endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. See :ref:`get-citation`. +- The metadata field "Alternative Title" now supports multiple values so you must pass an array rather than a string when populating that field via API. See https://github.com/IQSS/dataverse/pull/9440 v6.0 ---- -Changes -~~~~~~~ - **/api/access/datafile**: When a null or invalid API token is provided to download a public (non-restricted) file with this API call, it will result on a ``401`` error response. Previously, the download was allowed (``200`` response).
Please note that we noticed this change sometime between 5.9 and 6.0. If you can help us pinpoint the exact version (or commit!), please get in touch. See :doc:`dataaccess`. From 44bd5b7fb6d697d356d857a73847e1637aaa5763 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:19:46 -0500 Subject: [PATCH 19/26] add perf test results --- doc/release-notes/6.1-release-notes.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index a3b04749d68..b03a7a62baa 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -258,7 +258,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -7\. Upate Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). +7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). 7a\. For installations without custom or experimental metadata blocks: @@ -298,6 +298,10 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. 
+## Performance Testing Results +The results of performance testing can be found here: +https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 + ## Getting Help For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 173b8a7a067b392de8e1c900c3e1d9eb806c71d6 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:25:44 -0500 Subject: [PATCH 20/26] fix backward comp Alternative Title --- doc/release-notes/6.1-release-notes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index b03a7a62baa..5bc0df4640c 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -291,7 +291,7 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as ## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version +- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. 
Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, ## Complete List of Changes From 1959f2ff22d9bbc4290a586fc49f1f49eccdbd04 Mon Sep 17 00:00:00 2001 From: Steven Winship Date: Mon, 11 Dec 2023 11:29:24 -0500 Subject: [PATCH 21/26] removed unneeded header --- doc/release-notes/6.1-release-notes.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 5bc0df4640c..6d3d1912f81 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -6,11 +6,8 @@ This release brings new features, enhancements, and bug fixes to the Dataverse s Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. ## Release Highlights (Major Upgrades, Breaking Changes) - This release contains major upgrades to core components. Detailed upgrade instructions can be found below. -## Detailed Release Highlights, New Features and Use Case Scenarios - ### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. 
From a4e25e17155896ae5c335ea8169229f248eaf22b Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 12:15:56 -0500 Subject: [PATCH 22/26] reorg 6.1 release notes, add globus #10151 --- doc/release-notes/6.1-release-notes.md | 262 +++++++++++++------------ 1 file changed, 137 insertions(+), 125 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 6d3d1912f81..475d4fc0887 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -5,57 +5,96 @@ Please note: To read these instructions in full, please go to https://github.com This release brings new features, enhancements, and bug fixes to the Dataverse software. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. -## Release Highlights (Major Upgrades, Breaking Changes) -This release contains major upgrades to core components. Detailed upgrade instructions can be found below. +## Release highlights -### Optional support for guestbooks to appear when files access is requested rather than after access has been granted and a download is started -Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). - The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. +### Guestbook at request + +Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` option) to display any configured guestbook to users when they request restricted files (new functionality) or when they download files (previous behavior). 
+ +The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. + +### Collection-level storage quotas + +This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. +Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of Dataverse 6.1. + +### Globus support + +Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Please note: + +- Globus functionality remains experimental/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. +- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model. +- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus). 
+- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) + +Backward incompatibilities: +- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism +- The new functionality is incompatible with older versions of the dataverse-globus app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. + +New JVM options: +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between when an initial request for a file transfer occurs and when that transfer must begin. + +Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used + +Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) + +### Alternative Title now allows multiple values + +Alternative Title now allows multiple values. Note that JSON used to create a dataset with an Alternative Title must be changed. See "Backward incompatibilities" below for details. + +### External tools: configure tools now available at the dataset level + +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589.
+ +### S3 out-of-band upload -### Dataverse installation can be now be configured to allow out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API. With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. -### Alternative Title is made repeatable. -- One will need to update database with updated citation block. 
- `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -- One will also need to update Solr schema: - Change in "alternativeTitle" field multiValued="true" in `/usr/local/solr/solr-9.3.0/server/solr/collection1/conf/schema.xml` - Reload Solr schema: `curl "http://localhost:8983/solr/admin/cores?action=RELOAD&core=collection1"` +### JSON Schema for datasets -Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version since value of alternative title has changed from simple string to an array. -For example, instead "value": "Alternative Title", the value can be "value": ["Alternative Title1", "Alternative Title2"] +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -### Collection Storage Size Quota Support --This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. -Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. 
On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of 6.1 +### OpenID Connect authentication provider improvements -### BagIT Export Configurations Updated -For BagIT export, it is now possible to configure the following information in bag-info.txt... +#### Using MicroProfile Config for provisioning -Source-Organization: Harvard Dataverse -Organization-Address: 1737 Cambridge Street, Cambridge, MA, USA -Organization-Email: support@dataverse.harvard.edu +With this release it is possible to provision a single OIDC-based authentication provider +by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. -... using new JVM/MPCONFIG options: +If you are using an external OIDC provider component as an identity management system and/or broker +to other authentication providers such as Google, eduGain SAML and so on, this might make your +life easier during instance setups and reconfiguration. You no longer need to generate the +necessary JSON file. -- dataverse.bagit.sourceorg.name -- dataverse.bagit.sourceorg.address -- dataverse.bagit.sourceorg.email +#### Adding PKCE Support -Previously, customization was possible by editing `Bundle.properties` but this is no longer supported. +[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) +Some OIDC providers require using PKCE as an additional security layer. As of this version, you can enable +support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.)
-For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +### Solr improvements -### Improvements in the dataset versions API -- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions -- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output -- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. +As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. +Please see the "Installing Solr" section of the Installation Prerequisites guide. + +### New release of Dataverse Previewers (including a Markdown previewer) + +Version 1.4 of the standard Dataverse Previewers from https://github.com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. Please note: + +- SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. +- Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045.
+- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews + +### New or improved APIs + +The development of a [new UI for Dataverse](https://github.com/IQSS/dataverse-frontend) is driving the addition or improvement of many APIs. + +#### New API endpoints -### The following API endpoints have been added: - deaccessionDataset (/api/datasets/{id}/versions/{versionId}/deaccession): version deaccessioning through API (Given a dataset and a version). - /api/files/{id}/downloadCount - /api/files/{id}/dataTables @@ -71,7 +110,33 @@ This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/ - validateDatasetJsonSchema (/api/dataverses/{id}/validateDatasetJson): Validate that a dataset JSON file is in proper format and contains the required elements and fields for a given dataverse collection. - downloadTmpFile (/api/admin/downloadTmpFile): For testing purposes, allows files to be downloaded from /tmp. -### Extended the existing endpoints: +#### Pagination of files in dataset versions + +- optional pagination has been added to `/api/datasets/{id}/versions` that may be useful in datasets with a large number of versions +- a new flag `includeFiles` is added to both `/api/datasets/{id}/versions` and `/api/datasets/{id}/versions/{vid}` (true by default), providing an option to drop the file information from the output +- when files are requested to be included, some database lookup optimizations have been added to improve the performance on datasets with large numbers of files. + +This is reflected in the [Dataset Versions API](https://guides.dataverse.org/en/6.1/api/native-api.html#dataset-versions-api) section of the Guide. 
+ + +#### DataFile API payload has been extended to include the following fields + +- tabularData: Boolean field to know if the DataFile is of tabular type +- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) +- friendlyType: String + +#### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering + +- Access status: through the `accessStatus` query parameter, which supports the following values: + - Public + - Restricted + - EmbargoedThenRestricted + - EmbargoedThenPublic +- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. +- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". + +#### Additional improvements to existing API endpoints + - getVersionFiles (/api/datasets/{id}/versions/{versionId}/files): Extended to support optional filtering by search text through the `searchText` query parameter. The search will be applied to the labels and descriptions of the dataset files. Added `tabularTagName` to return files to which the particular tabular tag has been added. Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain files. - getVersionFileCounts (/api/datasets/{id}/versions/{versionId}/files/counts): Added optional boolean query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain file counts. 
Added support for filtering by optional criteria query parameter: - contentType @@ -93,25 +158,21 @@ This parameter applies a filter criteria to the operation and supports the follo - Can delete the dataset draft - getDatasetVersionCitation (/api/datasets/{id}/versions/{versionId}/citation) endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. -### DataFile API payload has been extended to include the following fields: -- tabularData: Boolean field to know if the DataFile is of tabular type -- fileAccessRequest: Boolean field to know if the file access requests are enabled on the Dataset (DataFile owner) -- friendlyType: String +### Improvements for developers -### The getVersionFiles endpoint (/api/datasets/{id}/versions/{versionId}/files) has been extended to support pagination, ordering, and optional filtering -- Access status: through the `accessStatus` query parameter, which supports the following values: - - Public - - Restricted - - EmbargoedThenRestricted - - EmbargoedThenPublic -- Category name: through the `categoryName` query parameter. To return files to which the particular category has been added. -- Content type: through the `contentType` query parameter. To return files matching the requested content type. For example: "image/png". +- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (with the Payara Platform Tools plugin). For details, see https://guides.dataverse.org/en/6.1/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. +- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. 
API (end to end) tests are in S3AccessIT. +- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. +- With this release, we add a new type of testing to Dataverse: integration tests which are not end-to-end tests (like our API tests). Starting with OIDC authentication support, we test regularly on CI for working condition of both OIDC login options in UI and API. +- The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. +- The support for setting JVM options during testing has been improved for developers. You now may add the `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. +- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. +## Major use cases and infrastructure enhancements -### Misc -- Configure tools are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. -- Dataverse can now be configured (via the dataverse.files.guestbook-at-request option) to display any configured guestbook to users when they request restricted file(s) or when they download files (the historic default). -The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default - showing guestbooks when files are downloaded - remains as it was in prior Dataverse versions. 
+Changes and fixes in this release not already mentioned above include: + +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -125,68 +186,18 @@ Dataverse installations that have been using archival Bags may wish to update an existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse [archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) to generate updated versions. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt + - Source-Organization from `dataverse.bagit.sourceorg.name`. + - Organization-Address from `dataverse.bagit.sourceorg.address`. + - Organization-Email from `dataverse.bagit.sourceorg.email`. - This release fixes several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification. These changes introduce backward-incompatibility, but since Signposting support was added recently (in Dataverse 5.14 in PR #8981), we feel it's best to do this clean up and not support the old implementation that was not fully compliant with the spec.
- To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`. For backward compatibility, we are still supporting the old one, for now. - - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. -- Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue 9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. - -### Solr Improvements -- As of this release application-side support is added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. - -Please see the "Installing Solr" section of the Installation Prerequisites guide. 
- - -### Development -- Developers can enjoy a dramatically faster feedback loop when iterating on code if they are using Netbeans or IntelliJ IDEA Ultimate (free educational licenses are available) and the Payara Platform Tools plugin. -For details, see http://preview.guides.gdcc.io/en/develop/container/dev-usage.html#intellij-idea-ultimate-and-payara-platform-tools and [the thread](https://groups.google.com/g/dataverse-community/c/zNBDzSMF2Q0/m/Z-xS6fA2BgAJ) on the mailing list. -- A new version of the standard Dataverse Previewers from https://github/com/gdcc/dataverse-previewers is available. The new version supports the use of signedUrls rather than API keys when previewing restricted files (including files in draft dataset versions). Upgrading is highly recommended. - - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- `@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. -- As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. -- Developers can now test S3 locally by using the Dockerized development environment, which now includes both LocalStack and MinIO. API (end to end) tests are in S3AccessIT. -- In addition, a new integration test class (not an API test, the new Testcontainers-based test launched with `mvn verify`) has been added at S3AccessIOLocalstackIT. It uses Testcontainers to spin up Localstack for S3 testing and does not require Dataverse to be running. 
- -## OpenID Connect Authentication Provider Improvements - -### Using MicroProfile Config For Provisioning - -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. - -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. - -### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) - -## Improved Testing - -With this release, we add a new type of testing to Dataverse: integration tests which are no end-to-end tests -like our API tests. Starting with OIDC authentication support, we test regularly on CI for working condition -of both OIDC login options in UI and API. - -The testing and development Keycloak realm has been updated with more users and compatibility with Keycloak 21. - -The support for setting JVM options during testing has been improved for developers. You now may add the -`@JvmSetting` annotation to classes (also inner classes) and reference factory methods for values. This improvement is -also paving the way to enable manipulating JVM options during end-to-end tests on remote ends. - -As part of these testing improvements, the code coverage report file for unit tests has moved from `target/jacoco.exec` to `target/coverage-reports/jacoco-unit.exec`. 
- -## New Configuration Options +## New configuration options - dataverse.auth.oidc.enabled - dataverse.auth.oidc.client-id @@ -199,8 +210,24 @@ As part of these testing improvements, the code coverage report file for unit te - dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.max-cache-age - dataverse.files.{driverId}.upload-out-of-band +- dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +## Backward incompatibilities + +- Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string. For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] +- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- See "Globus support" above for backward incompatibilities specific to Globus. + +## Complete list of changes + +For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. + +## Getting help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. + ## Installation If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/).
Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! @@ -209,7 +236,7 @@ Once you are in production, we would be delighted to update our [map of Datavers You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). -## Upgrade Instructions +## Upgrade instructions Upgrading requires a maintenance window and downtime. Please plan ahead, create backups of your database, etc. These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.0. @@ -241,6 +268,8 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `$PAYARA/bin/asadmin deploy dataverse-6.1.war` +As noted above, deployment of the war file might take several minutes due to a database migration script required for the new storage quotas feature. + 5\. Restart Payara - `service payara stop` @@ -255,7 +284,7 @@ In the following commands we assume that Payara 6 is installed in `/usr/local/pa - `curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file scripts/api/data/metadatablocks/citation.tsv` -7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). +7\. Update Solr schema.xml to allow multiple Alternative Titles to be used. See specific instructions below for those installations without custom metadata blocks (7a) and those with custom metadata blocks (7b). 7a\. For installations without custom or experimental metadata blocks: @@ -285,20 +314,3 @@ OR, alternatively, you can edit the following line in your schema.xml by hand as - Restart Solr instance (usually `service solr restart` depending on solr/OS) 8\. Run ReExportAll to update dataset metadata exports.
Follow the directions in the [Admin Guide](http://guides.dataverse.org/en/5.14/admin/metadataexport.html#batch-exports-through-the-api). - - -## Backward Incompatibilities -- Since Alternative Title is repeatable now, old JSON APIs would not be compatible with a new version. Alternative Title must now be passed as an array of strings rather than a single string ([alt title]) -- Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility, - -## Complete List of Changes - -For the complete list of code changes in this release, see the [6.1 Milestone](https://github.com/IQSS/dataverse/milestone/110?closed=1) in GitHub. - -## Performance Testing Results -The results of performance testing can be found here: -https://docs.google.com/spreadsheets/d/1lwPlifvgu3-X_6xLwq6Zr6sCOervr1mV_InHIWjh5KA/edit#gid=0 - -## Getting Help - -For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/forum/#!forum/dataverse-community) or email support@dataverse.org. From 011b9291e6f694631d237bd047c3a170e6e93a2e Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 13:58:08 -0500 Subject: [PATCH 23/26] remove globus snippet (already added) #10151 --- doc/release-notes/10162-globus-support.md | 19 ------------------- 1 file changed, 19 deletions(-) delete mode 100644 doc/release-notes/10162-globus-support.md diff --git a/doc/release-notes/10162-globus-support.md b/doc/release-notes/10162-globus-support.md deleted file mode 100644 index 60670b5b101..00000000000 --- a/doc/release-notes/10162-globus-support.md +++ /dev/null @@ -1,19 +0,0 @@ -Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible, -and for referencing files stored on remote Globus endpoints. 
Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. -Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/latest/developers/big-data-support.html#big-data-support) -- Globus functionality remains 'experimental'/advanced in that it requires significant setup, differs in multiple ways from other file storage mechanisms, and may continue to evolve with the potential for backward incompatibilities. -- The functionality is configured per store and replaces the previous single-S3-Connector-per-Dataverse-instance model -- Adding files to a dataset, and accessing files is supported via the Dataverse user interface through a separate [dataverse-globus app](https://github.com/scholarsportal/dataverse-globus) -- The functionality is also accessible via APIs (combining calls to the Dataverse and Globus APIs) - -Backward Incompatibilities: -- The configuration for use of a Globus S3 Connector has changed and is aligned with the standard store configuration mechanism -- The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. - -New JVM Options: -- A new 'globus' store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). -- dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. 
- - - -Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used From 3e32f42959dce41e9c21c9e2285fdf719b048dc0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 14:57:52 -0500 Subject: [PATCH 24/26] link to guides in more places, other tweaks #10151 --- doc/release-notes/6.1-release-notes.md | 43 +++++++++++++------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 475d4fc0887..fab11ce4959 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -13,12 +13,17 @@ Dataverse can now be configured (via the `dataverse.files.guestbook-at-request` The global default defined by this setting can be overridden at the collection level on the collection page and at the individual dataset level by a superuser using the API. The default, showing guestbooks when files are downloaded, remains as it was in prior Dataverse versions. +For details, see [dataverse.files.guestbook-at-request](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-guestbook-at-request) and PR #9599. + ### Collection-level storage quotas This release adds support for defining storage size quotas for collections. Please see the API guide for details. This is an experimental feature that has not yet been used in production on any real life Dataverse instance, but we are planning to try it out at Harvard/IQSS. + Please note that this release includes a database update (via a Flyway script) that will calculate the storage sizes of all the existing datasets and collections on the first deployment. On a large production database with tens of thousands of datasets this may add a couple of extra minutes to the first, initial deployment of Dataverse 6.1. 
-### Globus support +For details, see [Storage Quotas for Collections](https://guides.dataverse.org/en/6.1/admin/collectionquotas.html) in the Admin Guide. + +### Globus support (experimental), continued Globus support in Dataverse has been expanded to include support for using file-based Globus endpoints, including the case where files are stored on tape and are not immediately accessible and for the case of referencing files stored on remote Globus endpoints. Support for using the Globus S3 Connector with an S3 store has been retained but requires changes to the Dataverse configuration. Please note: @@ -32,54 +37,50 @@ Backward incompatibilities: - The new functionality is incompatible with older versions of the globus-dataverse app and the Globus-related functionality in the UI will only function correctly if a Dataverse 6.1 compatible version of the dataverse-globus app is configured. New JVM options: -- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage Options section of the Dataverse Guides](https://guides.dataverse.org/en/latest/installation/config.html#file-storage-using-a-local-filesystem-and-or-swift-and-or-object-stores-and-or-trusted-remote-stores). +- A new "globus" store type and associated store-related options have been added. These are described in the [File Storage](https://guides.dataverse.org/en/6.1/installation/config.html#file-storage) section of the Installation Guide. - dataverse.files.globus-cache-maxage - specifies the number of minutes Dataverse will wait between an initial request for a file transfer occurs and when that transfer must begin. 
Obsolete Settings: the :GlobusBasicToken, :GlobusEndpoint, and :GlobusStores settings are no longer used -Further details can be found in the [Big Data Support section of the Dataverse Guides](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) +Further details can be found in the [Big Data Support](https://guides.dataverse.org/en/6.1/developers/big-data-support.html#big-data-support) section of the Developer Guide. ### Alternative Title now allows multiple values -Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternate Title must be changed. See "Backward incompatibilities" below for details. +Alternative Title now allows multiples. Note that JSON used to create a dataset with an Alternative Title must be changed. See "Backward incompatibilities" below and PR #9440 for details. ### External tools: configure tools now available at the dataset level -Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See also #9589. +Read/write "configure" tools (a type of external tool) are now available at the dataset level. They appear under the "Edit Dataset" menu. See [External Tools](https://guides.dataverse.org/en/6.1/admin/external-tools.html#dataset-level-configure-tools) in the Admin Guide and PR #9925. ### S3 out-of-band upload In some situations, direct upload might not work from the UI, e.g., when s3 storage is not accessible from the internet. This pull request adds an option to [allow direct uploads via API only](https://github.com/IQSS/dataverse/pull/9003). This way, a third party application can use direct upload from within the internal network, while there is no direct download available to the users via UI. By default, Dataverse supports uploading files via the [add a file to a dataset](https://guides.dataverse.org/en/6.1/api/native-api.html#add-a-file-to-a-dataset) API.
With S3 stores, a direct upload process can be enabled to allow sending the file directly to the S3 store (without any intermediate copies on the Dataverse server). -With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://dataverse-guide--9003.org.readthedocs.build/en/9003/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. +With the upload-out-of-band option enabled, it is also possible for file upload to be managed manually or via third-party tools, with the [Adding the Uploaded file to the Dataset](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html#adding-the-uploaded-file-to-the-dataset) API call (described in the [Direct DataFile Upload/Replace API](https://guides.dataverse.org/en/6.1/developers/s3-direct-upload-api.html) page) used to add metadata and inform Dataverse that a new file has been added to the relevant store. ### JSON Schema for datasets -Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. 
(Issue #9464 and #9465) +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.1/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10109. -### OpenID Connect authentication provider improvements +### OpenID Connect (OIDC) improvements #### Using MicroProfile Config for provisioning -With this release it is possible to provision a single OIDC-based authentication provider -by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. +With this release it is possible to provision a single OIDC-based authentication provider by using MicroProfile Config instead of or in addition to the classic Admin API provisioning. -If you are using an external OIDC provider component as an identity management system and/or broker -to other authentication providers such as Google, eduGain SAML and so on, this might make your -life easier during instance setups and reconfiguration. You no longer need to generate the -necessary JSON file. +If you are using an external OIDC provider component as an identity management system and/or broker to other authentication providers such as Google, eduGain SAML and so on, this might make your life easier during instance setups and reconfiguration. You no longer need to generate the necessary JSON file. 
#### Adding PKCE Support -[This PR adds PKCE support for OIDC providers](https://github.com/IQSS/dataverse/pull/9273) -Some OIDC providers require using PKCE as additional security layer. As of this version, you can enable -support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) +Some OIDC providers require using PKCE as an additional security layer. As of this version, you can enable support for this on any OIDC provider you configure. (Note that OAuth2 providers have not been upgraded.) + +For both features, see the [OIDC](https://guides.dataverse.org/en/6.1/installation/oidc.html) section of the Installation Guide and PR #9273. ### Solr improvements As of this release, application-side support has been added for the "circuit breaker" mechanism in Solr that makes it drop requests more gracefully when the search engine is experiencing load issues. -Please see the "Installing Solr" section of the Installation Prerequisites guide. +Please see the [Installing Solr](https://guides.dataverse.org/en/6.1/installation/prerequisites.html#installing-solr) section of the Installation Guide. ### New release of Dataverse Previewers (including a Markdown previewer) @@ -87,7 +88,7 @@ Version 1.4 of the standard Dataverse Previewers from https://github/com/gdcc/da - SignedUrls can now be used with PrivateUrl access tokens, which allows PrivateUrl users to view previewers that are configured to use SignedUrls. See #10093. - Launching a dataset-level configuration tool will automatically generate an API token when needed. This is consistent with how other types of tools work. See #10045. -- There is now a Markdown (.md) previewer: https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews +- There is now a [Markdown (.md)](https://guides.dataverse.org/en/6.1/user/dataset-management.html#file-previews) previewer.
### New or improved APIs @@ -172,7 +173,7 @@ This parameter applies a filter criteria to the operation and supports the follo Changes and fixes in this release not already mentioned above include: -- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. (issue #9547). This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. +- Validation has been added for the Geographic Bounding Box values in the Geospatial metadata block. This will prevent improperly defined bounding boxes from being created via the edit page or metadata imports. This also fixes the issue where existing datasets with invalid geoboxes were quietly failing to get reindexed. See PR #10142. - Dataverse's OAI_ORE Metadata Export format and archival BagIT exports (which include the OAI-ORE metadata export file) have been updated to include information about the dataset version state, e.g. RELEASED or DEACCESSIONED @@ -184,7 +185,7 @@ recreate datasets from archival Bags will start indicating which version(s) of t OAI_ORE format they can read. Dataverse installations that have been using archival Bags may wish to update any existing archival Bags they have, e.g. by deleting existing Bags and using the Dataverse -[archival Bag export API](https://guides.dataverse.org/en/latest/installation/config.html#bagit-export-api-calls) +[archival Bag export API](https://guides.dataverse.org/en/6.1/installation/config.html#bagit-export-api-calls) to generate updated versions. - For BagIT export, it is now possible to configure the following information in bag-info.txt. (Previously, customization was possible by editing `Bundle.properties` but this is no longer supported.) 
For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - Source-Organization from `dataverse.bagit.sourceorg.name`. From 92a298da25c03822c848e5a43253f039193665f9 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 15:42:55 -0500 Subject: [PATCH 25/26] add missing new config options and sort #10151 --- doc/release-notes/6.1-release-notes.md | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index fab11ce4959..1e09a207104 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -200,25 +200,28 @@ See also #10060. ## New configuration options -- dataverse.auth.oidc.enabled +- dataverse.auth.oidc.auth-server-url - dataverse.auth.oidc.client-id - dataverse.auth.oidc.client-secret -- dataverse.auth.oidc.auth-server-url +- dataverse.auth.oidc.enabled - dataverse.auth.oidc.pkce.enabled +- dataverse.auth.oidc.pkce.max-cache-age +- dataverse.auth.oidc.pkce.max-cache-size - dataverse.auth.oidc.pkce.method -- dataverse.auth.oidc.title - dataverse.auth.oidc.subtitle -- dataverse.auth.oidc.pkce.max-cache-size -- dataverse.auth.oidc.pkce.max-cache-age -- dataverse.files.{driverId}.upload-out-of-band +- dataverse.auth.oidc.title +- dataverse.bagit.sourceorg.address +- dataverse.bagit.sourceorg.email +- dataverse.bagit.sourceorg.name - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request +- dataverse.files.{driverId}.upload-out-of-band ## Backward incompatibilities - Since Alternative Title is now repeatable, the JSON you send to create or edit a dataset must be an array rather than a simple string.
For example, instead of "value": "Alternative Title", you must send "value": ["Alternative Title1", "Alternative Title2"] - Several issues (#9952, #9953, #9957) where the Signposting output did not match the Signposting specification introduce backward-incompatibility. See above for details. -- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new JVM options `dataverse.bagit.sourceorg.name`, `dataverse.bagit.sourceorg.address`, and `dataverse.bagit.sourceorg.email`. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt +- For BagIT export, if you were configuring values in bag-info.txt using `Bundle.properties`, you must switch to the new `dataverse.bagit` JVM options mentioned above. For details, see https://guides.dataverse.org/en/6.1/installation/config.html#bag-info-txt - See "Globus support" above for backward incompatibilities specific to Globus. ## Complete list of changes From 3d6343eca2846edca97e4d9699f3305fb7c19c62 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 11 Dec 2023 16:09:46 -0500 Subject: [PATCH 26/26] mention configurable docroot #10151 --- doc/release-notes/6.1-release-notes.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/release-notes/6.1-release-notes.md b/doc/release-notes/6.1-release-notes.md index 1e09a207104..1279d09a023 100644 --- a/doc/release-notes/6.1-release-notes.md +++ b/doc/release-notes/6.1-release-notes.md @@ -195,6 +195,7 @@ to generate updated versions. - To fix #9952, we surround the license info with `<` and `>`. - To fix #9953, we no longer wrap the response in a `{"status":"OK","data":{` JSON object. This has also been noted in the guides at https://dataverse-guide--9955.org.readthedocs.build/en/9955/api/native-api.html#retrieve-signposting-information - To fix #9957, we corrected the mime/content type, changing it from `json+ld` to `ld+json`.
For backward compatibility, we are still supporting the old one, for now. +- It's now possible to configure the docroot, which holds collection logos and more. See [dataverse.files.docroot](https://guides.dataverse.org/en/6.1/installation/config.html#dataverse-files-docroot) in the Installation Guide and PR #9819. - We have started maintaining an API changelog of breaking changes: https://guides.dataverse.org/en/6.1/api/changelog.html See also #10060. @@ -213,6 +214,7 @@ See also #10060. - dataverse.bagit.sourceorg.address - dataverse.bagit.sourceorg.email - dataverse.bagit.sourceorg.name +- dataverse.files.docroot - dataverse.files.globus-cache-maxage - dataverse.files.guestbook-at-request - dataverse.files.{driverId}.upload-out-of-band